This is the solution I came up with. It is completely dynamic, so df_criterias can have as many condition columns as needed:
from pyspark.sql.functions import col

df_criterias = spark.createDataFrame(
    [
        ("IN ('ABC')", "IN ('XYZ')", "<2021", "", "Top"),
        ("IN ('ABC')", "NOT IN ('JKL','MNO')", "IN ('2021')", "", "Bottom"),
    ],
    ["CriteriaA", "CriteriaB", "CriteriaC", "CriteriaD", "Result"]
)
# Map each rule column to the data column it applies to
column_map = {
    "CriteriaA": "ColumnA",
    "CriteriaB": "ColumnB",
    "CriteriaC": "ColumnC",
    "CriteriaD": "ColumnD"
}

# Rename rule columns and keep only the columns defined in the mapping above
df_criterias_renamed = df_criterias.select([col(k).alias(v) for k, v in column_map.items()])
# Collect the distinct rule rows to the driver
rows_criterias = df_criterias_renamed.distinct().collect()

# Cycle through the rules
for row in rows_criterias:
    filters = row.asDict()
    # Build one clause per column, ignoring blank filters
    where_clause_list = [f"{k} {v}" for k, v in filters.items() if v != "" and k != "Result"]
    # Combine all clauses together
    where_clause = " AND ".join(where_clause_list)
    print(where_clause)
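
The loop above only prints the generated clause. As a rough sketch of how it could actually be applied, the snippet below filters a hypothetical source DataFrame df_data (assumed to have ColumnA..ColumnD) and tags the matching rows with the rule's Result value. df_data, the lit() tagging, and collecting from the original df_criterias (so the Result label is still available) are assumptions, not part of the solution above.

from pyspark.sql.functions import lit

# Hypothetical usage: df_data is an assumed source table with ColumnA..ColumnD.
# Collect from the original df_criterias so the Result column is still present.
for row in df_criterias.collect():
    filters = row.asDict()
    where_clause = " AND ".join(
        f"{column_map[k]} {v}" for k, v in filters.items() if v != "" and k != "Result"
    )
    # filter() accepts a SQL expression string, e.g. "ColumnA IN ('ABC') AND ColumnC <2021"
    df_tagged = df_data.filter(where_clause).withColumn("Result", lit(filters["Result"]))
    df_tagged.show()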