First cut at doing something reasonable with OR-of-ANDs WHERE

conditions. There are some pretty bogus heuristics in prepqual.c that try to decide whether to output CNF or DNF format; they need to be replaced, likely. Right now the code is probably too willing to choose DNF form, which might hurt performance in some cases that used to work OK. But at least we have a foundation to build on.
27 years ago · 43d32d3683
parent b705fa3909
commit 43d32d3683
5 changed files with 269 additions and 21 deletions
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.70 1999/08/21 03:49:00 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.71 1999/09/13 00:17:19 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -48,6 +48,9 @@ static void match_index_orclauses(RelOptInfo *rel, RelOptInfo *index, int indexk
 					  int xclass, List *restrictinfo_list);
 static List *match_index_orclause(RelOptInfo *rel, RelOptInfo *index, int indexkey,
 			 int xclass, List *or_clauses, List *other_matching_indices);
+static bool match_or_subclause_to_indexkey(RelOptInfo *rel, RelOptInfo *index,
+										   int indexkey, int xclass,
+										   Expr *clause);
 static List *group_clauses_by_indexkey(RelOptInfo *rel, RelOptInfo *index,
 				  int *indexkeys, Oid *classes, List *restrictinfo_list);
 static List *group_clauses_by_ikey_for_joins(RelOptInfo *rel, RelOptInfo *index,
@ -327,8 +330,8 @@ match_index_orclause(RelOptInfo *rel,
 	{
 		Expr	   *clause = lfirst(clist);

-		if (match_clause_to_indexkey(rel, index, indexkey, xclass,
-									 clause, false))
+		if (match_or_subclause_to_indexkey(rel, index, indexkey, xclass,
+										   clause))
 		{
 			/* OK to add this index to sublist for this subclause */
 			lfirst(matching_indices) = lcons(index,
@ -341,6 +344,38 @@ match_index_orclause(RelOptInfo *rel,
 	return index_list;
 }

+/*
+ * See if a subclause of an OR clause matches an index.
+ *
+ * We accept the subclause if it is an operator clause that matches the
+ * index, or if it is an AND clause all of whose members are operators
+ * that match the index.  (XXX Would accepting a single match be useful?)
+ */
+static bool
+match_or_subclause_to_indexkey(RelOptInfo *rel,
+							   RelOptInfo *index,
+							   int indexkey,
+							   int xclass,
+							   Expr *clause)
+{
+	if (and_clause((Node *) clause))
+	{
+		List	   *item;
+
+		foreach(item, clause->args)
+		{
+			if (! match_clause_to_indexkey(rel, index, indexkey, xclass,
+										   lfirst(item), false))
+				return false;
+		}
+		return true;
+	}
+	else
+		return match_clause_to_indexkey(rel, index, indexkey, xclass,
+										clause, false);
+}
+
+
 /****************************************************************************
 *				----  ROUTINES TO CHECK RESTRICTIONS  ----
 ****************************************************************************/
@ -559,7 +594,8 @@ group_clauses_by_ikey_for_joins(RelOptInfo *rel,
 *
 * Returns true if the clause can be used with this index key.
 *
- * NOTE:  returns false if clause is an or_clause; that's handled elsewhere.
+ * NOTE:  returns false if clause is an OR or AND clause; to the extent
+ * we cope with those at all, it is done by higher-level routines.
 */
 static bool
 match_clause_to_indexkey(RelOptInfo *rel,
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.43 1999/08/22 23:56:45 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.44 1999/09/13 00:17:25 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -75,9 +75,9 @@ query_planner(Query *root,
 	if (root->hasSubLinks)
 		qual = (List *) SS_process_sublinks((Node *) qual);

-	qual = cnfify((Expr *) qual, true);
+	qual = canonicalize_qual((Expr *) qual, true);
 #ifdef OPTIMIZER_DEBUG
-	printf("After cnfify()\n");
+	printf("After canonicalize_qual()\n");
 	pprint(qual);
 #endif

--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.66 1999/08/26 05:07:41 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.67 1999/09/13 00:17:25 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -324,8 +324,9 @@ union_planner(Query *parse)
 				elog(ERROR, "Sub-SELECT in HAVING clause must use only GROUPed attributes from outer SELECT");
 		}

-		/* convert the havingQual to conjunctive normal form (cnf) */
-		parse->havingQual = (Node *) cnfify((Expr *) parse->havingQual, true);
+		/* convert the havingQual to implicit-AND normal form */
+		parse->havingQual = (Node *)
+			canonicalize_qual((Expr *) parse->havingQual, true);

 		/*
 		 * Require an aggregate function to appear in each clause of the
--- a/src/backend/optimizer/prep/prepqual.c
+++ b/src/backend/optimizer/prep/prepqual.c
@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.19 1999/09/12 18:08:17 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.20 1999/09/13 00:17:13 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -29,6 +29,9 @@ static Expr *find_ors(Expr *qual);
 static Expr *or_normalize(List *orlist);
 static Expr *find_ands(Expr *qual);
 static Expr *and_normalize(List *andlist);
+static Expr *qual_cleanup(Expr *qual);
+static List *remove_duplicates(List *list);
+static int	count_bool_nodes(Expr *qual);

 /*****************************************************************************
 *
@ -37,20 +40,124 @@ static Expr *and_normalize(List *andlist);
 *		These routines convert an arbitrary boolean expression into
 *		conjunctive normal form or disjunctive normal form.
 *
- *		The result of these routines differs from a "true" CNF/DNF in that
- *		we do not bother to detect common subexpressions; e.g., ("AND" A A)
- *		does not get simplified to A.  Testing for identical subexpressions
- *		is a waste of time if the query is written intelligently, and it
- *		takes an unreasonable amount of time if there are many subexpressions
- *		(since it's roughly O(N^2) in the number of subexpressions).
+ *		Normalization is only carried out in the top AND/OR/NOT portion
+ *		of the given tree; we do not attempt to normalize boolean expressions
+ *		that may appear as arguments of operators or functions in the tree.
 *
- *		Because of that restriction, it would be unwise to apply dnfify()
- *		to the result of cnfify() or vice versa.  Instead apply both to
- *		the original user-written qual expression.
+ *		Query qualifications (WHERE clauses) are ordinarily transformed into
+ *		CNF, ie, AND-of-ORs form, because then the optimizer can use any one
+ *		of the independent AND clauses as a filtering qualification.  However,
+ *		quals that are naturally expressed as OR-of-ANDs can suffer an
+ *		exponential growth in size in this transformation, so we also consider
+ *		converting to DNF (OR-of-ANDs), and we may also leave well enough alone
+ *		if both transforms cause unreasonable growth.  The OR-of-ANDs format
+ *		is useful for indexscan implementation, so we prefer that format when
+ *		there is just one relation involved.
 *
+ *		canonicalize_qual() does "smart" conversion to either CNF or DNF, per
+ *		the above considerations, while cnfify() and dnfify() simply perform
+ *		the demanded transformation.  The latter two may become dead code
+ *		eventually.
 *****************************************************************************/


+/*
+ * canonicalize_qual
+ *	  Convert a qualification to the most useful normalized form.
+ *
+ * Returns the modified qualification.
+ *
+ * If 'removeAndFlag' is true then it removes explicit AND at the top level,
+ * producing a list of implicitly-ANDed conditions.  Otherwise, a regular
+ * boolean expression is returned.  Since most callers pass 'true', we
+ * prefer to declare the result as List *, not Expr *.
+ *
+ * XXX This code could be much smarter, at the cost of also being slower,
+ * if we tried to compute selectivities and/or see whether there are
+ * actually indexes to support an indexscan implementation of a DNF qual.
+ * We could even try converting the CNF clauses that mention a single
+ * relation into a single DNF clause to see if that looks cheaper to
+ * implement.  For now, though, we just try to avoid doing anything
+ * quite as stupid as unconditionally converting to CNF was...
+ */
+List *
+canonicalize_qual(Expr *qual, bool removeAndFlag)
+{
+	Expr	   *newqual,
+			   *cnfqual,
+			   *dnfqual;
+	int			qualcnt,
+				cnfcnt,
+				dnfcnt;
+
+	if (qual == NULL)
+		return NIL;
+
+	/* Flatten AND and OR groups throughout the tree.
+	 * This improvement is always worthwhile, so do it unconditionally.
+	 */
+	qual = flatten_andors(qual);
+	/* Push down NOTs.  We do this only in the top-level boolean
+	 * expression, without examining arguments of operators/functions.
+	 * Even so, it might not be a win if we are unable to find negators
+	 * for all the operators involved; so we keep the flattened-but-not-
+	 * NOT-pushed qual as the reference point for comparsions.
+	 */
+	newqual = find_nots(qual);
+	/*
+	 * Generate both CNF and DNF forms from newqual.
+	 */
+	/* Normalize into conjunctive normal form, and clean up the result. */
+	cnfqual = qual_cleanup(find_ors(newqual));
+	/* Likewise for DNF */
+	dnfqual = qual_cleanup(find_ands(newqual));
+
+	/*
+	 * Now, choose whether to return qual, cnfqual, or dnfqual.
+	 *
+	 * First heuristic is to forget about either CNF or DNF if it shows
+	 * unreasonable growth compared to the original form of the qual,
+	 * where we define "unreasonable" a tad arbitrarily as 4x more
+	 * operators.
+	 */
+	qualcnt = count_bool_nodes(qual);
+	cnfcnt = count_bool_nodes(cnfqual);
+	dnfcnt = count_bool_nodes(dnfqual);
+	if (cnfcnt >= 4 * qualcnt)
+		cnfqual = NULL;			/* mark CNF not usable */
+	if (dnfcnt >= 4 * qualcnt)
+		dnfqual = NULL;			/* mark DNF not usable */
+
+	/*
+	 * Second heuristic is to prefer DNF if only one relation is mentioned
+	 * and it is smaller than the CNF representation.
+	 */
+	if (dnfqual && dnfcnt < cnfcnt && NumRelids((Node *) dnfqual) == 1)
+		cnfqual = NULL;
+
+	/*
+	 * Otherwise, we prefer CNF.
+	 *
+	 * XXX obviously, these rules could be improved upon.
+	 */
+
+	/* pick preferred survivor */
+	if (cnfqual)
+		newqual = cnfqual;
+	else if (dnfqual)
+		newqual = dnfqual;
+	else
+		newqual = qual;
+
+	/* Convert to implicit-AND list if requested */
+	if (removeAndFlag)
+	{
+		newqual = (Expr *) make_ands_implicit(newqual);
+	}
+
+	return (List *) newqual;
+}
+
 /*
 * cnfify
 *	  Convert a qualification to conjunctive normal form by applying
@ -81,6 +188,8 @@ cnfify(Expr *qual, bool removeAndFlag)
 	newqual = find_nots(newqual);
 	/* Normalize into conjunctive normal form. */
 	newqual = find_ors(newqual);
+	/* Clean up the result. */
+	newqual = qual_cleanup(newqual);

 	if (removeAndFlag)
 	{
@ -118,6 +227,8 @@ dnfify(Expr *qual)
 	newqual = find_nots(newqual);
 	/* Normalize into disjunctive normal form. */
 	newqual = find_ands(newqual);
+	/* Clean up the result. */
+	newqual = qual_cleanup(newqual);

 	return newqual;
 }
@ -641,3 +752,102 @@ and_normalize(List *andlist)
 	/* pull_ors is needed in case any sub-and_normalize succeeded */
 	return make_orclause(pull_ors(orclauses));
 }
+
+/*
+ * qual_cleanup
+ *	  Fix up a qualification by removing duplicate entries (which could be
+ *	  created during normalization, if identical subexpressions from different
+ *	  parts of the tree are brought together).  Also, check for AND and OR
+ *	  clauses with only one remaining subexpression, and simplify.
+ *
+ * Returns the modified qualification.
+ */
+static Expr *
+qual_cleanup(Expr *qual)
+{
+	if (qual == NULL)
+		return NULL;
+
+	if (and_clause((Node *) qual))
+	{
+		List	   *andlist = NIL;
+		List	   *temp;
+
+		foreach(temp, qual->args)
+			andlist = lappend(andlist, qual_cleanup(lfirst(temp)));
+
+		andlist = remove_duplicates(pull_ands(andlist));
+
+		if (length(andlist) > 1)
+			return make_andclause(andlist);
+		else
+			return lfirst(andlist);
+	}
+	else if (or_clause((Node *) qual))
+	{
+		List	   *orlist = NIL;
+		List	   *temp;
+
+		foreach(temp, qual->args)
+			orlist = lappend(orlist, qual_cleanup(lfirst(temp)));
+
+		orlist = remove_duplicates(pull_ors(orlist));
+
+		if (length(orlist) > 1)
+			return make_orclause(orlist);
+		else
+			return lfirst(orlist);
+	}
+	else if (not_clause((Node *) qual))
+		return make_notclause(qual_cleanup(get_notclausearg(qual)));
+	else
+		return qual;
+}
+
+/*
+ * remove_duplicates
+ */
+static List *
+remove_duplicates(List *list)
+{
+	List	   *result = NIL;
+	List	   *i;
+
+	if (length(list) <= 1)
+		return list;
+
+	foreach(i, list)
+	{
+		if (! member(lfirst(i), result))
+			result = lappend(result, lfirst(i));
+	}
+	return result;
+}
+
+/*
+ * count_bool_nodes
+ *		Support for heuristics in canonicalize_qual(): count the
+ *		number of nodes in the top level AND/OR/NOT part of a qual tree
+ */
+static int
+count_bool_nodes(Expr *qual)
+{
+	if (qual == NULL)
+		return 0;
+
+	if (and_clause((Node *) qual) ||
+		or_clause((Node *) qual))
+	{
+		int			sum = 1;	/* for the and/or itself */
+		List	   *temp;
+
+		foreach(temp, qual->args)
+			sum += count_bool_nodes(lfirst(temp));
+
+		return sum;
+	}
+	else if (not_clause((Node *) qual))
+		return count_bool_nodes(get_notclausearg(qual)) + 1;
+	else
+		return 1;				/* anything else counts 1 for my purposes */
+}
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@ -6,7 +6,7 @@
 *
 * Copyright (c) 1994, Regents of the University of California
 *
- * $Id: prep.h,v 1.18 1999/09/12 18:08:10 tgl Exp $
+ * $Id: prep.h,v 1.19 1999/09/13 00:17:08 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -19,6 +19,7 @@
 /*
 * prototypes for prepqual.c
 */
+extern List *canonicalize_qual(Expr *qual, bool removeAndFlag);
 extern List *cnfify(Expr *qual, bool removeAndFlag);
 extern Expr *dnfify(Expr *qual);