@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.74 2001/05/20 20:28:18 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.75 2001/06/05 05:26:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -83,7 +83,9 @@ bool enable_mergejoin = true;
bool enable_hashjoin = true;
static Selectivity estimate_hash_bucketsize(Query *root, Var *var);
static bool cost_qual_eval_walker(Node *node, Cost *total);
static Selectivity approx_selectivity(Query *root, List *quals);
static void set_rel_width(Query *root, RelOptInfo *rel);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);
@@ -99,7 +101,8 @@ static double page_size(double tuples, int width);
* parameters, even though much of it could be extracted from the Path.
*/
void
cost_seqscan(Path *path, RelOptInfo *baserel)
cost_seqscan(Path *path, Query *root,
RelOptInfo *baserel)
{
Cost startup_cost = 0;
Cost run_cost = 0;
@@ -356,10 +359,11 @@ cost_index(Path *path, Query *root,
/*
* cost_tidscan
* Determines and returns the cost of scanning a relation using tid-s.
* Determines and returns the cost of scanning a relation using TIDs.
*/
void
cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval)
cost_tidscan(Path *path, Query *root,
RelOptInfo *baserel, List *tideval)
{
Cost startup_cost = 0;
Cost run_cost = 0;
@@ -417,7 +421,8 @@ cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval)
* but if it ever does, it should react gracefully to lack of key data.
*/
void
cost_sort(Path *path, List *pathkeys, double tuples, int width)
cost_sort(Path *path, Query *root,
List *pathkeys, double tuples, int width)
{
Cost startup_cost = 0;
Cost run_cost = 0;
@@ -479,7 +484,7 @@ cost_sort(Path *path, List *pathkeys, double tuples, int width)
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
*/
void
cost_nestloop(Path *path,
cost_nestloop(Path *path, Query *root,
Path *outer_path,
Path *inner_path,
List *restrictlist)
@@ -510,7 +515,8 @@ cost_nestloop(Path *path,
run_cost += outer_path->parent->rows *
(inner_path->total_cost - inner_path->startup_cost);
if (outer_path->parent->rows > 1)
run_cost += (outer_path->parent->rows - 1) * inner_path->startup_cost;
run_cost += (outer_path->parent->rows - 1) *
inner_path->startup_cost * 0.5;
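/*
* Worked example (hypothetical numbers, for illustration only): with 10
* outer rows, inner startup_cost = 4, and inner total_cost = 24, the two
* charges above come to 10 * (24 - 4) = 200 for the repeated inner scans
* plus (10 - 1) * 4 * 0.5 = 18 for the rescans' startup work.
*/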
/*
* Number of tuples processed (not number emitted!).  If inner path is
@@ -540,15 +546,18 @@ cost_nestloop(Path *path,
* 'outer_path' is the path for the outer relation
* 'inner_path' is the path for the inner relation
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
* 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
* (this should be a subset of the restrictlist)
* 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
* to sort the outer and inner relations, or NIL if no explicit
* sort is needed because the source path is already ordered
*/
void
cost_mergejoin(Path *path,
cost_mergejoin(Path *path, Query *root,
Path *outer_path,
Path *inner_path,
List *restrictlist,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys)
{
@@ -573,6 +582,7 @@ cost_mergejoin(Path *path,
{
startup_cost += outer_path->total_cost;
cost_sort(&sort_path,
root,
outersortkeys,
outer_path->parent->rows,
outer_path->parent->width);
@@ -589,6 +599,7 @@
{
startup_cost += inner_path->total_cost;
cost_sort(&sort_path,
root,
innersortkeys,
inner_path->parent->rows,
inner_path->parent->width);
@@ -602,12 +613,24 @@
}
/*
* Estimate the number of tuples to be processed in the mergejoin
* itself as one per tuple in the two source relations.  This could be
* a drastic underestimate if there are many equal-keyed tuples in
* either relation, but we have no good way of estimating that...
* The number of tuple comparisons needed depends drastically on the
* number of equal keys in the two source relations, which we have no
* good way of estimating.  Somewhat arbitrarily, we charge one
* tuple comparison (one cpu_operator_cost) for each tuple in the
* two source relations.  This is probably a lower bound.
*/
ntuples = outer_path->parent->rows + inner_path->parent->rows;
run_cost += cpu_operator_cost *
(outer_path->parent->rows + inner_path->parent->rows);
/*
* For each tuple that gets through the mergejoin proper, we charge
* cpu_tuple_cost plus the cost of evaluating additional restriction
* clauses that are to be applied at the join.  It's OK to use an
* approximate selectivity here, since in most cases this is a minor
* component of the cost.
*/
ntuples = approx_selectivity(root, mergeclauses) *
outer_path->parent->rows * inner_path->parent->rows;
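/*
* For illustration (hypothetical numbers): if the mergeclauses have an
* estimated selectivity of 0.0005 and the inputs have 1000 and 2000 rows,
* about 0.0005 * 1000 * 2000 = 1000 tuples are expected to reach the
* per-tuple CPU charges below.
*/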
/* CPU costs */
cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
@@ -625,15 +648,15 @@ cost_mergejoin(Path *path,
* 'outer_path' is the path for the outer relation
* 'inner_path' is the path for the inner relation
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
* 'innerbucketsize' is an estimate of the bucketsize statistic
* for the inner hash key.
* 'hashclauses' is a list of the hash join clause (always a 1-element list)
* (this should be a subset of the restrictlist)
*/
void
cost_hashjoin(Path *path,
cost_hashjoin(Path *path, Query *root,
Path *outer_path,
Path *inner_path,
List *restrictlist,
Selectivity innerbucketsize)
List *hashclauses)
{
Cost startup_cost = 0;
Cost run_cost = 0;
@@ -644,6 +667,10 @@ cost_hashjoin(Path *path,
double innerbytes = relation_byte_size(inner_path->parent->rows,
inner_path->parent->width);
long hashtablebytes = SortMem * 1024L;
RestrictInfo *restrictinfo;
Var *left,
*right;
Selectivity innerbucketsize;
if (!enable_hashjoin)
startup_cost += disable_cost;
@@ -657,6 +684,46 @@
startup_cost += cpu_operator_cost * inner_path->parent->rows;
run_cost += cpu_operator_cost * outer_path->parent->rows;
/*
* Determine bucketsize fraction for inner relation.  First we have
* to figure out which side of the hashjoin clause is the inner side.
*/
Assert(length(hashclauses) == 1);
Assert(IsA(lfirst(hashclauses), RestrictInfo));
restrictinfo = (RestrictInfo *) lfirst(hashclauses);
/* these must be OK, since check_hashjoinable accepted the clause */
left = get_leftop(restrictinfo->clause);
right = get_rightop(restrictinfo->clause);
/*
* Since we tend to visit the same clauses over and over when
* planning a large query, we cache the bucketsize estimate in
* the RestrictInfo node to avoid repeated lookups of statistics.
*/
if (intMember(right->varno, inner_path->parent->relids))
{
/* righthand side is inner */
innerbucketsize = restrictinfo->right_bucketsize;
if (innerbucketsize < 0)
{
/* not cached yet */
innerbucketsize = estimate_hash_bucketsize(root, right);
restrictinfo->right_bucketsize = innerbucketsize;
}
}
else
{
Assert(intMember(left->varno, inner_path->parent->relids));
/* lefthand side is inner */
innerbucketsize = restrictinfo->left_bucketsize;
if (innerbucketsize < 0)
{
/* not cached yet */
innerbucketsize = estimate_hash_bucketsize(root, left);
restrictinfo->left_bucketsize = innerbucketsize;
}
}
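/*
* Note on the caching above: the test for a negative value treats anything
* below zero as "not cached yet"; presumably the bucketsize fields are
* initialized to -1 when the RestrictInfo is built, so only the first path
* examined for this clause pays for the statistics lookup and later paths
* reuse the cached estimate.
*/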
/*
* The number of tuple comparisons needed is the number of outer
* tuples times the typical number of tuples in a hash bucket,
@@ -667,14 +734,14 @@ cost_hashjoin(Path *path,
ceil(inner_path->parent->rows * innerbucketsize);
/*
* Estimate the number of tuples that get through the hashing filter
* as one per tuple in the two source relations.  This could be a
* drastic underestimate if there are many equal-keyed tuples in
* either relation, but we have no simple way of estimating that;
* and since this is only a second-order parameter, it's probably
* not worth expending a lot of effort on the estimate.
* For each tuple that gets through the hashjoin proper, we charge
* cpu_tuple_cost plus the cost of evaluating additional restriction
* clauses that are to be applied at the join.  It's OK to use an
* approximate selectivity here, since in most cases this is a minor
* component of the cost.
*/
ntuples = outer_path->parent->rows + inner_path->parent->rows;
ntuples = approx_selectivity(root, hashclauses) *
outer_path->parent->rows * inner_path->parent->rows;
/* CPU costs */
cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
@@ -718,10 +785,6 @@ cost_hashjoin(Path *path,
* divided by total tuples in relation) if the specified Var is used
* as a hash key.
*
* This statistic is used by cost_hashjoin.  We split out the calculation
* because it's useful to cache the result for re-use across multiple path
* cost calculations.
*
* XXX This is really pretty bogus since we're effectively assuming that the
* distribution of hash keys will be the same after applying restriction
* clauses as it was in the underlying relation.  However, we are not nearly
@@ -747,7 +810,7 @@
* which is what we want.  We do not want to hash unless we know that the
* inner rel is well-dispersed (or the alternatives seem much worse).
*/
Selectivity
static Selectivity
estimate_hash_bucketsize(Query *root, Var *var)
{
Oid relid;
@@ -1000,6 +1063,65 @@ cost_qual_eval_walker(Node *node, Cost *total)
}
/*
* approx_selectivity
* Quick-and-dirty estimation of clause selectivities.
* The input can be either an implicitly-ANDed list of boolean
* expressions, or a list of RestrictInfo nodes (typically the latter).
*
* The "quick" part comes from caching the selectivity estimates so we can
* avoid recomputing them later.  (Since the same clauses are typically
* examined over and over in different possible join trees, this makes a
* big difference.)
*
* The "dirty" part comes from the fact that the selectivities of multiple
* clauses are estimated independently and multiplied together.  Currently,
* clauselist_selectivity can seldom do any better than that anyhow, but
* someday it might be smarter.
*
* Since we are only using the results to estimate how many potential
* output tuples are generated and passed through qpqual checking, it
* seems OK to live with the approximation.
*/
static Selectivity
approx_selectivity(Query *root, List *quals)
{
Selectivity total = 1.0;
List *l;
foreach(l, quals)
{
Node *qual = (Node *) lfirst(l);
Selectivity selec;
/*
* RestrictInfo nodes contain a this_selec field reserved for this
* routine's use, so that it's not necessary to evaluate the qual
* clause's selectivity more than once.  If the clause's selectivity
* hasn't been computed yet, the field will contain -1.
*/
if (qual && IsA(qual, RestrictInfo))
{
RestrictInfo *restrictinfo = (RestrictInfo *) qual;
if (restrictinfo->this_selec < 0)
restrictinfo->this_selec =
clause_selectivity(root,
(Node *) restrictinfo->clause,
0);
selec = restrictinfo->this_selec;
}
else
{
/* If it's a bare expression, must always do it the hard way */
selec = clause_selectivity(root, qual, 0);
}
total *= selec;
}
return total;
}
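/*
* Illustrative note (hypothetical numbers): because per-clause
* selectivities are simply multiplied, two clauses with selectivities
* 0.25 and 0.01 yield 0.0025; if the clauses are correlated, the true
* joint selectivity could differ considerably, which is the "dirty"
* part described in the function's header comment.
*/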
/*
* set_baserel_size_estimates
* Set the size estimates for the given base relation.