Install some slightly realistic cost estimation for bitmap index scans.

21 years ago · e6f7edb9d5
parent 2f8c7c866c
commit e6f7edb9d5
8 changed files with 195 additions and 31 deletions
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.247 2005/04/19 22:35:14 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.248 2005/04/21 02:28:01 tgl Exp $
 *
 * NOTES
 *	  Every node type that can appear in stored rules' parsetrees *must*
@@ -1024,6 +1024,8 @@ _outIndexPath(StringInfo str, IndexPath *node)
 	WRITE_NODE_FIELD(indexquals);
 	WRITE_BOOL_FIELD(isjoininner);
 	WRITE_ENUM_FIELD(indexscandir, ScanDirection);
+	WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
+	WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
 	WRITE_FLOAT_FIELD(rows, "%.0f");
 }

--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -49,7 +49,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.142 2005/04/19 22:35:15 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.143 2005/04/21 02:28:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -103,6 +103,7 @@ bool		enable_hashjoin = true;


 static bool cost_qual_eval_walker(Node *node, QualCost *total);
+static Selectivity cost_bitmap_qual(Node *bitmapqual, Cost *totalCost);
 static Selectivity approx_selectivity(Query *root, List *quals,
 				   JoinType jointype);
 static Selectivity join_in_selectivity(JoinPath *path, Query *root);
@@ -126,7 +127,7 @@ clamp_row_est(double nrows)
 	if (nrows < 1.0)
 		nrows = 1.0;
 	else
-		nrows = ceil(nrows);
+		nrows = rint(nrows);

 	return nrows;
 }
@@ -232,6 +233,10 @@ cost_nonsequential_access(double relpages)
 * 'is_injoin' is T if we are considering using the index scan as the inside
 *		of a nestloop join (hence, some of the indexQuals are join clauses)
 *
+ * cost_index() takes an IndexPath not just a Path, because it sets a few
+ * additional fields of the IndexPath besides startup_cost and total_cost.
+ * These fields are needed if the IndexPath is used in a BitmapIndexScan.
+ *
 * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
 * Any additional quals evaluated as qpquals may reduce the number of returned
 * tuples, but they won't reduce the number of tuples we have to fetch from
@@ -241,7 +246,7 @@ cost_nonsequential_access(double relpages)
 * it was a list of bare clause expressions.
 */
 void
-cost_index(Path *path, Query *root,
+cost_index(IndexPath *path, Query *root,
 		   IndexOptInfo *index,
 		   List *indexQuals,
 		   bool is_injoin)
@@ -286,6 +291,14 @@ cost_index(Path *path, Query *root,
 					 PointerGetDatum(&indexSelectivity),
 					 PointerGetDatum(&indexCorrelation));

+	/*
+	 * Save amcostestimate's results for possible use by cost_bitmap_scan.
+	 * We don't bother to save indexStartupCost or indexCorrelation, because
+	 * a bitmap scan doesn't care about either.
+	 */
+	path->indextotalcost = indexTotalCost;
+	path->indexselectivity = indexSelectivity;
+
 	/* all costs for touching index itself included here */
 	startup_cost += indexStartupCost;
 	run_cost += indexTotalCost - indexStartupCost;
@@ -396,8 +409,8 @@ cost_index(Path *path, Query *root,

 	run_cost += cpu_per_tuple * tuples_fetched;

-	path->startup_cost = startup_cost;
-	path->total_cost = startup_cost + run_cost;
+	path->path.startup_cost = startup_cost;
+	path->path.total_cost = startup_cost + run_cost;
 }

 /*
@@ -417,19 +430,151 @@ cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel,
 {
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
+	Cost		indexTotalCost;
+	Selectivity indexSelectivity;
+	Cost		cpu_per_tuple;
+	Cost		cost_per_page;
+	double		tuples_fetched;
+	double		pages_fetched;
+	double		T;

 	/* Should only be applied to base relations */
 	Assert(IsA(baserel, RelOptInfo));
 	Assert(baserel->relid > 0);
 	Assert(baserel->rtekind == RTE_RELATION);

-	/* XXX lots to do here */
-	run_cost += 10;
+	if (!enable_indexscan)		/* XXX use a separate enable flag? */
+		startup_cost += disable_cost;
+
+	/*
+	 * Estimate total cost of obtaining the bitmap, as well as its total
+	 * selectivity.
+	 */
+	indexTotalCost = 0;
+	indexSelectivity = cost_bitmap_qual(bitmapqual, &indexTotalCost);
+
+	startup_cost += indexTotalCost;
+
+	/*
+	 * The number of heap pages that need to be fetched is the same as the
+	 * Mackert and Lohman formula for the case T <= b (ie, no re-reads
+	 * needed).
+	 */
+	tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+
+	T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
+	pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+	if (pages_fetched > T)
+		pages_fetched = T;
+
+	/*
+	 * For small numbers of pages we should charge random_page_cost apiece,
+	 * while if nearly all the table's pages are being read, it's more
+	 * appropriate to charge 1.0 apiece.  The effect is nonlinear, too.
+	 * For lack of a better idea, interpolate like this to determine the
+	 * cost per page.
+	 */
+	cost_per_page = random_page_cost -
+		(random_page_cost - 1.0) * sqrt(pages_fetched / T);
+
+	run_cost += pages_fetched * cost_per_page;
+
+	/*
+	 * Estimate CPU costs per tuple.
+	 *
+	 * Often the indexquals don't need to be rechecked at each tuple ...
+	 * but not always, especially not if there are enough tuples involved
+	 * that the bitmaps become lossy.  For the moment, just assume they
+	 * will be rechecked always.
+	 */
+	startup_cost += baserel->baserestrictcost.startup;
+	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+
+	run_cost += cpu_per_tuple * tuples_fetched;

 	path->startup_cost = startup_cost;
 	path->total_cost = startup_cost + run_cost;
 }

+/*
+ * cost_bitmap_qual
+ *		Recursively examine the AND/OR/IndexPath tree for a bitmap scan
+ *
+ * Total execution costs are added to *totalCost (so caller must be sure
+ * to initialize that to zero).  Estimated total selectivity of the bitmap
+ * is returned as the function result.
+ */
+static Selectivity
+cost_bitmap_qual(Node *bitmapqual, Cost *totalCost)
+{
+	Selectivity	result;
+	Selectivity	subresult;
+	ListCell   *l;
+
+	if (and_clause(bitmapqual))
+	{
+		/*
+		 * We estimate AND selectivity on the assumption that the inputs
+		 * are independent.  This is probably often wrong, but we don't
+		 * have the info to do better.
+		 *
+		 * The runtime cost of the BitmapAnd itself is estimated at 100x
+		 * cpu_operator_cost for each tbm_intersect needed.  Probably too
+		 * small, definitely too simplistic?
+		 *
+		 * This must agree with make_bitmap_and in createplan.c.
+		 */
+		result = 1.0;
+		foreach(l, ((BoolExpr *) bitmapqual)->args)
+		{
+			subresult = cost_bitmap_qual((Node *) lfirst(l), totalCost);
+			result *= subresult;
+			if (l != list_head(((BoolExpr *) bitmapqual)->args))
+				*totalCost += 100.0 * cpu_operator_cost;
+		}
+	}
+	else if (or_clause(bitmapqual))
+	{
+		/*
+		 * We estimate OR selectivity on the assumption that the inputs
+		 * are non-overlapping, since that's often the case in "x IN (list)"
+		 * type situations.  Of course, we clamp to 1.0 at the end.
+		 *
+		 * The runtime cost of the BitmapOr itself is estimated at 100x
+		 * cpu_operator_cost for each tbm_union needed.  Probably too
+		 * small, definitely too simplistic?  We are aware that the tbm_unions
+		 * are optimized out when the inputs are BitmapIndexScans.
+		 *
+		 * This must agree with make_bitmap_or in createplan.c.
+		 */
+		result = 0.0;
+		foreach(l, ((BoolExpr *) bitmapqual)->args)
+		{
+			subresult = cost_bitmap_qual((Node *) lfirst(l), totalCost);
+			result += subresult;
+			if (l != list_head(((BoolExpr *) bitmapqual)->args) &&
+				!IsA((Node *) lfirst(l), IndexPath))
+				*totalCost += 100.0 * cpu_operator_cost;
+		}
+		result = Min(result, 1.0);
+	}
+	else if (IsA(bitmapqual, IndexPath))
+	{
+		IndexPath *ipath = (IndexPath *) bitmapqual;
+
+		/* this must agree with create_bitmap_subplan in createplan.c */
+		*totalCost += ipath->indextotalcost;
+		result = ipath->indexselectivity;
+	}
+	else
+	{
+		elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
+		result = 0.0;				/* keep compiler quiet */
+	}
+
+	return result;
+}
+
 /*
 * cost_tidscan
 *	  Determines and returns the cost of scanning a relation using TIDs.
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.174 2005/04/20 21:48:04 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.175 2005/04/21 02:28:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1710,7 +1710,7 @@ make_innerjoin_index_path(Query *root,
 	/* Like costsize.c, force estimate to be at least one row */
 	pathnode->rows = clamp_row_est(pathnode->rows);

-	cost_index(&pathnode->path, root, index, indexquals, true);
+	cost_index(pathnode, root, index, indexquals, true);

 	return (Path *) pathnode;
 }
--- a/src/backend/optimizer/path/orindxpath.c
+++ b/src/backend/optimizer/path/orindxpath.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.67 2005/03/27 06:29:36 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.68 2005/04/21 02:28:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -353,7 +353,7 @@ best_or_subclause_index(Query *root,
 		IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
 		List	   *indexclauses;
 		List	   *indexquals;
-		Path		subclause_path;
+		IndexPath	subclause_path;

 		/*
 		 * Ignore partial indexes that do not match the query.  If predOK
@@ -402,13 +402,13 @@ best_or_subclause_index(Query *root,

 		cost_index(&subclause_path, root, index, indexquals, false);

-		if (!found || subclause_path.total_cost < *retTotalCost)
+		if (!found || subclause_path.path.total_cost < *retTotalCost)
 		{
 			*retIndexInfo = index;
 			*retIndexClauses = flatten_clausegroups_list(indexclauses);
 			*retIndexQuals = indexquals;
-			*retStartupCost = subclause_path.startup_cost;
-			*retTotalCost = subclause_path.total_cost;
+			*retStartupCost = subclause_path.path.startup_cost;
+			*retTotalCost = subclause_path.path.total_cost;
 			found = true;
 		}
 	}
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.180 2005/04/19 22:35:16 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.181 2005/04/21 02:28:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -976,10 +976,12 @@ create_bitmap_subplan(Query *root, Node *bitmapqual)
 									  linitial(iscan->indxqualorig),
 									  linitial(iscan->indxstrategy),
 									  linitial(iscan->indxsubtype));
-		/* XXX this cost is wrong: */
-		copy_path_costsize(&bscan->scan.plan, &ipath->path);
-		/* use the indexscan-specific rows estimate, not the parent rel's */
-		bscan->scan.plan.plan_rows = ipath->rows;
+		/* this must agree with cost_bitmap_qual in costsize.c */
+		bscan->scan.plan.startup_cost = 0.0;
+		bscan->scan.plan.total_cost = ipath->indextotalcost;
+		bscan->scan.plan.plan_rows =
+			clamp_row_est(ipath->indexselectivity * ipath->path.parent->tuples);
+		bscan->scan.plan.plan_width = 0; /* meaningless */
 		plan = (Plan *) bscan;
 	}
 	else
@@ -2068,8 +2070,9 @@ make_bitmap_and(List *bitmapplans)
 	ListCell   *subnode;

 	/*
-	 * Compute cost as sum of subplan costs, plus 10x cpu_operator_cost
+	 * Compute cost as sum of subplan costs, plus 100x cpu_operator_cost
 	 * (a pretty arbitrary amount, agreed) for each tbm_intersect needed.
+	 * This must agree with cost_bitmap_qual in costsize.c.
 	 */
 	plan->startup_cost = 0;
 	plan->total_cost = 0;
@@ -2085,7 +2088,10 @@ make_bitmap_and(List *bitmapplans)
 			plan->plan_rows = subplan->plan_rows;
 		}
 		else
+		{
+			plan->total_cost += cpu_operator_cost * 100.0;
 			plan->plan_rows = Min(plan->plan_rows, subplan->plan_rows);
+		}
 		plan->total_cost += subplan->total_cost;
 	}

@@ -2106,10 +2112,12 @@ make_bitmap_or(List *bitmapplans)
 	ListCell   *subnode;

 	/*
-	 * Compute cost as sum of subplan costs, plus 10x cpu_operator_cost
+	 * Compute cost as sum of subplan costs, plus 100x cpu_operator_cost
 	 * (a pretty arbitrary amount, agreed) for each tbm_union needed.
 	 * We assume that tbm_union can be optimized away for BitmapIndexScan
 	 * subplans.
+	 *
+	 * This must agree with cost_bitmap_qual in costsize.c.
 	 */
 	plan->startup_cost = 0;
 	plan->total_cost = 0;
@@ -2122,7 +2130,7 @@ make_bitmap_or(List *bitmapplans)
 		if (subnode == list_head(bitmapplans))	/* first node? */
 			plan->startup_cost = subplan->startup_cost;
 		else if (!IsA(subplan, BitmapIndexScan))
-			plan->total_cost += cpu_operator_cost * 10;
+			plan->total_cost += cpu_operator_cost * 100.0;
 		plan->total_cost += subplan->total_cost;
 		plan->plan_rows += subplan->plan_rows; /* ignore overlap */
 	}
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.116 2005/04/19 22:35:17 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.117 2005/04/21 02:28:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -466,7 +466,7 @@ create_index_path(Query *root,
 	 */
 	pathnode->rows = index->rel->rows;

-	cost_index(&pathnode->path, root, index, indexquals, false);
+	cost_index(pathnode, root, index, indexquals, false);

 	return pathnode;
 }
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.105 2005/04/19 22:35:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.106 2005/04/21 02:28:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -374,6 +374,10 @@ typedef struct Path
 * NoMovementScanDirection for an indexscan, but the planner wants to
 * distinguish ordered from unordered indexes for building pathkeys.)
 *
+ * 'indextotalcost' and 'indexselectivity' are saved in the IndexPath so that
+ * we need not recompute them when considering using the same index in a
+ * bitmap index/heap scan (see BitmapHeapPath).
+ *
 * 'rows' is the estimated result tuple count for the indexscan.  This
 * is the same as path.parent->rows for a simple indexscan, but it is
 * different for a nestloop inner scan, because the additional indexquals
@@ -389,6 +393,8 @@ typedef struct IndexPath
 	List	   *indexquals;
 	bool		isjoininner;
 	ScanDirection indexscandir;
+	Cost		indextotalcost;
+	Selectivity indexselectivity;
 	double		rows;			/* estimated number of result tuples */
 } IndexPath;

@@ -401,9 +407,12 @@ typedef struct IndexPath
 *
 * The individual indexscans are represented by IndexPath nodes, and any
 * logic on top of them is represented by regular AND and OR expressions.
- * Notice that we can use the same IndexPath node both to represent an
- * ordered index scan, and as the child of a BitmapHeapPath that represents
- * scanning the same index in an unordered way.
+ * Notice that we can use the same IndexPath node both to represent a regular
+ * IndexScan plan, and as the child of a BitmapHeapPath that represents
+ * scanning the same index using a BitmapIndexScan.  The startup_cost and
+ * total_cost figures of an IndexPath always represent the costs to use it
+ * as a regular IndexScan.  The costs of a BitmapIndexScan can be computed
+ * using the IndexPath's indextotalcost and indexselectivity.
 *
 * BitmapHeapPaths can be nestloop inner indexscans.  The isjoininner and
 * rows fields serve the same purpose as for plain IndexPaths.
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.64 2005/04/19 22:35:18 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.65 2005/04/21 02:28:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -51,7 +51,7 @@ extern bool enable_hashjoin;

 extern double clamp_row_est(double nrows);
 extern void cost_seqscan(Path *path, Query *root, RelOptInfo *baserel);
-extern void cost_index(Path *path, Query *root, IndexOptInfo *index,
+extern void cost_index(IndexPath *path, Query *root, IndexOptInfo *index,
 		   List *indexQuals, bool is_injoin);
 extern void cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel,
 							 Node *bitmapqual, bool is_injoin);