Finish implementation of hashed aggregation. Add enable_hashagg GUC

parameter to allow it to be forced off for comparison purposes. Add ORDER BY clauses to a bunch of regression test queries that will otherwise produce randomly-ordered output in the new regime.
23 years ago · 6c1d4662af
parent 2676e11fdf
commit 6c1d4662af
25 changed files with 457 additions and 190 deletions
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.156 2002/11/15 03:22:30 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.157 2002/11/21 00:42:18 tgl Exp $
 -->

 <Chapter Id="runtime">
@ -670,6 +670,17 @@ env PGOPTIONS='-c geqo=off' psql
      </listitem>
     </varlistentry>

+     <varlistentry>
+      <term><varname>ENABLE_HASHAGG</varname> (<type>boolean</type>)</term>
+      <listitem>
+       <para>
+        Enables or disables the query planner's use of hashed aggregation
+	plan types. The default is on. This is used for debugging the query
+	planner.
+       </para>
+      </listitem>
+     </varlistentry>
+
     <varlistentry>
      <term><varname>ENABLE_HASHJOIN</varname> (<type>boolean</type>)</term>
      <listitem>
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@ -42,7 +42,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.90 2002/09/04 20:31:20 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.91 2002/11/21 00:42:19 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -79,6 +79,7 @@ bool		enable_seqscan = true;
 bool		enable_indexscan = true;
 bool		enable_tidscan = true;
 bool		enable_sort = true;
+bool		enable_hashagg = true;
 bool		enable_nestloop = true;
 bool		enable_mergejoin = true;
 bool		enable_hashjoin = true;
@ -423,10 +424,8 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)

 /*
 * cost_sort
- *	  Determines and returns the cost of sorting a relation.
- *
- * The cost of supplying the input data is NOT included; the caller should
- * add that cost to both startup and total costs returned from this routine!
+ *	  Determines and returns the cost of sorting a relation, including
+ *	  the cost of reading the input data.
 *
 * If the total volume of data to sort is less than SortMem, we will do
 * an in-memory sort, which requires no I/O and about t*log2(t) tuple
@ -449,6 +448,7 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
 * the right ballpark in most cases.
 *
 * 'pathkeys' is a list of sort keys
+ * 'input_cost' is the total cost for reading the input data
 * 'tuples' is the number of tuples in the relation
 * 'width' is the average tuple width in bytes
 *
@ -456,12 +456,14 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
 * can't conveniently supply the sort keys.  Since this routine doesn't
 * currently do anything with pathkeys anyway, that doesn't matter...
 * but if it ever does, it should react gracefully to lack of key data.
+ * (Actually, the thing we'd most likely be interested in is just the number
+ * of sort keys, which all callers *could* supply.)
 */
 void
 cost_sort(Path *path, Query *root,
-		  List *pathkeys, double tuples, int width)
+		  List *pathkeys, Cost input_cost, double tuples, int width)
 {
-	Cost		startup_cost = 0;
+	Cost		startup_cost = input_cost;
 	Cost		run_cost = 0;
 	double		nbytes = relation_byte_size(tuples, width);
 	long		sortmembytes = SortMem * 1024L;
@ -511,6 +513,92 @@ cost_sort(Path *path, Query *root,
 	path->total_cost = startup_cost + run_cost;
 }

+/*
+ * cost_agg
+ *		Determines and returns the cost of performing an Agg plan node,
+ *		including the cost of its input.
+ *
+ * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
+ * are for appropriately-sorted input.
+ */
+void
+cost_agg(Path *path, Query *root,
+		 AggStrategy aggstrategy, int numAggs,
+		 int numGroupCols, double numGroups,
+		 Cost input_startup_cost, Cost input_total_cost,
+		 double input_tuples)
+{
+	Cost		startup_cost;
+	Cost		total_cost;
+
+	/*
+	 * We charge one cpu_operator_cost per aggregate function per input
+	 * tuple, and another one per output tuple (corresponding to transfn
+	 * and finalfn calls respectively).  If we are grouping, we charge an
+	 * additional cpu_operator_cost per grouping column per input tuple
+	 * for grouping comparisons.
+	 *
+	 * We will produce a single output tuple if not grouping,
+	 * and a tuple per group otherwise.
+	 */
+	if (aggstrategy == AGG_PLAIN)
+	{
+		startup_cost = input_total_cost;
+		startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
+		/* we aren't grouping */
+		total_cost = startup_cost;
+	}
+	else if (aggstrategy == AGG_SORTED)
+	{
+		/* Here we are able to deliver output on-the-fly */
+		startup_cost = input_startup_cost;
+		total_cost = input_total_cost;
+		total_cost += cpu_operator_cost * (input_tuples + numGroups) * numAggs;
+		total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+	}
+	else
+	{
+		/* must be AGG_HASHED */
+		startup_cost = input_total_cost;
+		startup_cost += cpu_operator_cost * input_tuples * numAggs;
+		startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
+		total_cost = startup_cost;
+		total_cost += cpu_operator_cost * numGroups * numAggs;
+	}
+
+	path->startup_cost = startup_cost;
+	path->total_cost = total_cost;
+}
+
+/*
+ * cost_group
+ *		Determines and returns the cost of performing a Group plan node,
+ *		including the cost of its input.
+ *
+ * Note: caller must ensure that input costs are for appropriately-sorted
+ * input.
+ */
+void
+cost_group(Path *path, Query *root,
+		   int numGroupCols, double numGroups,
+		   Cost input_startup_cost, Cost input_total_cost,
+		   double input_tuples)
+{
+	Cost		startup_cost;
+	Cost		total_cost;
+
+	startup_cost = input_startup_cost;
+	total_cost = input_total_cost;
+
+	/*
+	 * Charge one cpu_operator_cost per comparison per input tuple. We
+	 * assume all columns get compared at most of the tuples.
+	 */
+	total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+
+	path->startup_cost = startup_cost;
+	path->total_cost = total_cost;
+}

 /*
 * cost_nestloop
@ -658,10 +746,10 @@ cost_mergejoin(Path *path, Query *root,
 	 */
 	if (outersortkeys)			/* do we need to sort outer? */
 	{
-		startup_cost += outer_path->total_cost;
 		cost_sort(&sort_path,
 				  root,
 				  outersortkeys,
+				  outer_path->total_cost,
 				  outer_path->parent->rows,
 				  outer_path->parent->width);
 		startup_cost += sort_path.startup_cost;
@ -677,10 +765,10 @@ cost_mergejoin(Path *path, Query *root,

 	if (innersortkeys)			/* do we need to sort inner? */
 	{
-		startup_cost += inner_path->total_cost;
 		cost_sort(&sort_path,
 				  root,
 				  innersortkeys,
+				  inner_path->total_cost,
 				  inner_path->parent->rows,
 				  inner_path->parent->width);
 		startup_cost += sort_path.startup_cost;
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@ -10,7 +10,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.123 2002/11/19 23:21:58 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.124 2002/11/21 00:42:19 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -1573,9 +1573,11 @@ make_sort(Query *root, List *tlist, Plan *lefttree, int keycount)

 	copy_plan_costsize(plan, lefttree); /* only care about copying size */
 	cost_sort(&sort_path, root, NIL,
-			  lefttree->plan_rows, lefttree->plan_width);
-	plan->startup_cost = sort_path.startup_cost + lefttree->total_cost;
-	plan->total_cost = sort_path.total_cost + lefttree->total_cost;
+			  lefttree->total_cost,
+			  lefttree->plan_rows,
+			  lefttree->plan_width);
+	plan->startup_cost = sort_path.startup_cost;
+	plan->total_cost = sort_path.total_cost;
 	plan->state = (EState *) NULL;
 	plan->targetlist = tlist;
 	plan->qual = NIL;
@ -1683,39 +1685,39 @@ make_material(List *tlist, Plan *lefttree)
 }

 Agg *
-make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
-		 int ngrp, AttrNumber *grpColIdx, long numGroups, int numAggs,
+make_agg(Query *root, List *tlist, List *qual,
+		 AggStrategy aggstrategy,
+		 int numGroupCols, AttrNumber *grpColIdx,
+		 long numGroups, int numAggs,
 		 Plan *lefttree)
 {
 	Agg		   *node = makeNode(Agg);
 	Plan	   *plan = &node->plan;
+	Path		agg_path;		/* dummy for result of cost_agg */

 	node->aggstrategy = aggstrategy;
-	node->numCols = ngrp;
+	node->numCols = numGroupCols;
 	node->grpColIdx = grpColIdx;
 	node->numGroups = numGroups;

-	copy_plan_costsize(plan, lefttree);
-
-	/*
-	 * Charge one cpu_operator_cost per aggregate function per input
-	 * tuple.
-	 */
-	plan->total_cost += cpu_operator_cost * plan->plan_rows * numAggs;
+	copy_plan_costsize(plan, lefttree); /* only care about copying size */
+	cost_agg(&agg_path, root,
+			 aggstrategy, numAggs,
+			 numGroupCols, numGroups,
+			 lefttree->startup_cost,
+			 lefttree->total_cost,
+			 lefttree->plan_rows);
+	plan->startup_cost = agg_path.startup_cost;
+	plan->total_cost = agg_path.total_cost;

 	/*
 	 * We will produce a single output tuple if not grouping,
 	 * and a tuple per group otherwise.
 	 */
 	if (aggstrategy == AGG_PLAIN)
-	{
 		plan->plan_rows = 1;
-		plan->startup_cost = plan->total_cost;
-	}
 	else
-	{
 		plan->plan_rows = numGroups;
-	}

 	plan->state = (EState *) NULL;
 	plan->qual = qual;
@ -1727,22 +1729,28 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
 }

 Group *
-make_group(List *tlist,
-		   int ngrp,
+make_group(Query *root,
+		   List *tlist,
+		   int numGroupCols,
 		   AttrNumber *grpColIdx,
 		   double numGroups,
 		   Plan *lefttree)
 {
 	Group	   *node = makeNode(Group);
 	Plan	   *plan = &node->plan;
+	Path		group_path;		/* dummy for result of cost_group */

-	copy_plan_costsize(plan, lefttree);
+	node->numCols = numGroupCols;
+	node->grpColIdx = grpColIdx;

-	/*
-	 * Charge one cpu_operator_cost per comparison per input tuple. We
-	 * assume all columns get compared at most of the tuples.
-	 */
-	plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp;
+	copy_plan_costsize(plan, lefttree); /* only care about copying size */
+	cost_group(&group_path, root,
+			   numGroupCols, numGroups,
+			   lefttree->startup_cost,
+			   lefttree->total_cost,
+			   lefttree->plan_rows);
+	plan->startup_cost = group_path.startup_cost;
+	plan->total_cost = group_path.total_cost;

 	/* One output tuple per estimated result group */
 	plan->plan_rows = numGroups;
@ -1752,8 +1760,6 @@ make_group(List *tlist,
 	plan->targetlist = tlist;
 	plan->lefttree = lefttree;
 	plan->righttree = (Plan *) NULL;
-	node->numCols = ngrp;
-	node->grpColIdx = grpColIdx;

 	return node;
 }
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@ -14,19 +14,17 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.71 2002/11/06 00:00:44 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.72 2002/11/21 00:42:19 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"

 #include "optimizer/clauses.h"
+#include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
-#include "optimizer/tlist.h"
-#include "parser/parsetree.h"
-#include "utils/memutils.h"


 /*--------------------
@ -36,11 +34,12 @@
 *
 * Since query_planner does not handle the toplevel processing (grouping,
 * sorting, etc) it cannot select the best path by itself.  It selects
- * two paths: the cheapest path that produces the required tuples, independent
- * of any ordering considerations, and the cheapest path that produces the
- * required tuples in the required ordering, if there is a path that
- * can produce them without an explicit top-level sort step.  The caller
- * (grouping_planner) will make the final decision about which to use.
+ * two paths: the cheapest path that produces all the required tuples,
+ * independent of any ordering considerations, and the cheapest path that
+ * produces the expected fraction of the required tuples in the required
+ * ordering, if there is a path that is cheaper for this than just sorting
+ * the output of the cheapest overall path.  The caller (grouping_planner)
+ * will make the final decision about which to use.
 *
 * Input parameters:
 * root is the query to plan
@ -50,7 +49,7 @@
 * Output parameters:
 * *cheapest_path receives the overall-cheapest path for the query
 * *sorted_path receives the cheapest presorted path for the query,
- *				if any (it may be NULL, or the same as cheapest_path)
+ *				if any (NULL if there is no useful presorted path)
 *
 * Note: the Query node also includes a query_pathkeys field, which is both
 * an input and an output of query_planner().  The input value signals
@ -78,6 +77,8 @@ query_planner(Query *root, List *tlist, double tuple_fraction,
 {
 	List	   *constant_quals;
 	RelOptInfo *final_rel;
+	Path	   *cheapestpath;
+	Path	   *sortedpath;

 	/*
 	 * If the query has an empty join tree, then it's something easy like
@ -166,34 +167,76 @@ query_planner(Query *root, List *tlist, double tuple_fraction,

 	/*
 	 * Pick out the cheapest-total path and the cheapest presorted path
-	 * for the requested pathkeys (if there is one).  We can take the
+	 * for the requested pathkeys (if there is one).  We should take the
 	 * tuple fraction into account when selecting the cheapest presorted
 	 * path, but not when selecting the cheapest-total path, since if we
 	 * have to sort then we'll have to fetch all the tuples.  (But there's
 	 * a special case: if query_pathkeys is NIL, meaning order doesn't
 	 * matter, then the "cheapest presorted" path will be the cheapest
 	 * overall for the tuple fraction.)
+	 *
+	 * The cheapest-total path is also the one to use if grouping_planner
+	 * decides to use hashed aggregation, so we return it separately even
+	 * if this routine thinks the presorted path is the winner.
 	 */
-	*cheapest_path = final_rel->cheapest_total_path;
+	cheapestpath = final_rel->cheapest_total_path;

-	*sorted_path =
+	sortedpath =
 		get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
 												  root->query_pathkeys,
 												  tuple_fraction);

+	/* Don't return same path in both guises; just wastes effort */
+	if (sortedpath == cheapestpath)
+		sortedpath = NULL;
+
+	/*
+	 * Forget about the presorted path if it would be cheaper to sort the
+	 * cheapest-total path.  Here we need consider only the behavior at
+	 * the tuple fraction point.
+	 */
+	if (sortedpath)
+	{
+		Path		sort_path;	/* dummy for result of cost_sort */
+
+		if (root->query_pathkeys == NIL ||
+			pathkeys_contained_in(root->query_pathkeys,
+								  cheapestpath->pathkeys))
+		{
+			/* No sort needed for cheapest path */
+			sort_path.startup_cost = cheapestpath->startup_cost;
+			sort_path.total_cost = cheapestpath->total_cost;
+		}
+		else
+		{
+			/* Figure cost for sorting */
+			cost_sort(&sort_path, root, root->query_pathkeys,
+					  cheapestpath->total_cost,
+					  final_rel->rows, final_rel->width);
+		}
+
+		if (compare_fractional_path_costs(sortedpath, &sort_path,
+										  tuple_fraction) > 0)
+		{
+			/* Presorted path is a loser */
+			sortedpath = NULL;
+		}
+	}
+
 	/*
 	 * If we have constant quals, add a toplevel Result step to process them.
 	 */
 	if (constant_quals)
 	{
-		*cheapest_path = (Path *)
-			create_result_path((*cheapest_path)->parent,
-							   *cheapest_path,
-							   constant_quals);
-		if (*sorted_path)
-			*sorted_path = (Path *)
-				create_result_path((*sorted_path)->parent,
-								   *sorted_path,
-								   constant_quals);
+		cheapestpath = (Path *) create_result_path(final_rel,
+												   cheapestpath,
+												   constant_quals);
+		if (sortedpath)
+			sortedpath = (Path *) create_result_path(final_rel,
+													 sortedpath,
+													 constant_quals);
 	}
+
+	*cheapest_path = cheapestpath;
+	*sorted_path = sortedpath;
 }
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.129 2002/11/19 23:21:59 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.130 2002/11/21 00:42:19 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -933,11 +933,13 @@ grouping_planner(Query *parse, double tuple_fraction)
 		List	   *sub_tlist;
 		List	   *group_pathkeys;
 		AttrNumber *groupColIdx = NULL;
+		double		sub_tuple_fraction;
 		Path	   *cheapest_path;
 		Path	   *sorted_path;
 		double		dNumGroups = 0;
 		long		numGroups = 0;
 		int			numAggs = 0;
+		int			numGroupCols = length(parse->groupClause);
 		bool		use_hashed_grouping = false;

 		/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
@ -1169,6 +1171,12 @@ grouping_planner(Query *parse, double tuple_fraction)
 			}
 		}

+		/*
+		 * With grouping or aggregation, the tuple fraction to pass to
+		 * query_planner() may be different from what it is at top level.
+		 */
+		sub_tuple_fraction = tuple_fraction;
+
 		if (parse->groupClause)
 		{
 			/*
@ -1182,8 +1190,8 @@ grouping_planner(Query *parse, double tuple_fraction)
 			 * amounts to assuming that all the groups are about the same
 			 * size).
 			 */
-			if (tuple_fraction >= 1.0)
-				tuple_fraction = 0.25;
+			if (sub_tuple_fraction >= 1.0)
+				sub_tuple_fraction = 0.25;

 			/*
 			 * If both GROUP BY and ORDER BY are specified, we will need
@ -1195,7 +1203,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 			if (parse->groupClause && parse->sortClause &&
 				!noncanonical_pathkeys_contained_in(sort_pathkeys,
 													group_pathkeys))
-				tuple_fraction = 0.0;
+				sub_tuple_fraction = 0.0;
 		}
 		else if (parse->hasAggs)
 		{
@ -1203,7 +1211,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 			 * Ungrouped aggregate will certainly want all the input
 			 * tuples.
 			 */
-			tuple_fraction = 0.0;
+			sub_tuple_fraction = 0.0;
 		}
 		else if (parse->distinctClause)
 		{
@ -1212,15 +1220,15 @@ grouping_planner(Query *parse, double tuple_fraction)
 			 * number of input tuples per output tuple.  Handle the same
 			 * way.
 			 */
-			if (tuple_fraction >= 1.0)
-				tuple_fraction = 0.25;
+			if (sub_tuple_fraction >= 1.0)
+				sub_tuple_fraction = 0.25;
 		}

 		/*
 		 * Generate the best unsorted and presorted paths for this Query
 		 * (but note there may not be any presorted path).
 		 */
-		query_planner(parse, sub_tlist, tuple_fraction,
+		query_planner(parse, sub_tlist, sub_tuple_fraction,
 					  &cheapest_path, &sorted_path);

 		/*
@ -1236,11 +1244,13 @@ grouping_planner(Query *parse, double tuple_fraction)
 		if (parse->groupClause)
 		{
 			/*
-			 * Always estimate the number of groups.
+			 * Always estimate the number of groups.  We can't do this until
+			 * after running query_planner(), either.
 			 */
 			dNumGroups = estimate_num_groups(parse,
 											 parse->groupClause,
 											 cheapest_path->parent->rows);
+			/* Also want it as a long int --- but 'ware overflow! */
 			numGroups = (long) Min(dNumGroups, (double) LONG_MAX);

 			/*
@ -1248,9 +1258,11 @@ grouping_planner(Query *parse, double tuple_fraction)
 			 * aggregates.  (Doing so would imply storing *all* the input
 			 * values in the hash table, which seems like a certain loser.)
 			 */
-			if (parse->hasAggs &&
-				(contain_distinct_agg_clause((Node *) tlist) ||
-				 contain_distinct_agg_clause(parse->havingQual)))
+			if (!enable_hashagg)
+				use_hashed_grouping = false;
+			else if (parse->hasAggs &&
+					 (contain_distinct_agg_clause((Node *) tlist) ||
+					  contain_distinct_agg_clause(parse->havingQual)))
 				use_hashed_grouping = false;
 			else
 			{
@ -1272,11 +1284,96 @@ grouping_planner(Query *parse, double tuple_fraction)

 				if (hashentrysize * dNumGroups <= SortMem * 1024L)
 				{
-					/* much more to do here */
-#if 0
-					/* TEMPORARY HOTWIRE FOR TESTING */
-					use_hashed_grouping = true;
-#endif
+					/*
+					 * Okay, do the cost comparison.  We need to consider
+					 *	cheapest_path + hashagg [+ final sort]
+					 * versus either
+					 *	cheapest_path [+ sort] + group or agg [+ final sort]
+					 * or
+					 *	presorted_path + group or agg [+ final sort]
+					 * where brackets indicate a step that may not be needed.
+					 * We assume query_planner() will have returned a
+					 * presorted path only if it's a winner compared to
+					 * cheapest_path for this purpose.
+					 *
+					 * These path variables are dummies that just hold cost
+					 * fields; we don't make actual Paths for these steps.
+					 */
+					Path		hashed_p;
+					Path		sorted_p;
+
+					cost_agg(&hashed_p, parse,
+							 AGG_HASHED, numAggs,
+							 numGroupCols, dNumGroups,
+							 cheapest_path->startup_cost,
+							 cheapest_path->total_cost,
+							 cheapest_path->parent->rows);
+					/* Result of hashed agg is always unsorted */
+					if (sort_pathkeys)
+						cost_sort(&hashed_p, parse, sort_pathkeys,
+								  hashed_p.total_cost,
+								  dNumGroups,
+								  cheapest_path->parent->width);
+
+					if (sorted_path)
+					{
+						sorted_p.startup_cost = sorted_path->startup_cost;
+						sorted_p.total_cost = sorted_path->total_cost;
+						current_pathkeys = sorted_path->pathkeys;
+					}
+					else
+					{
+						sorted_p.startup_cost = cheapest_path->startup_cost;
+						sorted_p.total_cost = cheapest_path->total_cost;
+						current_pathkeys = cheapest_path->pathkeys;
+					}
+					if (!pathkeys_contained_in(group_pathkeys,
+											   current_pathkeys))
+					{
+						cost_sort(&sorted_p, parse, group_pathkeys,
+								  sorted_p.total_cost,
+								  cheapest_path->parent->rows,
+								  cheapest_path->parent->width);
+						current_pathkeys = group_pathkeys;
+					}
+					if (parse->hasAggs)
+						cost_agg(&sorted_p, parse,
+								 AGG_SORTED, numAggs,
+								 numGroupCols, dNumGroups,
+								 sorted_p.startup_cost,
+								 sorted_p.total_cost,
+								 cheapest_path->parent->rows);
+					else
+						cost_group(&sorted_p, parse,
+								   numGroupCols, dNumGroups,
+								   sorted_p.startup_cost,
+								   sorted_p.total_cost,
+								   cheapest_path->parent->rows);
+					/* The Agg or Group node will preserve ordering */
+					if (sort_pathkeys &&
+						!pathkeys_contained_in(sort_pathkeys,
+											   current_pathkeys))
+					{
+						cost_sort(&sorted_p, parse, sort_pathkeys,
+								  sorted_p.total_cost,
+								  dNumGroups,
+								  cheapest_path->parent->width);
+					}
+
+					/*
+					 * Now make the decision using the top-level tuple
+					 * fraction.  First we have to convert an absolute
+					 * count (LIMIT) into fractional form.
+					 */
+					if (tuple_fraction >= 1.0)
+						tuple_fraction /= dNumGroups;
+
+					if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+													  tuple_fraction) <= 0)
+					{
+						/* Hashed is cheaper, so use it */
+						use_hashed_grouping = true;
+					}
 				}
 			}
 		}
@ -1284,50 +1381,17 @@ grouping_planner(Query *parse, double tuple_fraction)
 		/*
 		 * Select the best path and create a plan to execute it.
 		 *
-		 * If no special sort order is wanted, or if the cheapest path is
-		 * already appropriately ordered, use the cheapest path.
-		 * Otherwise, look to see if we have an already-ordered path that is
-		 * cheaper than doing an explicit sort on the cheapest-total-cost
-		 * path.
+		 * If we are doing hashed grouping, we will always read all the
+		 * input tuples, so use the cheapest-total path.  Otherwise,
+		 * trust query_planner's decision about which to use.
 		 */
-		if (parse->query_pathkeys == NIL ||
-			pathkeys_contained_in(parse->query_pathkeys,
-								  cheapest_path->pathkeys))
+		if (sorted_path && !use_hashed_grouping)
 		{
-			result_plan = create_plan(parse, cheapest_path);
-			current_pathkeys = cheapest_path->pathkeys;
-		}
-		else if (sorted_path)
-		{
-			Path		sort_path;	/* dummy for result of cost_sort */
-
-			cost_sort(&sort_path, parse, parse->query_pathkeys,
-					  sorted_path->parent->rows, sorted_path->parent->width);
-			sort_path.startup_cost += cheapest_path->total_cost;
-			sort_path.total_cost += cheapest_path->total_cost;
-			/* Convert absolute-count tuple_fraction into a fraction */
-			if (tuple_fraction >= 1.0)
-				tuple_fraction /= sorted_path->parent->rows;
-			if (compare_fractional_path_costs(sorted_path, &sort_path,
-											  tuple_fraction) <= 0)
-			{
-				/* Presorted path is cheaper, use it */
-				result_plan = create_plan(parse, sorted_path);
-				current_pathkeys = sorted_path->pathkeys;
-			}
-			else
-			{
-				/* otherwise, doing it the hard way is still cheaper */
-				result_plan = create_plan(parse, cheapest_path);
-				current_pathkeys = cheapest_path->pathkeys;
-			}
+			result_plan = create_plan(parse, sorted_path);
+			current_pathkeys = sorted_path->pathkeys;
 		}
 		else
 		{
-			/*
-			 * No sorted path, so we must use the cheapest-total path.
-			 * The actual sort step will be generated below.
-			 */
 			result_plan = create_plan(parse, cheapest_path);
 			current_pathkeys = cheapest_path->pathkeys;
 		}
@ -1362,10 +1426,11 @@ grouping_planner(Query *parse, double tuple_fraction)
 		if (use_hashed_grouping)
 		{
 			/* Hashed aggregate plan --- no sort needed */
-			result_plan = (Plan *) make_agg(tlist,
+			result_plan = (Plan *) make_agg(parse,
+											tlist,
 											(List *) parse->havingQual,
 											AGG_HASHED,
-											length(parse->groupClause),
+											numGroupCols,
 											groupColIdx,
 											numGroups,
 											numAggs,
@ -1401,10 +1466,11 @@ grouping_planner(Query *parse, double tuple_fraction)
 				current_pathkeys = NIL;
 			}

-			result_plan = (Plan *) make_agg(tlist,
+			result_plan = (Plan *) make_agg(parse,
+											tlist,
 											(List *) parse->havingQual,
 											aggstrategy,
-											length(parse->groupClause),
+											numGroupCols,
 											groupColIdx,
 											numGroups,
 											numAggs,
@ -1436,11 +1502,13 @@ grouping_planner(Query *parse, double tuple_fraction)
 					current_pathkeys = group_pathkeys;
 				}

-				result_plan = (Plan *) make_group(tlist,
-												  length(parse->groupClause),
+				result_plan = (Plan *) make_group(parse,
+												  tlist,
+												  numGroupCols,
 												  groupColIdx,
 												  dNumGroups,
 												  result_plan);
+				/* The Group node won't change sort ordering */
 			}
 		}
 	} /* end of if (setOperations) */
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@ -5,7 +5,7 @@
 * command, configuration file, and command line options.
 * See src/backend/utils/misc/README for more information.
 *
- * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.106 2002/11/15 02:44:57 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.107 2002/11/21 00:42:19 tgl Exp $
 *
 * Copyright 2000 by PostgreSQL Global Development Group
 * Written by Peter Eisentraut <peter_e@gmx.net>.
@ -323,6 +323,10 @@ static struct config_bool
 		{"enable_sort", PGC_USERSET}, &enable_sort,
 		true, NULL, NULL
 	},
+	{
+		{"enable_hashagg", PGC_USERSET}, &enable_hashagg,
+		true, NULL, NULL
+	},
 	{
 		{"enable_nestloop", PGC_USERSET}, &enable_nestloop,
 		true, NULL, NULL
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@ -83,6 +83,7 @@
 #enable_indexscan = true
 #enable_tidscan = true
 #enable_sort = true
+#enable_hashagg = true
 #enable_nestloop = true
 #enable_mergejoin = true
 #enable_hashjoin = true
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@ -3,7 +3,7 @@
 *
 * Copyright 2000-2002 by PostgreSQL Global Development Group
 *
- * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.68 2002/11/15 03:07:52 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.69 2002/11/21 00:42:19 tgl Exp $
 */

 /*----------------------------------------------------------------------
@ -251,6 +251,7 @@ psql_completion(char *text, int start, int end)
 		"enable_nestloop",
 		"enable_seqscan",
 		"enable_sort",
+		"enable_hashagg",
 		"enable_tidscan",
 		"explain_pretty_print",
 		"extra_float_digits",
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@ -7,15 +7,17 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: cost.h,v 1.47 2002/09/04 20:31:44 momjian Exp $
+ * $Id: cost.h,v 1.48 2002/11/21 00:42:19 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #ifndef COST_H
 #define COST_H

+#include "nodes/plannodes.h"
 #include "nodes/relation.h"

+
 /* defaults for costsize.c's Cost parameters */
 /* NB: cost-estimation code should use the variables, not these constants! */
 /* If you change these, update backend/utils/misc/postgresql.sample.conf */
@ -42,6 +44,7 @@ extern bool enable_seqscan;
 extern bool enable_indexscan;
 extern bool enable_tidscan;
 extern bool enable_sort;
+extern bool enable_hashagg;
 extern bool enable_nestloop;
 extern bool enable_mergejoin;
 extern bool enable_hashjoin;
@ -56,7 +59,16 @@ extern void cost_tidscan(Path *path, Query *root,
 extern void cost_functionscan(Path *path, Query *root,
 				  RelOptInfo *baserel);
 extern void cost_sort(Path *path, Query *root,
-		  List *pathkeys, double tuples, int width);
+		  List *pathkeys, Cost input_cost, double tuples, int width);
+extern void cost_agg(Path *path, Query *root,
+					 AggStrategy aggstrategy, int numAggs,
+					 int numGroupCols, double numGroups,
+					 Cost input_startup_cost, Cost input_total_cost,
+					 double input_tuples);
+extern void cost_group(Path *path, Query *root,
+					   int numGroupCols, double numGroups,
+					   Cost input_startup_cost, Cost input_total_cost,
+					   double input_tuples);
 extern void cost_nestloop(Path *path, Query *root,
 			  Path *outer_path, Path *inner_path,
 			  List *restrictlist);
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: planmain.h,v 1.62 2002/11/19 23:22:00 tgl Exp $
+ * $Id: planmain.h,v 1.63 2002/11/21 00:42:19 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -34,12 +34,14 @@ extern Sort *make_sort(Query *root, List *tlist,
 		  Plan *lefttree, int keycount);
 extern Sort *make_sort_from_pathkeys(Query *root, List *tlist,
 						Plan *lefttree, List *pathkeys);
-extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
-					 int ngrp, AttrNumber *grpColIdx,
+extern Agg *make_agg(Query *root, List *tlist, List *qual,
+					 AggStrategy aggstrategy,
+					 int numGroupCols, AttrNumber *grpColIdx,
 					 long numGroups, int numAggs,
 					 Plan *lefttree);
-extern Group *make_group(List *tlist,
-						 int ngrp, AttrNumber *grpColIdx, double numGroups,
+extern Group *make_group(Query *root, List *tlist,
+						 int numGroupCols, AttrNumber *grpColIdx,
+						 double numGroups,
 						 Plan *lefttree);
 extern Material *make_material(List *tlist, Plan *lefttree);
 extern Unique *make_unique(List *tlist, Plan *lefttree, List *distinctList);
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@ -87,7 +87,8 @@ SELECT count(DISTINCT four) AS cnt_4 FROM onek;
     4
 (1 row)

-select ten, count(*), sum(four) from onek group by ten;
+select ten, count(*), sum(four) from onek
+group by ten order by ten;
 ten | count | sum 
 -----+-------+-----
   0 |   100 | 100
@ -102,7 +103,8 @@ select ten, count(*), sum(four) from onek group by ten;
   9 |   100 | 200
 (10 rows)

-select ten, count(four), sum(DISTINCT four) from onek group by ten;
+select ten, count(four), sum(DISTINCT four) from onek
+group by ten order by ten;
 ten | count | sum 
 -----+-------+-----
   0 |   100 |   2
--- a/src/test/regress/expected/rangefuncs.out
+++ b/src/test/regress/expected/rangefuncs.out
@ -1,6 +1,7 @@
 SELECT * FROM pg_settings WHERE name LIKE 'enable%';
       name       | setting 
 ------------------+---------
+ enable_hashagg   | on
 enable_hashjoin  | on
 enable_indexscan | on
 enable_mergejoin | on
@ -8,7 +9,7 @@ SELECT * FROM pg_settings WHERE name LIKE 'enable%';
 enable_seqscan   | on
 enable_sort      | on
 enable_tidscan   | on
-(7 rows)
+(8 rows)

 CREATE TABLE foo2(fooid int, f2 int);
 INSERT INTO foo2 VALUES(1, 11);
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@ -804,7 +804,7 @@ select * from rtest_vview3;
 7 | item 7
 (3 rows)

-select * from rtest_vview4;
+select * from rtest_vview4 order by a, b;
 a |   b    | refcount 
 ---+--------+----------
 2 | item 2 |        2
@ -868,7 +868,7 @@ select * from rtest_view4;

 delete from rtest_view4;
 insert into rtest_view4 select * from rtest_vview5 where a > 2 and refcount = 0;
-select * from rtest_view4;
+select * from rtest_view4 order by a, b;
 a |   b    | c 
 ---+--------+---
 3 | item 3 | 0
--- a/src/test/regress/expected/select_having.out
+++ b/src/test/regress/expected/select_having.out
@ -14,7 +14,7 @@ INSERT INTO test_having VALUES (7, 4, 'cccc', 'h');
 INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
 INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
 SELECT b, c FROM test_having
-	GROUP BY b, c HAVING count(*) = 1;
+	GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c;
 b |    c     
 ---+----------
 1 | XXXX    
@ -23,7 +23,7 @@ SELECT b, c FROM test_having

 -- HAVING is equivalent to WHERE in this case
 SELECT b, c FROM test_having
-	GROUP BY b, c HAVING b = 3; 
+	GROUP BY b, c HAVING b = 3 ORDER BY b, c;
 b |    c     
 ---+----------
 3 | BBBB    
@ -31,7 +31,8 @@ SELECT b, c FROM test_having
 (2 rows)

 SELECT lower(c), count(c) FROM test_having
-	GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a);
+	GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+	ORDER BY lower(c);
  lower   | count 
 ----------+-------
 bbbb     |     3
@ -40,7 +41,8 @@ SELECT lower(c), count(c) FROM test_having
 (3 rows)

 SELECT c, max(a) FROM test_having
-	GROUP BY c HAVING count(*) > 2 OR min(a) = max(a);
+	GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+	ORDER BY c;
    c     | max 
 ----------+-----
 XXXX     |   0
--- a/src/test/regress/expected/select_having_1.out
+++ b/src/test/regress/expected/select_having_1.out
@ -14,15 +14,25 @@ INSERT INTO test_having VALUES (7, 4, 'cccc', 'h');
 INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
 INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
 SELECT b, c FROM test_having
-	GROUP BY b, c HAVING count(*) = 1;
+	GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c;
 b |    c     
 ---+----------
 1 | XXXX    
 3 | bbbb    
 (2 rows)

+-- HAVING is equivalent to WHERE in this case
+SELECT b, c FROM test_having
+	GROUP BY b, c HAVING b = 3 ORDER BY b, c;
+ b |    c     
+---+----------
+ 3 | BBBB    
+ 3 | bbbb    
+(2 rows)
+
 SELECT lower(c), count(c) FROM test_having
-	GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a);
+	GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+	ORDER BY lower(c);
  lower   | count 
 ----------+-------
 bbbb     |     3
@ -31,7 +41,8 @@ SELECT lower(c), count(c) FROM test_having
 (3 rows)

 SELECT c, max(a) FROM test_having
-	GROUP BY c HAVING count(*) > 2 OR min(a) = max(a);
+	GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+	ORDER BY c;
    c     | max 
 ----------+-----
 bbbb     |   5
--- a/src/test/regress/expected/select_implicit.out
+++ b/src/test/regress/expected/select_implicit.out
@ -18,7 +18,7 @@ INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h');
 INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');
 INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
 --   w/ existing GROUP BY target
-SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
    c     | count 
 ----------+-------
 AAAA     |     2
@ -30,7 +30,7 @@ SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
 (6 rows)

 --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
 count 
 -------
     2
@ -100,7 +100,7 @@ SELECT count(*) FROM test_missing_target ORDER BY 1 desc;
 (1 row)

 --   order using reference number
-SELECT c, count(*) FROM test_missing_target GROUP BY 1;
+SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1;
    c     | count 
 ----------+-------
 AAAA     |     2
@ -160,7 +160,7 @@ SELECT a/2, a/2 FROM test_missing_target
 --   group expression w/ target under ambiguous condition
 --   failure NOT expected
 SELECT a/2, a/2 FROM test_missing_target
-	GROUP BY a/2;
+	GROUP BY a/2 ORDER BY a/2;
 ?column? | ?column? 
 ----------+----------
        0 |        0
@ -173,7 +173,7 @@ SELECT a/2, a/2 FROM test_missing_target
 --   group w/ existing GROUP BY target under ambiguous condition
 SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b;
+	GROUP BY x.b ORDER BY x.b;
 b | count 
 ---+-------
 1 |     1
@ -185,7 +185,7 @@ SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
 --   group w/o existing GROUP BY target under ambiguous condition
 SELECT count(*) FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b;
+	GROUP BY x.b ORDER BY x.b;
 count 
 -------
     1
@ -199,7 +199,7 @@ SELECT count(*) FROM test_missing_target x, test_missing_target y
 SELECT count(*) INTO TABLE test_missing_target2 
 FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b;
+	GROUP BY x.b ORDER BY x.b;
 SELECT * FROM test_missing_target2;
 count 
 -------
@ -211,7 +211,9 @@ SELECT * FROM test_missing_target2;

 --  Functions and expressions
 --   w/ existing GROUP BY target
-SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
+SELECT a%2, count(b) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY test_missing_target.a%2;
 ?column? | count 
 ----------+-------
        0 |     5
@ -219,7 +221,9 @@ SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
 (2 rows)

 --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c);
+SELECT count(c) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY lower(test_missing_target.c);
 count 
 -------
     2
@ -286,7 +290,7 @@ ERROR:  Column reference "b" is ambiguous
 --   group w/ existing GROUP BY target under ambiguous condition
 SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b/2;
+	GROUP BY x.b/2 ORDER BY x.b/2;
 ?column? | count 
 ----------+-------
        0 |     1
@ -305,7 +309,7 @@ ERROR:  Column reference "b" is ambiguous
 SELECT count(x.b) INTO TABLE test_missing_target3 
 FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b/2;
+	GROUP BY x.b/2 ORDER BY x.b/2;
 SELECT * FROM test_missing_target3;
 count 
 -------
--- a/src/test/regress/expected/select_implicit_1.out
+++ b/src/test/regress/expected/select_implicit_1.out
@ -18,7 +18,7 @@ INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h');
 INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');
 INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
 --   w/ existing GROUP BY target
-SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
    c     | count 
 ----------+-------
 AAAA     |     2
@ -30,7 +30,7 @@ SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
 (6 rows)

 --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
 count 
 -------
     2
@ -100,7 +100,7 @@ SELECT count(*) FROM test_missing_target ORDER BY 1 desc;
 (1 row)

 --   order using reference number
-SELECT c, count(*) FROM test_missing_target GROUP BY 1;
+SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1;
    c     | count 
 ----------+-------
 AAAA     |     2
@ -160,7 +160,7 @@ SELECT a/2, a/2 FROM test_missing_target
 --   group expression w/ target under ambiguous condition
 --   failure NOT expected
 SELECT a/2, a/2 FROM test_missing_target
-	GROUP BY a/2;
+	GROUP BY a/2 ORDER BY a/2;
 ?column? | ?column? 
 ----------+----------
        0 |        0
@ -173,7 +173,7 @@ SELECT a/2, a/2 FROM test_missing_target
 --   group w/ existing GROUP BY target under ambiguous condition
 SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b;
+	GROUP BY x.b ORDER BY x.b;
 b | count 
 ---+-------
 1 |     1
@ -185,7 +185,7 @@ SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
 --   group w/o existing GROUP BY target under ambiguous condition
 SELECT count(*) FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b;
+	GROUP BY x.b ORDER BY x.b;
 count 
 -------
     1
@ -199,7 +199,7 @@ SELECT count(*) FROM test_missing_target x, test_missing_target y
 SELECT count(*) INTO TABLE test_missing_target2 
 FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b;
+	GROUP BY x.b ORDER BY x.b;
 SELECT * FROM test_missing_target2;
 count 
 -------
@ -211,7 +211,9 @@ SELECT * FROM test_missing_target2;

 --  Functions and expressions
 --   w/ existing GROUP BY target
-SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
+SELECT a%2, count(b) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY test_missing_target.a%2;
 ?column? | count 
 ----------+-------
        0 |     5
@ -219,7 +221,9 @@ SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
 (2 rows)

 --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c);
+SELECT count(c) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY lower(test_missing_target.c);
 count 
 -------
     2
@ -286,7 +290,7 @@ ERROR:  Column reference "b" is ambiguous
 --   group w/ existing GROUP BY target under ambiguous condition
 SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b/2;
+	GROUP BY x.b/2 ORDER BY x.b/2;
 ?column? | count 
 ----------+-------
        0 |     1
@ -305,7 +309,7 @@ ERROR:  Column reference "b" is ambiguous
 SELECT count(x.b) INTO TABLE test_missing_target3 
 FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b/2;
+	GROUP BY x.b/2 ORDER BY x.b/2;
 SELECT * FROM test_missing_target3;
 count 
 -------
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@ -158,7 +158,7 @@ SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field"
 (6 rows)

 select q1, float8(count(*)) / (select count(*) from int8_tbl)
-from int8_tbl group by q1;
+from int8_tbl group by q1 order by q1;
        q1        | ?column? 
 ------------------+----------
              123 |      0.4
--- a/src/test/regress/input/misc.source
+++ b/src/test/regress/input/misc.source
@ -105,7 +105,7 @@ SELECT * FROM f_star* x WHERE x.c ISNULL;

 SELECT sum(a) FROM a_star*;

-SELECT class, sum(a) FROM a_star* GROUP BY class;
+SELECT class, sum(a) FROM a_star* GROUP BY class ORDER BY class;


 ALTER TABLE f_star RENAME COLUMN f TO ff;
--- a/src/test/regress/output/misc.source
+++ b/src/test/regress/output/misc.source
@ -223,7 +223,7 @@ SELECT sum(a) FROM a_star*;
 355
 (1 row)

-SELECT class, sum(a) FROM a_star* GROUP BY class;
+SELECT class, sum(a) FROM a_star* GROUP BY class ORDER BY class;
 class | sum 
 -------+-----
 a     |   3
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@ -36,9 +36,11 @@ SELECT count(four) AS cnt_1000 FROM onek;

 SELECT count(DISTINCT four) AS cnt_4 FROM onek;

-select ten, count(*), sum(four) from onek group by ten;
+select ten, count(*), sum(four) from onek
+group by ten order by ten;

-select ten, count(four), sum(DISTINCT four) from onek group by ten;
+select ten, count(four), sum(DISTINCT four) from onek
+group by ten order by ten;


 SELECT newavg(four) AS avg_1 FROM onek;
--- a/src/test/regress/sql/rules.sql
+++ b/src/test/regress/sql/rules.sql
@ -451,7 +451,7 @@ insert into rtest_view2 values (7);
 select * from rtest_vview1;
 select * from rtest_vview2;
 select * from rtest_vview3;
-select * from rtest_vview4;
+select * from rtest_vview4 order by a, b;
 select * from rtest_vview5;

 insert into rtest_view3 select * from rtest_vview1 where a < 7;
@ -471,7 +471,7 @@ select * from rtest_view4;
 delete from rtest_view4;

 insert into rtest_view4 select * from rtest_vview5 where a > 2 and refcount = 0;
-select * from rtest_view4;
+select * from rtest_view4 order by a, b;
 delete from rtest_view4;
 --
 -- Test for computations in views
--- a/src/test/regress/sql/select_having.sql
+++ b/src/test/regress/sql/select_having.sql
@ -16,17 +16,18 @@ INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
 INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');

 SELECT b, c FROM test_having
-	GROUP BY b, c HAVING count(*) = 1;
+	GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c;

 -- HAVING is equivalent to WHERE in this case
 SELECT b, c FROM test_having
-	GROUP BY b, c HAVING b = 3; 
+	GROUP BY b, c HAVING b = 3 ORDER BY b, c;

 SELECT lower(c), count(c) FROM test_having
-	GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a);
+	GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+	ORDER BY lower(c);

 SELECT c, max(a) FROM test_having
-	GROUP BY c HAVING count(*) > 2 OR min(a) = max(a);
+	GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+	ORDER BY c;

 DROP TABLE test_having;
-
--- a/src/test/regress/sql/select_implicit.sql
+++ b/src/test/regress/sql/select_implicit.sql
@ -21,10 +21,10 @@ INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');


 --   w/ existing GROUP BY target
-SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;

 --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;

 --   w/o existing GROUP BY target and w/o existing a different ORDER BY target
 --   failure expected
@ -47,7 +47,7 @@ SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc;
 SELECT count(*) FROM test_missing_target ORDER BY 1 desc;

 --   order using reference number
-SELECT c, count(*) FROM test_missing_target GROUP BY 1;
+SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1;

 --   group using reference number out of range
 --   failure expected
@ -72,34 +72,38 @@ SELECT a/2, a/2 FROM test_missing_target
 --   group expression w/ target under ambiguous condition
 --   failure NOT expected
 SELECT a/2, a/2 FROM test_missing_target
-	GROUP BY a/2;
+	GROUP BY a/2 ORDER BY a/2;

 --   group w/ existing GROUP BY target under ambiguous condition
 SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b;
+	GROUP BY x.b ORDER BY x.b;

 --   group w/o existing GROUP BY target under ambiguous condition
 SELECT count(*) FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b;
+	GROUP BY x.b ORDER BY x.b;

 --   group w/o existing GROUP BY target under ambiguous condition
 --   into a table
 SELECT count(*) INTO TABLE test_missing_target2 
 FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b;
+	GROUP BY x.b ORDER BY x.b;
 SELECT * FROM test_missing_target2;


 --  Functions and expressions

 --   w/ existing GROUP BY target
-SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
+SELECT a%2, count(b) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY test_missing_target.a%2;

 --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c);
+SELECT count(c) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY lower(test_missing_target.c);

 --   w/o existing GROUP BY target and w/o existing a different ORDER BY target
 --   failure expected
@ -128,7 +132,7 @@ SELECT count(x.a) FROM test_missing_target x, test_missing_target y
 --   group w/ existing GROUP BY target under ambiguous condition
 SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b/2;
+	GROUP BY x.b/2 ORDER BY x.b/2;

 --   group w/o existing GROUP BY target under ambiguous condition
 --   failure expected due to ambiguous b in count(b)
@ -141,7 +145,7 @@ SELECT count(b) FROM test_missing_target x, test_missing_target y
 SELECT count(x.b) INTO TABLE test_missing_target3 
 FROM test_missing_target x, test_missing_target y 
 	WHERE x.a = y.a
-	GROUP BY x.b/2;
+	GROUP BY x.b/2 ORDER BY x.b/2;
 SELECT * FROM test_missing_target3;

 --   Cleanup
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@ -74,4 +74,4 @@ SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field"
  WHERE f1 NOT IN (SELECT f1+1 FROM INT4_TBL WHERE f1 != ss.f1);

 select q1, float8(count(*)) / (select count(*) from int8_tbl)
-from int8_tbl group by q1;
+from int8_tbl group by q1 order by q1;