mirror of https://github.com/postgres/postgres

commit 1afac12910 (parent c83702606c, branch WIN32_DEV)

    shared by nodeGroup, nodeAgg, and soon nodeSubplan.

@@ -0,0 +1,369 @@
/*-------------------------------------------------------------------------
 *
 * execGrouping.c
 *	  executor utility routines for grouping, hashing, and aggregation
 *
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/hash.h"
#include "access/heapam.h"
#include "executor/executor.h"
#include "parser/parse_oper.h"
#include "utils/memutils.h"


/*****************************************************************************
 *		Utility routines for grouping tuples together
 *
 * These routines actually implement SQL's notion of "distinct/not distinct".
 * Two tuples match if they are not distinct in all the compared columns,
 * i.e., the column values are either both null, or both non-null and equal.
 *****************************************************************************/

/*
 * execTuplesMatch
 *		Return true if two tuples match in all the indicated fields.
 *		This is used to detect group boundaries in nodeGroup and nodeAgg,
 *		and to decide whether two tuples are distinct or not in nodeUnique.
 *
 * tuple1, tuple2: the tuples to compare
 * tupdesc: tuple descriptor applying to both tuples
 * numCols: the number of attributes to be examined
 * matchColIdx: array of attribute column numbers
 * eqfunctions: array of fmgr lookup info for the equality functions to use
 * evalContext: short-term memory context for executing the functions
 *
 * NB: evalContext is reset each time!
 */
bool
execTuplesMatch(HeapTuple tuple1,
				HeapTuple tuple2,
				TupleDesc tupdesc,
				int numCols,
				AttrNumber *matchColIdx,
				FmgrInfo *eqfunctions,
				MemoryContext evalContext)
{
	MemoryContext oldContext;
	bool		result;
	int			i;

	/* Reset and switch into the temp context. */
	MemoryContextReset(evalContext);
	oldContext = MemoryContextSwitchTo(evalContext);

	/*
	 * We cannot report a match without checking all the fields, but we
	 * can report a non-match as soon as we find unequal fields.  So,
	 * start comparing at the last field (least significant sort key).
	 * That's the most likely to be different if we are dealing with
	 * sorted input.
	 */
	result = true;

	for (i = numCols; --i >= 0;)
	{
		AttrNumber	att = matchColIdx[i];
		Datum		attr1,
					attr2;
		bool		isNull1,
					isNull2;

		attr1 = heap_getattr(tuple1,
							 att,
							 tupdesc,
							 &isNull1);

		attr2 = heap_getattr(tuple2,
							 att,
							 tupdesc,
							 &isNull2);

		if (isNull1 != isNull2)
		{
			result = false;		/* one null and one not; they aren't equal */
			break;
		}

		if (isNull1)
			continue;			/* both are null, treat as equal */

		/* Apply the type-specific equality function */
		if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
										attr1, attr2)))
		{
			result = false;		/* they aren't equal */
			break;
		}
	}

	MemoryContextSwitchTo(oldContext);

	return result;
}

/*
 * execTuplesMatchPrepare
 *		Look up the equality functions needed for execTuplesMatch.
 *		The result is a palloc'd array.
 */
FmgrInfo *
execTuplesMatchPrepare(TupleDesc tupdesc,
					   int numCols,
					   AttrNumber *matchColIdx)
{
	FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
	int			i;

	for (i = 0; i < numCols; i++)
	{
		AttrNumber	att = matchColIdx[i];
		Oid			typid = tupdesc->attrs[att - 1]->atttypid;
		Oid			eq_function;

		eq_function = equality_oper_funcid(typid);
		fmgr_info(eq_function, &eqfunctions[i]);
	}

	return eqfunctions;
}
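
/*
 * Illustrative sketch (not part of the original commit): how a caller might
 * combine execTuplesMatchPrepare and execTuplesMatch to count the number of
 * distinct groups in a pre-sorted array of tuples.  The function name, the
 * tuples[]/ntuples arguments, and the EXEC_GROUPING_EXAMPLES guard are
 * hypothetical; evalContext is any short-lived memory context the caller
 * owns (execTuplesMatch resets it on every comparison).
 */
#ifdef EXEC_GROUPING_EXAMPLES
static int
example_count_groups(HeapTuple *tuples, int ntuples,
					 TupleDesc tupdesc,
					 int numCols, AttrNumber *grpColIdx,
					 MemoryContext evalContext)
{
	FmgrInfo   *eqfunctions;
	int			ngroups;
	int			i;

	if (ntuples <= 0)
		return 0;

	/* Look up the "=" function for each grouping column's datatype */
	eqfunctions = execTuplesMatchPrepare(tupdesc, numCols, grpColIdx);

	/* Each mismatch between adjacent (sorted) tuples starts a new group */
	ngroups = 1;
	for (i = 1; i < ntuples; i++)
	{
		if (!execTuplesMatch(tuples[i - 1], tuples[i], tupdesc,
							 numCols, grpColIdx,
							 eqfunctions, evalContext))
			ngroups++;
	}

	pfree(eqfunctions);
	return ngroups;
}
#endif   /* EXEC_GROUPING_EXAMPLES */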

/*****************************************************************************
 *		Utility routines for hashing
 *****************************************************************************/

/*
 * ComputeHashFunc
 *
 *		the hash function for hash joins (also used for hash aggregation)
 *
 * XXX this probably ought to be replaced with datatype-specific
 * hash functions, such as those already implemented for hash indexes.
 */
uint32
ComputeHashFunc(Datum key, int typLen, bool byVal)
{
	unsigned char *k;

	if (byVal)
	{
		/*
		 * If it's a by-value data type, just hash the whole Datum value.
		 * This assumes that datatypes narrower than Datum are
		 * consistently padded (either zero-extended or sign-extended, but
		 * not random bits) to fill Datum; see the XXXGetDatum macros in
		 * postgres.h.  NOTE: it would not work to do hash_any(&key, len)
		 * since this would get the wrong bytes on a big-endian machine.
		 */
		k = (unsigned char *) &key;
		typLen = sizeof(Datum);
	}
	else
	{
		if (typLen > 0)
		{
			/* fixed-width pass-by-reference type */
			k = (unsigned char *) DatumGetPointer(key);
		}
		else if (typLen == -1)
		{
			/*
			 * It's a varlena type, so 'key' points to a "struct varlena".
			 * NOTE: VARSIZE returns the "real" data length plus the
			 * sizeof the "vl_len" attribute of varlena (the length
			 * information).  'key' points to the beginning of the varlena
			 * struct, so we have to use "VARDATA" to find the beginning
			 * of the "real" data.  Also, we have to be careful to detoast
			 * the datum if it's toasted.  (We don't worry about freeing
			 * the detoasted copy; that happens for free when the
			 * per-tuple memory context is reset in ExecHashGetBucket.)
			 */
			struct varlena *vkey = PG_DETOAST_DATUM(key);

			typLen = VARSIZE(vkey) - VARHDRSZ;
			k = (unsigned char *) VARDATA(vkey);
		}
		else if (typLen == -2)
		{
			/* It's a null-terminated C string */
			typLen = strlen(DatumGetCString(key)) + 1;
			k = (unsigned char *) DatumGetPointer(key);
		}
		else
		{
			elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
			k = NULL;			/* keep compiler quiet */
		}
	}

	return DatumGetUInt32(hash_any(k, typLen));
}
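
/*
 * Illustrative sketch (not part of the original commit): hashing a single
 * tuple attribute with ComputeHashFunc, the same way LookupTupleHashEntry
 * does below.  Passing the attlen/attbyval values straight from the tuple
 * descriptor is what makes the by-value, fixed-width, varlena, and cstring
 * cases come out right.  The function name and the EXEC_GROUPING_EXAMPLES
 * guard are hypothetical.
 */
#ifdef EXEC_GROUPING_EXAMPLES
static uint32
example_hash_attribute(HeapTuple tuple, AttrNumber att, TupleDesc tupdesc)
{
	Datum		value;
	bool		isNull;

	value = heap_getattr(tuple, att, tupdesc, &isNull);
	if (isNull)
		return 0;				/* callers here treat nulls as hash key 0 */

	return ComputeHashFunc(value,
						   (int) tupdesc->attrs[att - 1]->attlen,
						   tupdesc->attrs[att - 1]->attbyval);
}
#endif   /* EXEC_GROUPING_EXAMPLES */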

/*****************************************************************************
 *		Utility routines for all-in-memory hash tables
 *
 * These routines build hash tables for grouping tuples together (eg, for
 * hash aggregation).  There is one entry for each not-distinct set of tuples
 * presented.
 *****************************************************************************/

/*
 * Construct an empty TupleHashTable
 *
 *	numCols, keyColIdx: identify the tuple fields to use as lookup key
 *	eqfunctions: equality comparison functions to use
 *	nbuckets: number of buckets to make
 *	entrysize: size of each entry (at least sizeof(TupleHashEntryData))
 *	tablecxt: memory context in which to store table and table entries
 *	tempcxt: short-lived context for evaluating hash and comparison functions
 *
 * The eqfunctions array may be made with execTuplesMatchPrepare().
 *
 * Note that keyColIdx and eqfunctions must be allocated in storage that
 * will live as long as the hashtable does.
 */
TupleHashTable
BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
					FmgrInfo *eqfunctions,
					int nbuckets, Size entrysize,
					MemoryContext tablecxt, MemoryContext tempcxt)
{
	TupleHashTable hashtable;
	Size		tabsize;

	Assert(nbuckets > 0);
	Assert(entrysize >= sizeof(TupleHashEntryData));

	tabsize = sizeof(TupleHashTableData) +
		(nbuckets - 1) * sizeof(TupleHashEntry);
	hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);

	hashtable->numCols = numCols;
	hashtable->keyColIdx = keyColIdx;
	hashtable->eqfunctions = eqfunctions;
	hashtable->tablecxt = tablecxt;
	hashtable->tempcxt = tempcxt;
	hashtable->entrysize = entrysize;
	hashtable->nbuckets = nbuckets;

	return hashtable;
}
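
/*
 * Illustrative sketch (not part of the original commit): building a tuple
 * hash table the way a hashed-aggregation caller might.  MyAggEntryData,
 * the 256-bucket figure, the function name, and the EXEC_GROUPING_EXAMPLES
 * guard are hypothetical; the points being shown are that entrysize covers
 * TupleHashEntryData plus any per-group payload (the shared header must be
 * the first field), and that keyColIdx/eqfunctions must live as long as the
 * table does.
 */
#ifdef EXEC_GROUPING_EXAMPLES
typedef struct MyAggEntryData
{
	TupleHashEntryData shared;	/* required header, must be first */
	int64		count;			/* per-group payload, zeroed on creation */
} MyAggEntryData;

static TupleHashTable
example_build_group_table(TupleDesc tupdesc,
						  int numCols, AttrNumber *keyColIdx,
						  MemoryContext tablecxt, MemoryContext tempcxt)
{
	MemoryContext oldcxt;
	FmgrInfo   *eqfunctions;

	/*
	 * eqfunctions must outlive the hashtable, so build it in tablecxt.
	 * keyColIdx is caller-provided and assumed to be equally long-lived.
	 */
	oldcxt = MemoryContextSwitchTo(tablecxt);
	eqfunctions = execTuplesMatchPrepare(tupdesc, numCols, keyColIdx);
	MemoryContextSwitchTo(oldcxt);

	return BuildTupleHashTable(numCols, keyColIdx, eqfunctions,
							   256,		/* nbuckets: workload-dependent guess */
							   sizeof(MyAggEntryData),
							   tablecxt, tempcxt);
}
#endif   /* EXEC_GROUPING_EXAMPLES */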

/*
 * Find or create a hashtable entry for the tuple group containing the
 * given tuple.
 *
 * On return, *isnew is true if the entry is newly created, false if it
 * existed already.  Any extra space in a new entry has been zeroed.
 */
TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
					 bool *isnew)
{
	int			numCols = hashtable->numCols;
	AttrNumber *keyColIdx = hashtable->keyColIdx;
	HeapTuple	tuple = slot->val;
	TupleDesc	tupdesc = slot->ttc_tupleDescriptor;
	uint32		hashkey = 0;
	int			i;
	int			bucketno;
	TupleHashEntry entry;
	MemoryContext oldContext;

	/* Need to run the hash function in short-lived context */
	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

	for (i = 0; i < numCols; i++)
	{
		AttrNumber	att = keyColIdx[i];
		Datum		attr;
		bool		isNull;

		/* rotate hashkey left 1 bit at each step */
		hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);

		attr = heap_getattr(tuple, att, tupdesc, &isNull);
		if (isNull)
			continue;			/* treat nulls as having hash key 0 */
		hashkey ^= ComputeHashFunc(attr,
								   (int) tupdesc->attrs[att - 1]->attlen,
								   tupdesc->attrs[att - 1]->attbyval);
	}
	bucketno = hashkey % (uint32) hashtable->nbuckets;

	for (entry = hashtable->buckets[bucketno];
		 entry != NULL;
		 entry = entry->next)
	{
		/* Quick check using hashkey */
		if (entry->hashkey != hashkey)
			continue;
		if (execTuplesMatch(entry->firstTuple,
							tuple,
							tupdesc,
							numCols, keyColIdx,
							hashtable->eqfunctions,
							hashtable->tempcxt))
		{
			MemoryContextSwitchTo(oldContext);
			*isnew = false;
			return entry;
		}
	}

	/* Not there, so build a new one */
	MemoryContextSwitchTo(hashtable->tablecxt);

	entry = (TupleHashEntry) palloc0(hashtable->entrysize);

	entry->hashkey = hashkey;
	entry->firstTuple = heap_copytuple(tuple);

	entry->next = hashtable->buckets[bucketno];
	hashtable->buckets[bucketno] = entry;

	MemoryContextSwitchTo(oldContext);

	*isnew = true;

	return entry;
}
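
/*
 * Illustrative sketch (not part of the original commit): feeding input
 * tuples into the table built in the earlier sketch and bumping a per-group
 * counter.  MyAggEntryData comes from that sketch and the function name is
 * hypothetical; testing *isnew to initialize a fresh group is the intended
 * use of the flag (although any space beyond TupleHashEntryData arrives
 * pre-zeroed anyway).
 */
#ifdef EXEC_GROUPING_EXAMPLES
static void
example_advance_group(TupleHashTable hashtable, TupleTableSlot *slot)
{
	MyAggEntryData *entry;
	bool		isnew;

	entry = (MyAggEntryData *) LookupTupleHashEntry(hashtable, slot, &isnew);

	if (isnew)
		entry->count = 0;		/* redundant: new entries are pre-zeroed */

	entry->count++;
}
#endif   /* EXEC_GROUPING_EXAMPLES */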

/*
 * Walk through all the entries of a hash table, in no special order.
 * Returns NULL when no more entries remain.
 *
 * Iterator state must be initialized with ResetTupleHashIterator() macro.
 */
TupleHashEntry
ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
{
	TupleHashEntry entry;

	entry = state->next_entry;
	while (entry == NULL)
	{
		if (state->next_bucket >= hashtable->nbuckets)
		{
			/* No more entries in hashtable, so done */
			return NULL;
		}
		entry = hashtable->buckets[state->next_bucket++];
	}
	state->next_entry = entry->next;

	return entry;
}
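
/*
 * Illustrative sketch (not part of the original commit): scanning every
 * group once the input has been consumed, here just totalling the counts
 * accumulated in the earlier sketches.  Real callers initialize the
 * iterator with the ResetTupleHashIterator() macro mentioned above; the two
 * explicit assignments below simply clear the per-scan state that
 * ScanTupleHashTable reads.  The function name and the
 * EXEC_GROUPING_EXAMPLES guard are hypothetical.
 */
#ifdef EXEC_GROUPING_EXAMPLES
static int64
example_total_count(TupleHashTable hashtable)
{
	TupleHashIterator iter;
	TupleHashEntry entry;
	int64		total = 0;

	/* start the scan at the first bucket with no pending chain entry */
	iter.next_entry = NULL;
	iter.next_bucket = 0;

	while ((entry = ScanTupleHashTable(hashtable, &iter)) != NULL)
	{
		/* entry->firstTuple holds a copy of the group's first tuple */
		total += ((MyAggEntryData *) entry)->count;
	}

	return total;
}
#endif   /* EXEC_GROUPING_EXAMPLES */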