@ -4,33 +4,38 @@
* Routines to handle INTERSECT and EXCEPT selection
*
* The input of a SetOp node consists of tuples from two relations ,
* which have been combined into one dataset and sorted on all the nonjunk
* attributes . In addition there is a junk attribute that shows which
* relation each tuple came from . The SetOp node scans each group of
* identical tuples to determine how many came from each input relation .
* Then it is a simple matter to emit the output demanded by the SQL spec
* for INTERSECT , INTERSECT ALL , EXCEPT , or EXCEPT ALL .
* which have been combined into one dataset , with a junk attribute added
* that shows which relation each tuple came from . In SETOP_SORTED mode ,
* the input has furthermore been sorted according to all the grouping
* columns ( ie , all the non - junk attributes ) . The SetOp node scans each
* group of identical tuples to determine how many came from each input
* relation . Then it is a simple matter to emit the output demanded by the
* SQL spec for INTERSECT , INTERSECT ALL , EXCEPT , or EXCEPT ALL .
*
* In SETOP_HASHED mode , the input is delivered in no particular order .
* We build a hash table in memory with one entry for each group of
* identical tuples , and count the number of tuples in the group from
* each relation . After seeing all the input , we scan the hashtable and
* generate the correct output using those counts .
*
* This node type is not used for UNION or UNION ALL , since those can be
* implemented more cheaply ( there ' s no need for the junk attribute to
* identify the source relation ) .
*
* Note that SetOp does no qual checking nor projection . The delivered
* output tuples are just copies of the first - to - arrive tuple in each
* input group .
*
*
* Portions Copyright ( c ) 1996 - 2008 , PostgreSQL Global Development Group
* Portions Copyright ( c ) 1994 , Regents of the University of California
*
*
* IDENTIFICATION
* $ PostgreSQL : pgsql / src / backend / executor / nodeSetOp . c , v 1.25 2008 / 01 / 01 19 : 45 : 49 momjian Exp $
* $ PostgreSQL : pgsql / src / backend / executor / nodeSetOp . c , v 1.26 2008 / 08 / 07 03 : 04 : 03 tgl Exp $
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
/*
* INTERFACE ROUTINES
* ExecSetOp - filter input to generate INTERSECT / EXCEPT results
* ExecInitSetOp - initialize node and subnodes . .
* ExecEndSetOp - shutdown node and subnodes
*/
# include "postgres.h"
@ -39,6 +44,160 @@
# include "utils/memutils.h"
/*
* SetOpStatePerGroupData - per - group working state
*
* These values are working state that is initialized at the start of
* an input tuple group and updated for each input tuple .
*
* In SETOP_SORTED mode , we need only one of these structs , and it ' s kept in
* the plan state node . In SETOP_HASHED mode , the hash table contains one
* of these for each tuple group .
*/
typedef struct SetOpStatePerGroupData
{
long numLeft ; /* number of left-input dups in group */
long numRight ; /* number of right-input dups in group */
} SetOpStatePerGroupData ;
/*
* To implement hashed mode , we need a hashtable that stores a
* representative tuple and the duplicate counts for each distinct set
* of grouping columns . We compute the hash key from the grouping columns .
*/
typedef struct SetOpHashEntryData * SetOpHashEntry ;
typedef struct SetOpHashEntryData
{
TupleHashEntryData shared ; /* common header for hash table entries */
SetOpStatePerGroupData pergroup ;
} SetOpHashEntryData ;
static TupleTableSlot * setop_retrieve_direct ( SetOpState * setopstate ) ;
static void setop_fill_hash_table ( SetOpState * setopstate ) ;
static TupleTableSlot * setop_retrieve_hash_table ( SetOpState * setopstate ) ;
/*
* Initialize state for a new group of input values .
*/
static void
initialize_counts ( SetOpState * setopstate , SetOpStatePerGroup pergroup )
{
pergroup - > numLeft = pergroup - > numRight = 0 ;
}
/*
* Advance the appropriate counter for one input tuple .
*/
static void
advance_counts ( SetOpState * setopstate , SetOpStatePerGroup pergroup ,
TupleTableSlot * inputslot )
{
SetOp * node = ( SetOp * ) setopstate - > ps . plan ;
int flag ;
bool isNull ;
flag = DatumGetInt32 ( slot_getattr ( inputslot ,
node - > flagColIdx ,
& isNull ) ) ;
Assert ( ! isNull ) ;
if ( flag )
pergroup - > numRight + + ;
else
pergroup - > numLeft + + ;
}
/*
* Initialize the hash table to empty .
*/
static void
build_hash_table ( SetOpState * setopstate )
{
SetOp * node = ( SetOp * ) setopstate - > ps . plan ;
Assert ( node - > strategy = = SETOP_HASHED ) ;
Assert ( node - > numGroups > 0 ) ;
setopstate - > hashtable = BuildTupleHashTable ( node - > numCols ,
node - > dupColIdx ,
setopstate - > eqfunctions ,
setopstate - > hashfunctions ,
node - > numGroups ,
sizeof ( SetOpHashEntryData ) ,
setopstate - > tableContext ,
setopstate - > tempContext ) ;
}
/*
* Find or create a hashtable entry for the tuple group containing the
* given tuple .
*/
static SetOpHashEntry
lookup_hash_entry ( SetOpState * setopstate , TupleTableSlot * inputslot )
{
SetOpHashEntry entry ;
bool isnew ;
/* find or create the hashtable entry */
entry = ( SetOpHashEntry ) LookupTupleHashEntry ( setopstate - > hashtable ,
inputslot ,
& isnew ) ;
if ( isnew )
{
/* initialize counts for new tuple group */
initialize_counts ( setopstate , & entry - > pergroup ) ;
}
/* Must reset temp context after each hashtable lookup */
MemoryContextReset ( setopstate - > tempContext ) ;
return entry ;
}
/*
* We ' ve completed processing a tuple group . Decide how many copies ( if any )
* of its representative row to emit , and store the count into numOutput .
* This logic is straight from the SQL92 specification .
*/
static void
set_output_count ( SetOpState * setopstate , SetOpStatePerGroup pergroup )
{
SetOp * plannode = ( SetOp * ) setopstate - > ps . plan ;
switch ( plannode - > cmd )
{
case SETOPCMD_INTERSECT :
if ( pergroup - > numLeft > 0 & & pergroup - > numRight > 0 )
setopstate - > numOutput = 1 ;
else
setopstate - > numOutput = 0 ;
break ;
case SETOPCMD_INTERSECT_ALL :
setopstate - > numOutput =
( pergroup - > numLeft < pergroup - > numRight ) ?
pergroup - > numLeft : pergroup - > numRight ;
break ;
case SETOPCMD_EXCEPT :
if ( pergroup - > numLeft > 0 & & pergroup - > numRight = = 0 )
setopstate - > numOutput = 1 ;
else
setopstate - > numOutput = 0 ;
break ;
case SETOPCMD_EXCEPT_ALL :
setopstate - > numOutput =
( pergroup - > numLeft < pergroup - > numRight ) ?
0 : ( pergroup - > numLeft - pergroup - > numRight ) ;
break ;
default :
elog ( ERROR , " unrecognized set op: %d " , ( int ) plannode - > cmd ) ;
break ;
}
}
/* ----------------------------------------------------------------
* ExecSetOp
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@ -47,14 +206,7 @@ TupleTableSlot * /* return: a tuple or NULL */
ExecSetOp ( SetOpState * node )
{
SetOp * plannode = ( SetOp * ) node - > ps . plan ;
TupleTableSlot * resultTupleSlot ;
PlanState * outerPlan ;
/*
* get information from the node
*/
outerPlan = outerPlanState ( node ) ;
resultTupleSlot = node - > ps . ps_ResultTupleSlot ;
TupleTableSlot * resultTupleSlot = node - > ps . ps_ResultTupleSlot ;
/*
* If the previously - returned tuple needs to be returned more than once ,
@ -66,142 +218,218 @@ ExecSetOp(SetOpState *node)
return resultTupleSlot ;
}
/* Flag that we have no current tuple */
ExecClearTuple ( resultTupleSlot ) ;
/* Otherwise, we're done if we are out of groups */
if ( node - > setop_done )
return NULL ;
/* Fetch the next tuple group according to the correct strategy */
if ( plannode - > strategy = = SETOP_HASHED )
{
if ( ! node - > table_filled )
setop_fill_hash_table ( node ) ;
return setop_retrieve_hash_table ( node ) ;
}
else
return setop_retrieve_direct ( node ) ;
}
/*
* ExecSetOp for non - hashed case
*/
static TupleTableSlot *
setop_retrieve_direct ( SetOpState * setopstate )
{
SetOp * node = ( SetOp * ) setopstate - > ps . plan ;
PlanState * outerPlan ;
SetOpStatePerGroup pergroup ;
TupleTableSlot * outerslot ;
TupleTableSlot * resultTupleSlot ;
/*
* Absorb groups of duplicate tuples , counting them , and saving the first
* of each group as a possible return value . At the end of each group ,
* decide whether to return anything .
*
* We assume that the tuples arrive in sorted order so we can detect
* duplicates easily .
* get state info from node
*/
for ( ; ; )
{
TupleTableSlot * inputTupleSlot ;
bool endOfGroup ;
outerPlan = outerPlanState ( setopstate ) ;
pergroup = setopstate - > pergroup ;
resultTupleSlot = setopstate - > ps . ps_ResultTupleSlot ;
/*
* We loop retrieving groups until we find one we should return
*/
while ( ! setopstate - > setop_done )
{
/*
* fetch a tuple from the outer subplan , unless we already did .
* If we don ' t already have the first tuple of the new group , fetch it
* from the outer plan .
*/
if ( node - > ps . ps_OuterTupleSlot = = NULL & &
! node - > subplan_done )
{
node - > ps . ps_OuterTupleSlot =
ExecProcNode ( outerPlan ) ;
if ( TupIsNull ( node - > ps . ps_OuterTupleSlot ) )
node - > subplan_done = true ;
}
inputTupleSlot = node - > ps . ps_OuterTupleSlot ;
if ( TupIsNull ( resultTupleSlot ) )
if ( setopstate - > grp_firstTuple = = NULL )
{
/*
* First of group : save a copy in result slot , and reset
* duplicate - counters for new group .
*/
if ( node - > subplan_done )
return NULL ; /* no more tuples */
ExecCopySlot ( resultTupleSlot , inputTupleSlot ) ;
node - > numLeft = 0 ;
node - > numRight = 0 ;
endOfGroup = false ;
}
else if ( node - > subplan_done )
{
/*
* Reached end of input , so finish processing final group
*/
endOfGroup = true ;
}
else
{
/*
* Else test if the new tuple and the previously saved tuple
* match .
*/
if ( execTuplesMatch ( inputTupleSlot ,
resultTupleSlot ,
plannode - > numCols , plannode - > dupColIdx ,
node - > eqfunctions ,
node - > tempContext ) )
endOfGroup = false ;
outerslot = ExecProcNode ( outerPlan ) ;
if ( ! TupIsNull ( outerslot ) )
{
/* Make a copy of the first input tuple */
setopstate - > grp_firstTuple = ExecCopySlotTuple ( outerslot ) ;
}
else
endOfGroup = true ;
{
/* outer plan produced no tuples at all */
setopstate - > setop_done = true ;
return NULL ;
}
}
if ( endOfGroup )
/*
* Store the copied first input tuple in the tuple table slot
* reserved for it . The tuple will be deleted when it is cleared
* from the slot .
*/
ExecStoreTuple ( setopstate - > grp_firstTuple ,
resultTupleSlot ,
InvalidBuffer ,
true ) ;
setopstate - > grp_firstTuple = NULL ; /* don't keep two pointers */
/* Initialize working state for a new input tuple group */
initialize_counts ( setopstate , pergroup ) ;
/* Count the first input tuple */
advance_counts ( setopstate , pergroup , resultTupleSlot ) ;
/*
* Scan the outer plan until we exhaust it or cross a group boundary .
*/
for ( ; ; )
{
outerslot = ExecProcNode ( outerPlan ) ;
if ( TupIsNull ( outerslot ) )
{
/* no more outer-plan tuples available */
setopstate - > setop_done = true ;
break ;
}
/*
* We ' ve reached the end of the group containing resultTuple .
* Decide how many copies ( if any ) to emit . This logic is
* straight from the SQL92 specification .
* Check whether we ' ve crossed a group boundary .
*/
switch ( plannode - > cmd )
if ( ! execTuplesMatch ( resultTupleSlot ,
outerslot ,
node - > numCols , node - > dupColIdx ,
setopstate - > eqfunctions ,
setopstate - > tempContext ) )
{
case SETOPCMD_INTERSECT :
if ( node - > numLeft > 0 & & node - > numRight > 0 )
node - > numOutput = 1 ;
else
node - > numOutput = 0 ;
break ;
case SETOPCMD_INTERSECT_ALL :
node - > numOutput =
( node - > numLeft < node - > numRight ) ?
node - > numLeft : node - > numRight ;
break ;
case SETOPCMD_EXCEPT :
if ( node - > numLeft > 0 & & node - > numRight = = 0 )
node - > numOutput = 1 ;
else
node - > numOutput = 0 ;
break ;
case SETOPCMD_EXCEPT_ALL :
node - > numOutput =
( node - > numLeft < node - > numRight ) ?
0 : ( node - > numLeft - node - > numRight ) ;
break ;
default :
elog ( ERROR , " unrecognized set op: %d " ,
( int ) plannode - > cmd ) ;
break ;
}
/* Fall out of for-loop if we have tuples to emit */
if ( node - > numOutput > 0 )
/*
* Save the first input tuple of the next group .
*/
setopstate - > grp_firstTuple = ExecCopySlotTuple ( outerslot ) ;
break ;
/* Else flag that we have no current tuple, and loop around */
ExecClearTuple ( resultTupleSlot ) ;
}
/* Still in same group, so count this tuple */
advance_counts ( setopstate , pergroup , outerslot ) ;
}
else
/*
* Done scanning input tuple group . See if we should emit any
* copies of result tuple , and if so return the first copy .
*/
set_output_count ( setopstate , pergroup ) ;
if ( setopstate - > numOutput > 0 )
{
/*
* Current tuple is member of same group as resultTuple . Count it
* in the appropriate counter .
*/
int flag ;
bool isNull ;
flag = DatumGetInt32 ( slot_getattr ( inputTupleSlot ,
plannode - > flagColIdx ,
& isNull ) ) ;
Assert ( ! isNull ) ;
if ( flag )
node - > numRight + + ;
else
node - > numLeft + + ;
/* Set flag to fetch a new input tuple, and loop around */
node - > ps . ps_OuterTupleSlot = NULL ;
setopstate - > numOutput - - ;
return resultTupleSlot ;
}
}
/* No more groups */
ExecClearTuple ( resultTupleSlot ) ;
return NULL ;
}
/*
* ExecSetOp for hashed case : phase 1 , read input and build hash table
*/
static void
setop_fill_hash_table ( SetOpState * setopstate )
{
PlanState * outerPlan ;
SetOpHashEntry entry ;
TupleTableSlot * outerslot ;
/*
* get state info from node
*/
outerPlan = outerPlanState ( setopstate ) ;
/*
* If we fall out of loop , then we need to emit at least one copy of
* resultTuple .
* Process each outer - plan tuple , and then fetch the next one , until we
* exhaust the outer plan .
*/
Assert ( node - > numOutput > 0 ) ;
node - > numOutput - - ;
return resultTupleSlot ;
for ( ; ; )
{
outerslot = ExecProcNode ( outerPlan ) ;
if ( TupIsNull ( outerslot ) )
break ;
/* Find or build hashtable entry for this tuple's group */
entry = lookup_hash_entry ( setopstate , outerslot ) ;
/* Advance the counts */
advance_counts ( setopstate , & entry - > pergroup , outerslot ) ;
}
setopstate - > table_filled = true ;
/* Initialize to walk the hash table */
ResetTupleHashIterator ( setopstate - > hashtable , & setopstate - > hashiter ) ;
}
/*
* ExecSetOp for hashed case : phase 2 , retrieving groups from hash table
*/
static TupleTableSlot *
setop_retrieve_hash_table ( SetOpState * setopstate )
{
SetOpHashEntry entry ;
TupleTableSlot * resultTupleSlot ;
/*
* get state info from node
*/
resultTupleSlot = setopstate - > ps . ps_ResultTupleSlot ;
/*
* We loop retrieving groups until we find one we should return
*/
while ( ! setopstate - > setop_done )
{
/*
* Find the next entry in the hash table
*/
entry = ( SetOpHashEntry ) ScanTupleHashTable ( & setopstate - > hashiter ) ;
if ( entry = = NULL )
{
/* No more entries in hashtable, so done */
setopstate - > setop_done = true ;
return NULL ;
}
/*
* See if we should emit any copies of this tuple , and if so return
* the first copy .
*/
set_output_count ( setopstate , & entry - > pergroup ) ;
if ( setopstate - > numOutput > 0 )
{
setopstate - > numOutput - - ;
return ExecStoreMinimalTuple ( entry - > shared . firstTuple ,
resultTupleSlot ,
false ) ;
}
}
/* No more groups */
ExecClearTuple ( resultTupleSlot ) ;
return NULL ;
}
/* ----------------------------------------------------------------
@ -226,9 +454,14 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
setopstate - > ps . plan = ( Plan * ) node ;
setopstate - > ps . state = estate ;
setopstate - > ps . ps_OuterTupleSlot = NULL ;
setopstate - > subplan_done = false ;
setopstate - > eqfunctions = NULL ;
setopstate - > hashfunctions = NULL ;
setopstate - > setop_done = false ;
setopstate - > numOutput = 0 ;
setopstate - > pergroup = NULL ;
setopstate - > grp_firstTuple = NULL ;
setopstate - > hashtable = NULL ;
setopstate - > tableContext = NULL ;
/*
* Miscellaneous initialization
@ -244,6 +477,19 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
ALLOCSET_DEFAULT_INITSIZE ,
ALLOCSET_DEFAULT_MAXSIZE ) ;
/*
* If hashing , we also need a longer - lived context to store the hash
* table . The table can ' t just be kept in the per - query context because
* we want to be able to throw it away in ExecReScanSetOp .
*/
if ( node - > strategy = = SETOP_HASHED )
setopstate - > tableContext =
AllocSetContextCreate ( CurrentMemoryContext ,
" SetOp hash table " ,
ALLOCSET_DEFAULT_MINSIZE ,
ALLOCSET_DEFAULT_INITSIZE ,
ALLOCSET_DEFAULT_MAXSIZE ) ;
# define SETOP_NSLOTS 1
/*
@ -252,8 +498,13 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
ExecInitResultTupleSlot ( estate , & setopstate - > ps ) ;
/*
* then initialize outer plan
* initialize child nodes
*
* If we are hashing then the child plan does not need
* to handle REWIND efficiently ; see ExecReScanSetOp .
*/
if ( node - > strategy = = SETOP_HASHED )
eflags & = ~ EXEC_FLAG_REWIND ;
outerPlanState ( setopstate ) = ExecInitNode ( outerPlan ( node ) , estate , eflags ) ;
/*
@ -264,11 +515,30 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
setopstate - > ps . ps_ProjInfo = NULL ;
/*
* Precompute fmgr lookup data for inner loop
* Precompute fmgr lookup data for inner loop . We need both equality and
* hashing functions to do it by hashing , but only equality if not
* hashing .
*/
setopstate - > eqfunctions =
execTuplesMatchPrepare ( node - > numCols ,
node - > dupOperators ) ;
if ( node - > strategy = = SETOP_HASHED )
execTuplesHashPrepare ( node - > numCols ,
node - > dupOperators ,
& setopstate - > eqfunctions ,
& setopstate - > hashfunctions ) ;
else
setopstate - > eqfunctions =
execTuplesMatchPrepare ( node - > numCols ,
node - > dupOperators ) ;
if ( node - > strategy = = SETOP_HASHED )
{
build_hash_table ( setopstate ) ;
setopstate - > table_filled = false ;
}
else
{
setopstate - > pergroup =
( SetOpStatePerGroup ) palloc0 ( sizeof ( SetOpStatePerGroupData ) ) ;
}
return setopstate ;
}
@ -293,9 +563,11 @@ ExecEndSetOp(SetOpState *node)
{
/* clean up tuple table */
ExecClearTuple ( node - > ps . ps_ResultTupleSlot ) ;
node - > ps . ps_OuterTupleSlot = NULL ;
/* free subsidiary stuff including hashtable */
MemoryContextDelete ( node - > tempContext ) ;
if ( node - > tableContext )
MemoryContextDelete ( node - > tableContext ) ;
ExecEndNode ( outerPlanState ( node ) ) ;
}
@ -305,10 +577,50 @@ void
ExecReScanSetOp ( SetOpState * node , ExprContext * exprCtxt )
{
ExecClearTuple ( node - > ps . ps_ResultTupleSlot ) ;
node - > ps . ps_OuterTupleSlot = NULL ;
node - > subplan_done = false ;
node - > setop_done = false ;
node - > numOutput = 0 ;
if ( ( ( SetOp * ) node - > ps . plan ) - > strategy = = SETOP_HASHED )
{
/*
* In the hashed case , if we haven ' t yet built the hash table then we
* can just return ; nothing done yet , so nothing to undo . If subnode ' s
* chgParam is not NULL then it will be re - scanned by ExecProcNode ,
* else no reason to re - scan it at all .
*/
if ( ! node - > table_filled )
return ;
/*
* If we do have the hash table and the subplan does not have any
* parameter changes , then we can just rescan the existing hash table ;
* no need to build it again .
*/
if ( ( ( PlanState * ) node ) - > lefttree - > chgParam = = NULL )
{
ResetTupleHashIterator ( node - > hashtable , & node - > hashiter ) ;
return ;
}
}
/* Release first tuple of group, if we have made a copy */
if ( node - > grp_firstTuple ! = NULL )
{
heap_freetuple ( node - > grp_firstTuple ) ;
node - > grp_firstTuple = NULL ;
}
/* Release any hashtable storage */
if ( node - > tableContext )
MemoryContextResetAndDeleteChildren ( node - > tableContext ) ;
/* And rebuild empty hashtable if needed */
if ( ( ( SetOp * ) node - > ps . plan ) - > strategy = = SETOP_HASHED )
{
build_hash_table ( node ) ;
node - > table_filled = false ;
}
/*
* if chgParam of subnode is not null then plan will be re - scanned by
* first ExecProcNode .