@@ -55,8 +55,10 @@ static int32 heap_compare_slots(Datum a, Datum b, void *arg);
 static TupleTableSlot *gather_merge_getnext(GatherMergeState *gm_state);
 static HeapTuple gm_readnext_tuple(GatherMergeState *gm_state, int nreader,
                    bool nowait, bool *done);
-static void gather_merge_init(GatherMergeState *gm_state);
 static void ExecShutdownGatherMergeWorkers(GatherMergeState *node);
+static void gather_merge_setup(GatherMergeState *gm_state);
+static void gather_merge_init(GatherMergeState *gm_state);
+static void gather_merge_clear_tuples(GatherMergeState *gm_state);
 static bool gather_merge_readnext(GatherMergeState *gm_state, int reader,
                       bool nowait);
 static void load_tuple_array(GatherMergeState *gm_state, int reader);
@@ -149,14 +151,17 @@ ExecInitGatherMerge(GatherMerge *node, EState *estate, int eflags)
     }
 
     /*
-     * Store the tuple descriptor into gather merge state, so we can use it
-     * later while initializing the gather merge slots.
+     * Store the tuple descriptor into gather merge state, so we can use it
+     * while initializing the gather merge slots.
      */
     if (!ExecContextForcesOids(&gm_state->ps, &hasoid))
         hasoid = false;
     tupDesc = ExecTypeFromTL(outerNode->targetlist, hasoid);
     gm_state->tupDesc = tupDesc;
 
+    /* Now allocate the workspace for gather merge */
+    gather_merge_setup(gm_state);
+
     return gm_state;
 }
@@ -340,6 +345,9 @@ ExecReScanGatherMerge(GatherMergeState *node)
     /* Make sure any existing workers are gracefully shut down */
     ExecShutdownGatherMergeWorkers(node);
 
+    /* Free any unused tuples, so we don't leak memory across rescans */
+    gather_merge_clear_tuples(node);
+
     /* Mark node so that shared state will be rebuilt at next call */
     node->initialized = false;
     node->gm_initialized = false;
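
The rescan path is what motivates the restructuring: gather_merge_setup() runs once, from ExecInitGatherMerge(), while each scan lazily calls gather_merge_init() and each rescan (or an exhausted scan) calls gather_merge_clear_tuples(). A minimal sketch of that division of labor, using the routines declared in this patch (the wrapper function itself is invented purely for illustration):

    /* Illustration only: where each routine fits in the node's life cycle. */
    static void
    gather_merge_lifecycle_sketch(GatherMergeState *node)
    {
        /* ExecInitGatherMerge(): allocate worst-case arrays exactly once */
        gather_merge_setup(node);

        /* start of each scan: reset the arrays and prime the binary heap */
        gather_merge_init(node);

        /*
         * ExecReScanGatherMerge(), or the heap running empty: free only the
         * tuples that were never returned; the arrays and gm_heap stay
         * allocated for reuse.
         */
        gather_merge_clear_tuples(node);
    }
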
@@ -370,49 +378,93 @@ ExecReScanGatherMerge(GatherMergeState *node)
 }
 
 /*
- * Initialize the Gather merge tuple read.
+ * Set up the data structures that we'll need for Gather Merge.
+ *
+ * We allocate these once on the basis of gm->num_workers, which is an
+ * upper bound for the number of workers we'll actually have.  During
+ * a rescan, we reset the structures to empty.  This approach simplifies
+ * not leaking memory across rescans.
  *
- * Pull at least a single tuple from each worker + leader and set up the heap.
+ * In the gm_slots[] array, index 0 is for the leader, and indexes 1 to n
+ * are for workers.  The values placed into gm_heap correspond to indexes
+ * in gm_slots[].  The gm_tuple_buffers[] array, however, is indexed from
+ * 0 to n-1; it has no entry for the leader.
  */
 static void
-gather_merge_init(GatherMergeState *gm_state)
+gather_merge_setup(GatherMergeState *gm_state)
 {
-    int         nreaders = gm_state->nreaders;
-    bool        nowait = true;
+    GatherMerge *gm = castNode(GatherMerge, gm_state->ps.plan);
+    int         nreaders = gm->num_workers;
     int         i;
 
     /*
      * Allocate gm_slots for the number of workers + one more slot for leader.
-     * Last slot is always for leader.  Leader always calls ExecProcNode() to
-     * read the tuple which will return the TupleTableSlot.  Later it will
-     * directly get assigned to gm_slot.  So just initialize leader gm_slot
-     * with NULL.  For other slots, code below will call
-     * ExecInitExtraTupleSlot() to create a slot for the worker's results.
-     */
-    gm_state->gm_slots =
-        palloc((gm_state->nreaders + 1) * sizeof(TupleTableSlot *));
-    gm_state->gm_slots[gm_state->nreaders] = NULL;
-
-    /* Initialize the tuple slot and tuple array for each worker */
-    gm_state->gm_tuple_buffers =
-        (GMReaderTupleBuffer *) palloc0(sizeof(GMReaderTupleBuffer) *
-                                        gm_state->nreaders);
-    for (i = 0; i < gm_state->nreaders; i++)
+     * Slot 0 is always for the leader.  Leader always calls ExecProcNode() to
+     * read the tuple, and then stores it directly into its gm_slots entry.
+     * For other slots, code below will call ExecInitExtraTupleSlot() to
+     * create a slot for the worker's results.  Note that during any single
+     * scan, we might have fewer than num_workers available workers, in which
+     * case the extra array entries go unused.
+     */
+    gm_state->gm_slots = (TupleTableSlot **)
+        palloc0((nreaders + 1) * sizeof(TupleTableSlot *));
+
+    /* Allocate the tuple slot and tuple array for each worker */
+    gm_state->gm_tuple_buffers = (GMReaderTupleBuffer *)
+        palloc0(nreaders * sizeof(GMReaderTupleBuffer));
+
+    for (i = 0; i < nreaders; i++)
     {
         /* Allocate the tuple array with length MAX_TUPLE_STORE */
         gm_state->gm_tuple_buffers[i].tuple =
             (HeapTuple *) palloc0(sizeof(HeapTuple) * MAX_TUPLE_STORE);
 
-        /* Initialize slot for worker */
-        gm_state->gm_slots[i] = ExecInitExtraTupleSlot(gm_state->ps.state);
-        ExecSetSlotDescriptor(gm_state->gm_slots[i],
+        /* Initialize tuple slot for worker */
+        gm_state->gm_slots[i + 1] = ExecInitExtraTupleSlot(gm_state->ps.state);
+        ExecSetSlotDescriptor(gm_state->gm_slots[i + 1],
                               gm_state->tupDesc);
     }
 
     /* Allocate the resources for the merge */
-    gm_state->gm_heap = binaryheap_allocate(gm_state->nreaders + 1,
+    gm_state->gm_heap = binaryheap_allocate(nreaders + 1,
                                             heap_compare_slots,
                                             gm_state);
+}
+
+/*
+ * Initialize the Gather Merge.
+ *
+ * Reset data structures to ensure they're empty.  Then pull at least one
+ * tuple from leader + each worker (or set its "done" indicator), and set up
+ * the heap.
+ */
+static void
+gather_merge_init(GatherMergeState *gm_state)
+{
+    int         nreaders = gm_state->nreaders;
+    bool        nowait = true;
+    int         i;
+
+    /* Assert that gather_merge_setup made enough space */
+    Assert(nreaders <= castNode(GatherMerge, gm_state->ps.plan)->num_workers);
+
+    /* Reset leader's tuple slot to empty */
+    gm_state->gm_slots[0] = NULL;
+
+    /* Reset the tuple slot and tuple array for each worker */
+    for (i = 0; i < nreaders; i++)
+    {
+        /* Reset tuple array to empty */
+        gm_state->gm_tuple_buffers[i].nTuples = 0;
+        gm_state->gm_tuple_buffers[i].readCounter = 0;
+        /* Reset done flag to not-done */
+        gm_state->gm_tuple_buffers[i].done = false;
+        /* Ensure output slot is empty */
+        ExecClearTuple(gm_state->gm_slots[i + 1]);
+    }
+
+    /* Reset binary heap to empty */
+    binaryheap_reset(gm_state->gm_heap);
 
     /*
      * First, try to read a tuple from each worker (including leader) in
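
To restate the indexing convention the new comments describe: a reader number of 0 now means the leader, and reader numbers 1 through nreaders mean workers, so gm_slots[] is addressed directly by reader number while the per-worker arrays are addressed with an offset of one. A minimal sketch, not part of the patch (the helper name is hypothetical):

    /* Hypothetical helper restating the convention documented above. */
    static GMReaderTupleBuffer *
    gm_tuple_buffer_for(GatherMergeState *gm_state, int reader)
    {
        /* reader 0 is the leader; it reads via ExecProcNode() and has no buffer */
        Assert(reader > 0 && reader <= gm_state->nreaders);

        /* workers use gm_slots[1..n] but gm_tuple_buffers[0..n-1] */
        return &gm_state->gm_tuple_buffers[reader - 1];
    }
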
@@ -422,14 +474,13 @@ gather_merge_init(GatherMergeState *gm_state)
      * least one tuple) to the heap.
      */
 reread:
-    for (i = 0; i < nreaders + 1; i++)
+    for (i = 0; i <= nreaders; i++)
     {
         CHECK_FOR_INTERRUPTS();
 
-        /* ignore this source if already known done */
-        if ((i < nreaders) ?
-            !gm_state->gm_tuple_buffers[i].done :
-            gm_state->need_to_scan_locally)
+        /* skip this source if already known done */
+        if ((i == 0) ? gm_state->need_to_scan_locally :
+            !gm_state->gm_tuple_buffers[i - 1].done)
         {
             if (TupIsNull(gm_state->gm_slots[i]))
             {
@@ -450,9 +501,9 @@ reread:
     }
 
     /* need not recheck leader, since nowait doesn't matter for it */
-    for (i = 0; i < nreaders; i++)
+    for (i = 1; i <= nreaders; i++)
     {
-        if (!gm_state->gm_tuple_buffers[i].done &&
+        if (!gm_state->gm_tuple_buffers[i - 1].done &&
             TupIsNull(gm_state->gm_slots[i]))
         {
             nowait = false;
@@ -467,23 +518,23 @@
 }
 
 /*
- * Clear out the tuple table slots for each gather merge input.
+ * Clear out the tuple table slot, and any unused pending tuples,
+ * for each gather merge input.
  */
 static void
-gather_merge_clear_slots(GatherMergeState *gm_state)
+gather_merge_clear_tuples(GatherMergeState *gm_state)
 {
     int         i;
 
     for (i = 0; i < gm_state->nreaders; i++)
     {
-        pfree(gm_state->gm_tuple_buffers[i].tuple);
-        ExecClearTuple(gm_state->gm_slots[i]);
-    }
+        GMReaderTupleBuffer *tuple_buffer = &gm_state->gm_tuple_buffers[i];
 
-    /* Free tuple array as we don't need it any more */
-    pfree(gm_state->gm_tuple_buffers);
-    /* Free the binaryheap, which was created for sort */
-    binaryheap_free(gm_state->gm_heap);
+        while (tuple_buffer->readCounter < tuple_buffer->nTuples)
+            heap_freetuple(tuple_buffer->tuple[tuple_buffer->readCounter++]);
+
+        ExecClearTuple(gm_state->gm_slots[i + 1]);
+    }
 }
 
 /*
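
Note that gather_merge_clear_tuples() now frees only the tuples sitting in a tuple array that were fetched from a worker but never handed to the merge; tuples already returned are owned by their slots (they are stored with the "pfree when done" flag, as the ExecStoreTuple() call later in this patch shows), and the arrays and gm_heap themselves stay allocated for the next scan. For example, with nTuples = 10 and readCounter = 7, only entries 7 through 9 are freed by the while loop above.
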
@@ -526,7 +577,7 @@ gather_merge_getnext(GatherMergeState *gm_state)
     if (binaryheap_empty(gm_state->gm_heap))
     {
         /* All the queues are exhausted, and so is the heap */
-        gather_merge_clear_slots(gm_state);
+        gather_merge_clear_tuples(gm_state);
         return NULL;
     }
     else
@@ -548,10 +599,10 @@ load_tuple_array(GatherMergeState *gm_state, int reader)
     int         i;
 
     /* Don't do anything if this is the leader. */
-    if (reader == gm_state->nreaders)
+    if (reader == 0)
         return;
 
-    tuple_buffer = &gm_state->gm_tuple_buffers[reader];
+    tuple_buffer = &gm_state->gm_tuple_buffers[reader - 1];
 
     /* If there's nothing in the array, reset the counters to zero. */
     if (tuple_buffer->nTuples == tuple_buffer->readCounter)
@@ -590,7 +641,7 @@ gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait)
      * If we're being asked to generate a tuple from the leader, then we just
      * call ExecProcNode as normal to produce one.
      */
-    if (gm_state->nreaders == reader)
+    if (reader == 0)
     {
         if (gm_state->need_to_scan_locally)
         {
@@ -601,7 +652,7 @@ gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait)
             if (!TupIsNull(outerTupleSlot))
             {
-                gm_state->gm_slots[reader] = outerTupleSlot;
+                gm_state->gm_slots[0] = outerTupleSlot;
                 return true;
             }
             /* need_to_scan_locally serves as "done" flag for leader */
@@ -611,7 +662,7 @@ gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait)
     }
 
     /* Otherwise, check the state of the relevant tuple buffer. */
-    tuple_buffer = &gm_state->gm_tuple_buffers[reader];
+    tuple_buffer = &gm_state->gm_tuple_buffers[reader - 1];
 
     if (tuple_buffer->nTuples > tuple_buffer->readCounter)
     {
@@ -621,8 +672,8 @@ gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait)
     else if (tuple_buffer->done)
     {
         /* Reader is known to be exhausted. */
-        DestroyTupleQueueReader(gm_state->reader[reader]);
-        gm_state->reader[reader] = NULL;
+        DestroyTupleQueueReader(gm_state->reader[reader - 1]);
+        gm_state->reader[reader - 1] = NULL;
         return false;
     }
     else
@@ -649,14 +700,14 @@ gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait)
     ExecStoreTuple(tup,         /* tuple to store */
                    gm_state->gm_slots[reader], /* slot in which to store the
                                                 * tuple */
-                   InvalidBuffer,  /* buffer associated with this tuple */
-                   true);       /* pfree this pointer if not from heap */
+                   InvalidBuffer,  /* no buffer associated with tuple */
+                   true);       /* pfree tuple when done with it */
 
     return true;
 }
 
 /*
- * Attempt to read a tuple from given reader.
+ * Attempt to read a tuple from given worker.
  */
 static HeapTuple
 gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait,
@@ -671,7 +722,7 @@ gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait,
     CHECK_FOR_INTERRUPTS();
 
     /* Attempt to read a tuple. */
-    reader = gm_state->reader[nreader];
+    reader = gm_state->reader[nreader - 1];
 
     /* Run TupleQueueReaders in per-tuple context */
     tupleContext = gm_state->ps.ps_ExprContext->ecxt_per_tuple_memory;