@ -18,6 +18,13 @@
# include "miscadmin.h"
# include "utils/rel.h"
static void ginFindParents ( GinBtree btree , GinBtreeStack * stack ) ;
static bool ginPlaceToPage ( GinBtree btree , GinBtreeStack * stack ,
void * insertdata , BlockNumber updateblkno ,
Buffer childbuf , GinStatsData * buildStats ) ;
static void ginFinishSplit ( GinBtree btree , GinBtreeStack * stack ,
bool freestack , GinStatsData * buildStats ) ;
/*
* Lock buffer by needed method for search .
*/
@ -85,6 +92,13 @@ ginFindLeafPage(GinBtree btree, bool searchMode)
access = ginTraverseLock ( stack - > buffer , searchMode ) ;
/*
* If we ' re going to modify the tree , finish any incomplete splits we
* encounter on the way .
*/
if ( ! searchMode & & GinPageIsIncompleteSplit ( page ) )
ginFinishSplit ( btree , stack , false , NULL ) ;
/*
* ok , page is correctly locked , we should check to move right . . ,
* root never has a right link , so small optimization
@ -101,6 +115,9 @@ ginFindLeafPage(GinBtree btree, bool searchMode)
stack - > buffer = ginStepRight ( stack - > buffer , btree - > index , access ) ;
stack - > blkno = rightlink ;
page = BufferGetPage ( stack - > buffer ) ;
if ( ! searchMode & & GinPageIsIncompleteSplit ( page ) )
ginFinishSplit ( btree , stack , false , NULL ) ;
}
if ( GinPageIsLeaf ( page ) ) /* we found, return locked page */
@ -187,7 +204,7 @@ freeGinBtreeStack(GinBtreeStack *stack)
* child ' s offset in stack - > parent . The root page is never released , to
* to prevent conflict with vacuum process .
*/
void
static void
ginFindParents ( GinBtree btree , GinBtreeStack * stack )
{
Page page ;
@ -195,24 +212,17 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
BlockNumber blkno ,
leftmostBlkno ;
OffsetNumber offset ;
GinBtreeStack * root = stack - > parent ;
GinBtreeStack * root ;
GinBtreeStack * ptr ;
if ( ! root )
{
/* XLog mode... */
root = ( GinBtreeStack * ) palloc ( sizeof ( GinBtreeStack ) ) ;
root - > blkno = btree - > rootBlkno ;
root - > buffer = ReadBuffer ( btree - > index , btree - > rootBlkno ) ;
LockBuffer ( root - > buffer , GIN_EXCLUSIVE ) ;
root - > parent = NULL ;
}
else
{
/*
* find root , we should not release root page until update is
* finished ! !
* Unwind the stack all the way up to the root , leaving only the root
* item .
*
* Be careful not to release the pin on the root page ! The pin on root
* page is required to lock out concurrent vacuums on the tree .
*/
root = stack - > parent ;
while ( root - > parent )
{
ReleaseBuffer ( root - > buffer ) ;
@ -221,32 +231,36 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
Assert ( root - > blkno = = btree - > rootBlkno ) ;
Assert ( BufferGetBlockNumber ( root - > buffer ) = = btree - > rootBlkno ) ;
LockBuffer ( root - > buffer , GIN_EXCLUSIVE ) ;
}
root - > off = InvalidOffsetNumber ;
page = BufferGetPage ( root - > buffer ) ;
Assert ( ! GinPageIsLeaf ( page ) ) ;
/* check trivial case */
if ( ( root - > off = btree - > findChildPtr ( btree , page , stack - > blkno , InvalidOffsetNumber ) ) ! = InvalidOffsetNumber )
{
stack - > parent = root ;
return ;
}
blkno = root - > blkno ;
buffer = root - > buffer ;
offset = InvalidOffsetNumber ;
leftmostBlkno = blkno = btree - > getLeftMostChild ( btree , page ) ;
LockBuffer ( root - > buffer , GIN_UNLOCK ) ;
Assert ( blkno ! = InvalidBlockNumber ) ;
ptr = ( GinBtreeStack * ) palloc ( sizeof ( GinBtreeStack ) ) ;
for ( ; ; )
{
buffer = ReadBuffer ( btree - > index , blkno ) ;
LockBuffer ( buffer , GIN_EXCLUSIVE ) ;
page = BufferGetPage ( buffer ) ;
if ( GinPageIsLeaf ( page ) )
elog ( ERROR , " Lost path " ) ;
if ( GinPageIsIncompleteSplit ( page ) )
{
Assert ( blkno ! = btree - > rootBlkno ) ;
ptr - > blkno = blkno ;
ptr - > buffer = buffer ;
/*
* parent may be wrong , but if so , the ginFinishSplit call will
* recurse to call ginFindParents again to fix it .
*/
ptr - > parent = root ;
ptr - > off = InvalidOffsetNumber ;
ginFinishSplit ( btree , ptr , false , NULL ) ;
}
leftmostBlkno = btree - > getLeftMostChild ( btree , page ) ;
while ( ( offset = btree - > findChildPtr ( btree , page , stack - > blkno , InvalidOffsetNumber ) ) = = InvalidOffsetNumber )
@ -259,11 +273,22 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
}
buffer = ginStepRight ( buffer , btree - > index , GIN_EXCLUSIVE ) ;
page = BufferGetPage ( buffer ) ;
/* finish any incomplete splits, as above */
if ( GinPageIsIncompleteSplit ( page ) )
{
Assert ( blkno ! = btree - > rootBlkno ) ;
ptr - > blkno = blkno ;
ptr - > buffer = buffer ;
ptr - > parent = root ;
ptr - > off = InvalidOffsetNumber ;
ginFinishSplit ( btree , ptr , false , NULL ) ;
}
}
if ( blkno ! = InvalidBlockNumber )
{
ptr = ( GinBtreeStack * ) palloc ( sizeof ( GinBtreeStack ) ) ;
ptr - > blkno = blkno ;
ptr - > buffer = buffer ;
ptr - > parent = root ; /* it may be wrong, but in next call we will
@ -273,7 +298,9 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
return ;
}
/* Descend down to next level */
blkno = leftmostBlkno ;
buffer = ReadBuffer ( btree - > index , blkno ) ;
}
}
@ -284,19 +311,38 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
* the parent needs to be updated . ( a root split returns true as it doesn ' t
* need any further action by the caller to complete )
*
* When inserting a downlink to a internal page , the existing item at the
* given location is updated to point to ' updateblkno ' .
* When inserting a downlink to a internal page , ' childbuf ' contains the
* child page that was split . Its GIN_INCOMPLETE_SPLIT flag will be cleared
* atomically with the insert . Also , the existing item at the given location
* is updated to point to ' updateblkno ' .
*
* stack - > buffer is locked on entry , and is kept locked .
*/
static bool
ginPlaceToPage ( GinBtree btree , GinBtreeStack * stack ,
void * insertdata , BlockNumber updateblkno ,
GinStatsData * buildStats )
Buffer childbuf , GinStatsData * buildStats )
{
Page page = BufferGetPage ( stack - > buffer ) ;
XLogRecData * rdata ;
XLogRecData * payload rdata;
bool fit ;
uint16 xlflags = 0 ;
Page childpage = NULL ;
if ( GinPageIsData ( page ) )
xlflags | = GIN_INSERT_ISDATA ;
if ( GinPageIsLeaf ( page ) )
{
xlflags | = GIN_INSERT_ISLEAF ;
Assert ( ! BufferIsValid ( childbuf ) ) ;
Assert ( updateblkno = = InvalidBlockNumber ) ;
}
else
{
Assert ( BufferIsValid ( childbuf ) ) ;
Assert ( updateblkno ! = InvalidBlockNumber ) ;
childpage = BufferGetPage ( childbuf ) ;
}
/*
* Try to put the incoming tuple on the page . If it doesn ' t fit ,
@ -306,17 +352,63 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
START_CRIT_SECTION ( ) ;
fit = btree - > placeToPage ( btree , stack - > buffer , stack - > off ,
insertdata , updateblkno ,
& rdata ) ;
& payload rdata) ;
if ( fit )
{
MarkBufferDirty ( stack - > buffer ) ;
/* An insert to an internal page finishes the split of the child. */
if ( childbuf ! = InvalidBuffer )
{
GinPageGetOpaque ( childpage ) - > flags & = ~ GIN_INCOMPLETE_SPLIT ;
MarkBufferDirty ( childbuf ) ;
}
if ( RelationNeedsWAL ( btree - > index ) )
{
XLogRecPtr recptr ;
XLogRecData rdata [ 3 ] ;
ginxlogInsert xlrec ;
BlockIdData childblknos [ 2 ] ;
xlrec . node = btree - > index - > rd_node ;
xlrec . blkno = BufferGetBlockNumber ( stack - > buffer ) ;
xlrec . offset = stack - > off ;
xlrec . flags = xlflags ;
rdata [ 0 ] . buffer = InvalidBuffer ;
rdata [ 0 ] . data = ( char * ) & xlrec ;
rdata [ 0 ] . len = sizeof ( ginxlogInsert ) ;
/*
* Log information about child if this was an insertion of a
* downlink .
*/
if ( childbuf ! = InvalidBuffer )
{
rdata [ 0 ] . next = & rdata [ 1 ] ;
BlockIdSet ( & childblknos [ 0 ] , BufferGetBlockNumber ( childbuf ) ) ;
BlockIdSet ( & childblknos [ 1 ] , GinPageGetOpaque ( childpage ) - > rightlink ) ;
rdata [ 1 ] . buffer = InvalidBuffer ;
rdata [ 1 ] . data = ( char * ) childblknos ;
rdata [ 1 ] . len = sizeof ( BlockIdData ) * 2 ;
rdata [ 1 ] . next = & rdata [ 2 ] ;
rdata [ 2 ] . buffer = childbuf ;
rdata [ 2 ] . buffer_std = false ;
rdata [ 2 ] . data = NULL ;
rdata [ 2 ] . len = 0 ;
rdata [ 2 ] . next = payloadrdata ;
}
else
rdata [ 0 ] . next = payloadrdata ;
recptr = XLogInsert ( RM_GIN_ID , XLOG_GIN_INSERT , rdata ) ;
PageSetLSN ( page , recptr ) ;
if ( childbuf ! = InvalidBuffer )
PageSetLSN ( childpage , recptr ) ;
}
END_CRIT_SECTION ( ) ;
@ -329,13 +421,16 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
Buffer rbuffer ;
Page newlpage ;
BlockNumber savedRightLink ;
GinBtreeStack * parent ;
Page lpage ,
rpage ;
Page rpage ;
XLogRecData rdata [ 2 ] ;
ginxlogSplit data ;
Buffer lbuffer = InvalidBuffer ;
Page newrootpg = NULL ;
END_CRIT_SECTION ( ) ;
rbuffer = GinNewBuffer ( btree - > index ) ;
/* During index build, count the new page */
if ( buildStats )
{
@ -353,21 +448,50 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
*/
newlpage = btree - > splitPage ( btree , stack - > buffer , rbuffer , stack - > off ,
insertdata , updateblkno ,
& rdata ) ;
& payload rdata) ;
( ( ginxlogSplit * ) ( rdata - > data ) ) - > rootBlkno = btree - > rootBlkno ;
data . node = btree - > index - > rd_node ;
data . rblkno = BufferGetBlockNumber ( rbuffer ) ;
data . flags = xlflags ;
if ( childbuf ! = InvalidBuffer )
{
Page childpage = BufferGetPage ( childbuf ) ;
GinPageGetOpaque ( childpage ) - > flags & = ~ GIN_INCOMPLETE_SPLIT ;
parent = stack - > parent ;
data . leftChildBlkno = BufferGetBlockNumber ( childbuf ) ;
data . rightChildBlkno = GinPageGetOpaque ( childpage ) - > rightlink ;
}
else
data . leftChildBlkno = data . rightChildBlkno = InvalidBlockNumber ;
if ( parent = = NULL )
rdata [ 0 ] . buffer = InvalidBuffer ;
rdata [ 0 ] . data = ( char * ) & data ;
rdata [ 0 ] . len = sizeof ( ginxlogSplit ) ;
if ( childbuf ! = InvalidBuffer )
{
rdata [ 0 ] . next = & rdata [ 1 ] ;
rdata [ 1 ] . buffer = childbuf ;
rdata [ 1 ] . buffer_std = false ;
rdata [ 1 ] . data = NULL ;
rdata [ 1 ] . len = 0 ;
rdata [ 1 ] . next = payloadrdata ;
}
else
rdata [ 0 ] . next = payloadrdata ;
rpage = BufferGetPage ( rbuffer ) ;
if ( stack - > parent = = NULL )
{
/*
* split root , so we need to allocate new left page and place
* pointer on root to left and right page
*/
Buffer lbuffer = GinNewBuffer ( btree - > index ) ;
lbuffer = GinNewBuffer ( btree - > index ) ;
/* During index build, count the new page */
/* During index build, count the newly-added root page */
if ( buildStats )
{
if ( btree - > isData )
@ -376,97 +500,126 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
buildStats - > nEntryPages + + ;
}
( ( ginxlogSplit * ) ( rdata - > data ) ) - > isRootSplit = TRUE ;
( ( ginxlogSplit * ) ( rdata - > data ) ) - > rrlink = InvalidBlockNumber ;
lpage = BufferGetPage ( lbuffer ) ;
rpage = BufferGetPage ( rbuffer ) ;
/*
* root never has a right - link , so we borrow the rrlink field to
* store the root block number .
*/
data . rrlink = BufferGetBlockNumber ( stack - > buffer ) ;
data . lblkno = BufferGetBlockNumber ( lbuffer ) ;
data . flags | = GIN_SPLIT_ROOT ;
GinPageGetOpaque ( rpage ) - > rightlink = InvalidBlockNumber ;
GinPageGetOpaque ( newlpage ) - > rightlink = BufferGetBlockNumber ( rbuffer ) ;
( ( ginxlogSplit * ) ( rdata - > data ) ) - > lblkno = BufferGetBlockNumber ( lbuffer ) ;
START_CRIT_SECTION ( ) ;
GinInitBuffer ( stack - > buffer , GinPageGetOpaque ( newlpage ) - > flags & ~ GIN_LEAF ) ;
PageRestoreTempPage ( newlpage , lpage ) ;
btree - > fillRoot ( btree , stack - > buffer , lbuffer , rbuffer ) ;
MarkBufferDirty ( rbuffer ) ;
MarkBufferDirty ( lbuffer ) ;
MarkBufferDirty ( stack - > buffer ) ;
if ( RelationNeedsWAL ( btree - > index ) )
{
XLogRecPtr recptr ;
recptr = XLogInsert ( RM_GIN_ID , XLOG_GIN_SPLIT , rdata ) ;
PageSetLSN ( page , recptr ) ;
PageSetLSN ( lpage , recptr ) ;
PageSetLSN ( rpage , recptr ) ;
}
UnlockReleaseBuffer ( rbuffer ) ;
UnlockReleaseBuffer ( lbuffer ) ;
END_CRIT_SECTION ( ) ;
/* During index build, count the newly-added root page */
if ( buildStats )
{
if ( btree - > isData )
buildStats - > nDataPages + + ;
else
buildStats - > nEntryPages + + ;
}
/*
* Construct a new root page containing downlinks to the new left
* and right pages . ( do this in a temporary copy first rather
* than overwriting the original page directly , so that we can still
* abort gracefully if this fails . )
*/
newrootpg = PageGetTempPage ( rpage ) ;
GinInitPage ( newrootpg , GinPageGetOpaque ( newlpage ) - > flags & ~ GIN_LEAF , BLCKSZ ) ;
return true ;
btree - > fillRoot ( btree , newrootpg ,
BufferGetBlockNumber ( lbuffer ) , newlpage ,
BufferGetBlockNumber ( rbuffer ) , rpage ) ;
}
else
{
/* split non-root page */
( ( ginxlogSplit * ) ( rdata - > data ) ) - > isRootSplit = FALSE ;
( ( ginxlogSplit * ) ( rdata - > data ) ) - > rrlink = savedRightLink ;
lpage = BufferGetPage ( stack - > buffer ) ;
rpage = BufferGetPage ( rbuffer ) ;
data . rrlink = savedRightLink ;
data . lblkno = BufferGetBlockNumber ( stack - > buffer ) ;
GinPageGetOpaque ( rpage ) - > rightlink = savedRightLink ;
GinPageGetOpaque ( newlpage ) - > flags | = GIN_INCOMPLETE_SPLIT ;
GinPageGetOpaque ( newlpage ) - > rightlink = BufferGetBlockNumber ( rbuffer ) ;
}
/*
* Ok , we have the new contents of the left page in a temporary copy
* now ( newlpage ) , and the newly - allocated right block has been filled
* in . The original page is still unchanged .
*
* If this is a root split , we also have a temporary page containing
* the new contents of the root . Copy the new left page to a
* newly - allocated block , and initialize the ( original ) root page the
* new copy . Otherwise , copy over the temporary copy of the new left
* page over the old left page .
*/
START_CRIT_SECTION ( ) ;
PageRestoreTempPage ( newlpage , lpage ) ;
MarkBufferDirty ( rbuffer ) ;
if ( stack - > parent = = NULL )
{
PageRestoreTempPage ( newlpage , BufferGetPage ( lbuffer ) ) ;
MarkBufferDirty ( lbuffer ) ;
newlpage = newrootpg ;
}
PageRestoreTempPage ( newlpage , BufferGetPage ( stack - > buffer ) ) ;
MarkBufferDirty ( stack - > buffer ) ;
/* write WAL record */
if ( RelationNeedsWAL ( btree - > index ) )
{
XLogRecPtr recptr ;
recptr = XLogInsert ( RM_GIN_ID , XLOG_GIN_SPLIT , rdata ) ;
PageSetLSN ( lpage , recptr ) ;
PageSetLSN ( BufferGetPage ( stack - > buff er ) , recptr ) ;
PageSetLSN ( rpage , recptr ) ;
if ( stack - > parent = = NULL )
PageSetLSN ( BufferGetPage ( lbuffer ) , recptr ) ;
}
UnlockReleaseBuffer ( rbuffer ) ;
END_CRIT_SECTION ( ) ;
/*
* We can release the lock on the right page now , but keep the
* original buffer locked .
*/
UnlockReleaseBuffer ( rbuffer ) ;
if ( stack - > parent = = NULL )
UnlockReleaseBuffer ( lbuffer ) ;
/*
* If we split the root , we ' re done . Otherwise the split is not
* complete until the downlink for the new page has been inserted to
* the parent .
*/
if ( stack - > parent = = NULL )
return true ;
else
return false ;
}
}
}
/*
* Finish a split by inserting the downlink for the new page to parent .
*
* On entry , stack - > buffer is exclusively locked .
*
* NB : the passed - in stack is freed , as though by freeGinBtreeStack .
* If freestack is true , all the buffers are released and unlocked as we
* crawl up the tree , and ' stack ' is freed . Otherwise stack - > buffer is kept
* locked , and stack is unmodified , except for possibly moving right to find
* the correct parent of page .
*/
void
ginFinishSplit ( GinBtree btree , GinBtreeStack * stack , GinStatsData * buildStats )
static void
ginFinishSplit ( GinBtree btree , GinBtreeStack * stack , bool freestack ,
GinStatsData * buildStats )
{
Page page ;
bool done ;
bool first = true ;
/*
* freestack = = false when we encounter an incompletely split page during a
* scan , while freestack = = true is used in the normal scenario that a
* split is finished right after the initial insert .
*/
if ( ! freestack )
elog ( DEBUG1 , " finishing incomplete split of block %u in gin index \" %s \" " ,
stack - > blkno , RelationGetRelationName ( btree - > index ) ) ;
/* this loop crawls up the stack until the insertion is complete */
do
@ -475,19 +628,26 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
void * insertdata ;
BlockNumber updateblkno ;
insertdata = btree - > prepareDownlink ( btree , stack - > buffer ) ;
updateblkno = GinPageGetOpaque ( BufferGetPage ( stack - > buffer ) ) - > rightlink ;
/* search parent to lock */
LockBuffer ( parent - > buffer , GIN_EXCLUSIVE ) ;
/*
* If the parent page was incompletely split , finish that split first ,
* then continue with the current one .
*
* Note : we have to finish * all * incomplete splits we encounter , even
* if we have to move right . Otherwise we might choose as the target
* a page that has no downlink in the parent , and splitting it further
* would fail .
*/
if ( GinPageIsIncompleteSplit ( BufferGetPage ( parent - > buffer ) ) )
ginFinishSplit ( btree , parent , false , buildStats ) ;
/* move right if it's needed */
page = BufferGetPage ( parent - > buffer ) ;
while ( ( parent - > off = btree - > findChildPtr ( btree , page , stack - > blkno , parent - > off ) ) = = InvalidOffsetNumber )
{
BlockNumber rightlink = GinPageGetOpaque ( page ) - > rightlink ;
if ( rightlink = = InvalidBlockNumber )
if ( GinPageRightMost ( page ) )
{
/*
* rightmost page , but we don ' t find parent , we should use
@ -501,24 +661,43 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
}
parent - > buffer = ginStepRight ( parent - > buffer , btree - > index , GIN_EXCLUSIVE ) ;
parent - > blkno = rightlink ;
parent - > blkno = BufferGetBlockNumber ( parent - > buffer ) ;
page = BufferGetPage ( parent - > buffer ) ;
if ( GinPageIsIncompleteSplit ( BufferGetPage ( parent - > buffer ) ) )
ginFinishSplit ( btree , parent , false , buildStats ) ;
}
/* release the child */
UnlockReleaseBuffer ( stack - > buffer ) ;
/* insert the downlink */
insertdata = btree - > prepareDownlink ( btree , stack - > buffer ) ;
updateblkno = GinPageGetOpaque ( BufferGetPage ( stack - > buffer ) ) - > rightlink ;
done = ginPlaceToPage ( btree , parent ,
insertdata , updateblkno ,
stack - > buffer , buildStats ) ;
pfree ( insertdata ) ;
/*
* If the caller requested to free the stack , unlock and release the
* child buffer now . Otherwise keep it pinned and locked , but if we
* have to recurse up the tree , we can unlock the upper pages , only
* keeping the page at the bottom of the stack locked .
*/
if ( ! first | | freestack )
LockBuffer ( stack - > buffer , GIN_UNLOCK ) ;
if ( freestack )
{
ReleaseBuffer ( stack - > buffer ) ;
pfree ( stack ) ;
}
stack = parent ;
/* insert the downlink to parent */
done = ginPlaceToPage ( btree , stack ,
insertdata , updateblkno ,
buildStats ) ;
pfree ( insertdata ) ;
first = false ;
} while ( ! done ) ;
/* unlock the parent */
LockBuffer ( stack - > buffer , GIN_UNLOCK ) ;
/* free the rest of the stack */
if ( freestack )
freeGinBtreeStack ( stack ) ;
}
@ -540,14 +719,18 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
{
bool done ;
/* If the leaf page was incompletely split, finish the split first */
if ( GinPageIsIncompleteSplit ( BufferGetPage ( stack - > buffer ) ) )
ginFinishSplit ( btree , stack , false , buildStats ) ;
done = ginPlaceToPage ( btree , stack ,
insertdata , InvalidBlockNumber ,
buildStats ) ;
InvalidBuffer , buildStats ) ;
if ( done )
{
LockBuffer ( stack - > buffer , GIN_UNLOCK ) ;
freeGinBtreeStack ( stack ) ;
}
else
ginFinishSplit ( btree , stack , buildStats ) ;
ginFinishSplit ( btree , stack , true , buildStats ) ;
}