@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $ PostgreSQL : pgsql / src / backend / access / nbtree / nbtinsert . c , v 1.170 .2 .1 2009 / 10 / 02 21 : 14 : 11 tgl Exp $
* $ PostgreSQL : pgsql / src / backend / access / nbtree / nbtinsert . c , v 1.170 .2 .2 2010 / 08 / 29 19 : 33 : 29 tgl Exp $
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
@ -72,9 +72,8 @@ static OffsetNumber _bt_findsplitloc(Relation rel, Page page,
static void _bt_checksplitloc ( FindSplitData * state ,
OffsetNumber firstoldonright , bool newitemonleft ,
int dataitemstoleft , Size firstoldonrightsz ) ;
static void _bt_pgaddtup ( Relation rel , Page page ,
Size itemsize , IndexTuple itup ,
OffsetNumber itup_off , const char * where ) ;
static bool _bt_pgaddtup ( Page page , Size itemsize , IndexTuple itup ,
OffsetNumber itup_off ) ;
static bool _bt_isequal ( TupleDesc itupdesc , Page page , OffsetNumber offnum ,
int keysz , ScanKey scankey ) ;
static void _bt_vacuum_one_page ( Relation rel , Buffer buffer ) ;
@ -651,7 +650,9 @@ _bt_insertonpg(Relation rel,
/* Do the update. No ereport(ERROR) until changes are logged */
START_CRIT_SECTION ( ) ;
_bt_pgaddtup ( rel , page , itemsz , itup , newitemoff , " page " ) ;
if ( ! _bt_pgaddtup ( page , itemsz , itup , newitemoff ) )
elog ( PANIC , " failed to add new item to block %u in index \" %s \" " ,
itup_blkno , RelationGetRelationName ( rel ) ) ;
MarkBufferDirty ( buf ) ;
@ -777,6 +778,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
Page origpage ;
Page leftpage ,
rightpage ;
BlockNumber origpagenumber ,
rightpagenumber ;
BTPageOpaque ropaque ,
lopaque ,
oopaque ;
@ -792,11 +795,27 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
OffsetNumber i ;
bool isroot ;
/* Acquire a new page to split into */
rbuf = _bt_getbuf ( rel , P_NEW , BT_WRITE ) ;
/*
* origpage is the original page to be split . leftpage is a temporary
* buffer that receives the left - sibling data , which will be copied back
* into origpage on success . rightpage is the new page that receives
* the right - sibling data . If we fail before reaching the critical
* section , origpage hasn ' t been modified and leftpage is only workspace .
* In principle we shouldn ' t need to worry about rightpage either ,
* because it hasn ' t been linked into the btree page structure ; but to
* avoid leaving possibly - confusing junk behind , we are careful to rewrite
* rightpage as zeroes before throwing any error .
*/
origpage = BufferGetPage ( buf ) ;
leftpage = PageGetTempPage ( origpage ) ;
rightpage = BufferGetPage ( rbuf ) ;
origpagenumber = BufferGetBlockNumber ( buf ) ;
rightpagenumber = BufferGetBlockNumber ( rbuf ) ;
_bt_pageinit ( leftpage , BufferGetPageSize ( buf ) ) ;
/* rightpage was already initialized by _bt_getbuf */
@ -821,8 +840,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
lopaque - > btpo_flags & = ~ ( BTP_ROOT | BTP_SPLIT_END | BTP_HAS_GARBAGE ) ;
ropaque - > btpo_flags = lopaque - > btpo_flags ;
lopaque - > btpo_prev = oopaque - > btpo_prev ;
lopaque - > btpo_next = BufferGetBlockNumber ( rbuf ) ;
ropaque - > btpo_prev = BufferGetBlockNumber ( buf ) ;
lopaque - > btpo_next = rightpagenumber ;
ropaque - > btpo_prev = origpagenumber ;
ropaque - > btpo_next = oopaque - > btpo_next ;
lopaque - > btpo . level = ropaque - > btpo . level = oopaque - > btpo . level ;
/* Since we already have write-lock on both pages, ok to read cycleid */
@ -845,9 +864,12 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
item = ( IndexTuple ) PageGetItem ( origpage , itemid ) ;
if ( PageAddItem ( rightpage , ( Item ) item , itemsz , rightoff ,
false , false ) = = InvalidOffsetNumber )
elog ( PANIC , " failed to add hikey to the right sibling "
{
memset ( rightpage , 0 , BufferGetPageSize ( rbuf ) ) ;
elog ( ERROR , " failed to add hikey to the right sibling "
" while splitting block %u of index \" %s \" " ,
BufferGetBlockNumber ( buf ) , RelationGetRelationName ( rel ) ) ;
origpagenumber , RelationGetRelationName ( rel ) ) ;
}
rightoff = OffsetNumberNext ( rightoff ) ;
}
@ -872,9 +894,12 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
}
if ( PageAddItem ( leftpage , ( Item ) item , itemsz , leftoff ,
false , false ) = = InvalidOffsetNumber )
elog ( PANIC , " failed to add hikey to the left sibling "
{
memset ( rightpage , 0 , BufferGetPageSize ( rbuf ) ) ;
elog ( ERROR , " failed to add hikey to the left sibling "
" while splitting block %u of index \" %s \" " ,
BufferGetBlockNumber ( buf ) , RelationGetRelationName ( rel ) ) ;
origpagenumber , RelationGetRelationName ( rel ) ) ;
}
leftoff = OffsetNumberNext ( leftoff ) ;
/*
@ -896,14 +921,24 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
{
if ( newitemonleft )
{
_bt_pgaddtup ( rel , leftpage , newitemsz , newitem , leftoff ,
" left sibling " ) ;
if ( ! _bt_pgaddtup ( leftpage , newitemsz , newitem , leftoff ) )
{
memset ( rightpage , 0 , BufferGetPageSize ( rbuf ) ) ;
elog ( ERROR , " failed to add new item to the left sibling "
" while splitting block %u of index \" %s \" " ,
origpagenumber , RelationGetRelationName ( rel ) ) ;
}
leftoff = OffsetNumberNext ( leftoff ) ;
}
else
{
_bt_pgaddtup ( rel , rightpage , newitemsz , newitem , rightoff ,
" right sibling " ) ;
if ( ! _bt_pgaddtup ( rightpage , newitemsz , newitem , rightoff ) )
{
memset ( rightpage , 0 , BufferGetPageSize ( rbuf ) ) ;
elog ( ERROR , " failed to add new item to the right sibling "
" while splitting block %u of index \" %s \" " ,
origpagenumber , RelationGetRelationName ( rel ) ) ;
}
rightoff = OffsetNumberNext ( rightoff ) ;
}
}
@ -911,14 +946,24 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
/* decide which page to put it on */
if ( i < firstright )
{
_bt_pgaddtup ( rel , leftpage , itemsz , item , leftoff ,
" left sibling " ) ;
if ( ! _bt_pgaddtup ( leftpage , itemsz , item , leftoff ) )
{
memset ( rightpage , 0 , BufferGetPageSize ( rbuf ) ) ;
elog ( ERROR , " failed to add old item to the left sibling "
" while splitting block %u of index \" %s \" " ,
origpagenumber , RelationGetRelationName ( rel ) ) ;
}
leftoff = OffsetNumberNext ( leftoff ) ;
}
else
{
_bt_pgaddtup ( rel , rightpage , itemsz , item , rightoff ,
" right sibling " ) ;
if ( ! _bt_pgaddtup ( rightpage , itemsz , item , rightoff ) )
{
memset ( rightpage , 0 , BufferGetPageSize ( rbuf ) ) ;
elog ( ERROR , " failed to add old item to the right sibling "
" while splitting block %u of index \" %s \" " ,
origpagenumber , RelationGetRelationName ( rel ) ) ;
}
rightoff = OffsetNumberNext ( rightoff ) ;
}
}
@ -932,8 +977,13 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
* not be splitting the page ) .
*/
Assert ( ! newitemonleft ) ;
_bt_pgaddtup ( rel , rightpage , newitemsz , newitem , rightoff ,
" right sibling " ) ;
if ( ! _bt_pgaddtup ( rightpage , newitemsz , newitem , rightoff ) )
{
memset ( rightpage , 0 , BufferGetPageSize ( rbuf ) ) ;
elog ( ERROR , " failed to add new item to the right sibling "
" while splitting block %u of index \" %s \" " ,
origpagenumber , RelationGetRelationName ( rel ) ) ;
}
rightoff = OffsetNumberNext ( rightoff ) ;
}
@ -945,16 +995,19 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
* neighbors .
*/
if ( ! P_RIGHTMOST ( r opaque) )
if ( ! P_RIGHTMOST ( o opaque) )
{
sbuf = _bt_getbuf ( rel , r opaque- > btpo_next , BT_WRITE ) ;
sbuf = _bt_getbuf ( rel , o opaque- > btpo_next , BT_WRITE ) ;
spage = BufferGetPage ( sbuf ) ;
sopaque = ( BTPageOpaque ) PageGetSpecialPointer ( spage ) ;
if ( sopaque - > btpo_prev ! = ropaque - > btpo_prev )
elog ( PANIC , " right sibling's left-link doesn't match: "
" block %u links to %u instead of expected %u in index \" %s \" " ,
ropaque - > btpo_next , sopaque - > btpo_prev , ropaque - > btpo_prev ,
if ( sopaque - > btpo_prev ! = origpagenumber )
{
memset ( rightpage , 0 , BufferGetPageSize ( rbuf ) ) ;
elog ( ERROR , " right sibling's left-link doesn't match: "
" block %u links to %u instead of expected %u in index \" %s \" " ,
oopaque - > btpo_next , sopaque - > btpo_prev , origpagenumber ,
RelationGetRelationName ( rel ) ) ;
}
/*
* Check to see if we can set the SPLIT_END flag in the right - hand
@ -979,8 +1032,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
*
* NO EREPORT ( ERROR ) till right sibling is updated . We can get away with
* not starting the critical section till here because we haven ' t been
* scribbling on the original page yet , and we don ' t care about the new
* sibling until it ' s linked into the btree .
* scribbling on the original page yet ; see comments above .
*/
START_CRIT_SECTION ( ) ;
@ -992,19 +1044,21 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
* ( in the page management code ) that the center of a page always be
* clean , and the most efficient way to guarantee this is just to compact
* the data by reinserting it into a new left page . ( XXX the latter
* comment is probably obsolete . )
* comment is probably obsolete ; but in any case it ' s good to not scribble
* on the original page until we enter the critical section . )
*
* We need to do this before writing the WAL record , so that XLogInsert
* can WAL log an image of the page if necessary .
*/
PageRestoreTempPage ( leftpage , origpage ) ;
/* leftpage, lopaque must not be used below here */
MarkBufferDirty ( buf ) ;
MarkBufferDirty ( rbuf ) ;
if ( ! P_RIGHTMOST ( ropaque ) )
{
sopaque - > btpo_prev = BufferGetBlockNumber ( rbuf ) ;
sopaque - > btpo_prev = rightpagenumber ;
MarkBufferDirty ( sbuf ) ;
}
@ -1018,8 +1072,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
XLogRecData * lastrdata ;
xlrec . node = rel - > rd_node ;
xlrec . leftsib = BufferGetBlockNumber ( buf ) ;
xlrec . rightsib = BufferGetBlockNumber ( rbuf ) ;
xlrec . leftsib = origpagenumber ;
xlrec . rightsib = rightpagenumber ;
xlrec . rnext = ropaque - > btpo_next ;
xlrec . level = ropaque - > btpo . level ;
xlrec . firstright = firstright ;
@ -1818,13 +1872,11 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
* we insert the tuples in order , so that the given itup_off does
* represent the final position of the tuple !
*/
static void
_bt_pgaddtup ( Relation rel ,
Page page ,
static bool
_bt_pgaddtup ( Page page ,
Size itemsize ,
IndexTuple itup ,
OffsetNumber itup_off ,
const char * where )
OffsetNumber itup_off )
{
BTPageOpaque opaque = ( BTPageOpaque ) PageGetSpecialPointer ( page ) ;
IndexTupleData trunctuple ;
@ -1839,8 +1891,9 @@ _bt_pgaddtup(Relation rel,
if ( PageAddItem ( page , ( Item ) itup , itemsize , itup_off ,
false , false ) = = InvalidOffsetNumber )
elog ( PANIC , " failed to add item to the %s in index \" %s \" " ,
where , RelationGetRelationName ( rel ) ) ;
return false ;
return true ;
}
/*