mirror of https://github.com/postgres/postgres
The warning about hash indexes not being write-ahead logged and their use being discouraged has been removed. "snapshot too old" is now supported for tables with hash indexes. Most importantly, barring bugs, hash indexes will now be crash-safe and usable on standbys. This commit doesn't yet add WAL consistency checking for hash indexes, as we now have for other index types; a separate patch has been submitted to cure that lack. Amit Kapila, reviewed and slightly modified by me. The larger patch series of which this is a part has been reviewed and tested by Álvaro Herrera, Ashutosh Sharma, Mark Kirkwood, Jeff Janes, and Jesper Pedersen. Discussion: http://postgr.es/m/CAA4eK1JOBX=YU33631Qh-XivYXtPSALh514+jR8XeD7v+K3r_Q@mail.gmail.compull/17/merge
parent
2b32ac2a59
commit
c11453ce0a
@ -0,0 +1,963 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* hash_xlog.c |
||||
* WAL replay logic for hash index. |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* src/backend/access/hash/hash_xlog.c |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#include "postgres.h" |
||||
|
||||
#include "access/hash.h" |
||||
#include "access/hash_xlog.h" |
||||
#include "access/xlogutils.h" |
||||
|
||||
/*
|
||||
* replay a hash index meta page |
||||
*/ |
||||
static void |
||||
hash_xlog_init_meta_page(XLogReaderState *record) |
||||
{ |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
Page page; |
||||
Buffer metabuf; |
||||
|
||||
xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record); |
||||
|
||||
/* create the index' metapage */ |
||||
metabuf = XLogInitBufferForRedo(record, 0); |
||||
Assert(BufferIsValid(metabuf)); |
||||
_hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid, |
||||
xlrec->ffactor, true); |
||||
page = (Page) BufferGetPage(metabuf); |
||||
PageSetLSN(page, lsn); |
||||
MarkBufferDirty(metabuf); |
||||
/* all done */ |
||||
UnlockReleaseBuffer(metabuf); |
||||
} |
||||
|
||||
/*
|
||||
* replay a hash index bitmap page |
||||
*/ |
||||
static void |
||||
hash_xlog_init_bitmap_page(XLogReaderState *record) |
||||
{ |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
Buffer bitmapbuf; |
||||
Buffer metabuf; |
||||
Page page; |
||||
HashMetaPage metap; |
||||
uint32 num_buckets; |
||||
|
||||
xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record); |
||||
|
||||
/*
|
||||
* Initialize bitmap page |
||||
*/ |
||||
bitmapbuf = XLogInitBufferForRedo(record, 0); |
||||
_hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true); |
||||
PageSetLSN(BufferGetPage(bitmapbuf), lsn); |
||||
MarkBufferDirty(bitmapbuf); |
||||
UnlockReleaseBuffer(bitmapbuf); |
||||
|
||||
/* add the new bitmap page to the metapage's list of bitmaps */ |
||||
if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
/*
|
||||
* Note: in normal operation, we'd update the metapage while still |
||||
* holding lock on the bitmap page. But during replay it's not |
||||
* necessary to hold that lock, since nobody can see it yet; the |
||||
* creating transaction hasn't yet committed. |
||||
*/ |
||||
page = BufferGetPage(metabuf); |
||||
metap = HashPageGetMeta(page); |
||||
|
||||
num_buckets = metap->hashm_maxbucket + 1; |
||||
metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1; |
||||
metap->hashm_nmaps++; |
||||
|
||||
PageSetLSN(page, lsn); |
||||
MarkBufferDirty(metabuf); |
||||
} |
||||
if (BufferIsValid(metabuf)) |
||||
UnlockReleaseBuffer(metabuf); |
||||
} |
||||
|
||||
/*
|
||||
* replay a hash index insert without split |
||||
*/ |
||||
static void |
||||
hash_xlog_insert(XLogReaderState *record) |
||||
{ |
||||
HashMetaPage metap; |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record); |
||||
Buffer buffer; |
||||
Page page; |
||||
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) |
||||
{ |
||||
Size datalen; |
||||
char *datapos = XLogRecGetBlockData(record, 0, &datalen); |
||||
|
||||
page = BufferGetPage(buffer); |
||||
|
||||
if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum, |
||||
false, false) == InvalidOffsetNumber) |
||||
elog(PANIC, "hash_xlog_insert: failed to add item"); |
||||
|
||||
PageSetLSN(page, lsn); |
||||
MarkBufferDirty(buffer); |
||||
} |
||||
if (BufferIsValid(buffer)) |
||||
UnlockReleaseBuffer(buffer); |
||||
|
||||
if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) |
||||
{ |
||||
/*
|
||||
* Note: in normal operation, we'd update the metapage while still |
||||
* holding lock on the page we inserted into. But during replay it's |
||||
* not necessary to hold that lock, since no other index updates can |
||||
* be happening concurrently. |
||||
*/ |
||||
page = BufferGetPage(buffer); |
||||
metap = HashPageGetMeta(page); |
||||
metap->hashm_ntuples += 1; |
||||
|
||||
PageSetLSN(page, lsn); |
||||
MarkBufferDirty(buffer); |
||||
} |
||||
if (BufferIsValid(buffer)) |
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
|
||||
/*
|
||||
* replay addition of overflow page for hash index |
||||
*/ |
||||
static void |
||||
hash_xlog_add_ovfl_page(XLogReaderState *record) |
||||
{ |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record); |
||||
Buffer leftbuf; |
||||
Buffer ovflbuf; |
||||
Buffer metabuf; |
||||
BlockNumber leftblk; |
||||
BlockNumber rightblk; |
||||
BlockNumber newmapblk = InvalidBlockNumber; |
||||
Page ovflpage; |
||||
HashPageOpaque ovflopaque; |
||||
uint32 *num_bucket; |
||||
char *data; |
||||
Size datalen PG_USED_FOR_ASSERTS_ONLY; |
||||
bool new_bmpage = false; |
||||
|
||||
XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk); |
||||
XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk); |
||||
|
||||
ovflbuf = XLogInitBufferForRedo(record, 0); |
||||
Assert(BufferIsValid(ovflbuf)); |
||||
|
||||
data = XLogRecGetBlockData(record, 0, &datalen); |
||||
num_bucket = (uint32 *) data; |
||||
Assert(datalen == sizeof(uint32)); |
||||
_hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE, |
||||
true); |
||||
/* update backlink */ |
||||
ovflpage = BufferGetPage(ovflbuf); |
||||
ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); |
||||
ovflopaque->hasho_prevblkno = leftblk; |
||||
|
||||
PageSetLSN(ovflpage, lsn); |
||||
MarkBufferDirty(ovflbuf); |
||||
|
||||
if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
Page leftpage; |
||||
HashPageOpaque leftopaque; |
||||
|
||||
leftpage = BufferGetPage(leftbuf); |
||||
leftopaque = (HashPageOpaque) PageGetSpecialPointer(leftpage); |
||||
leftopaque->hasho_nextblkno = rightblk; |
||||
|
||||
PageSetLSN(leftpage, lsn); |
||||
MarkBufferDirty(leftbuf); |
||||
} |
||||
|
||||
if (BufferIsValid(leftbuf)) |
||||
UnlockReleaseBuffer(leftbuf); |
||||
UnlockReleaseBuffer(ovflbuf); |
||||
|
||||
/*
|
||||
* Note: in normal operation, we'd update the bitmap and meta page while |
||||
* still holding lock on the overflow pages. But during replay it's not |
||||
* necessary to hold those locks, since no other index updates can be |
||||
* happening concurrently. |
||||
*/ |
||||
if (XLogRecHasBlockRef(record, 2)) |
||||
{ |
||||
Buffer mapbuffer; |
||||
|
||||
if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO) |
||||
{ |
||||
Page mappage = (Page) BufferGetPage(mapbuffer); |
||||
uint32 *freep = NULL; |
||||
char *data; |
||||
uint32 *bitmap_page_bit; |
||||
|
||||
freep = HashPageGetBitmap(mappage); |
||||
|
||||
data = XLogRecGetBlockData(record, 2, &datalen); |
||||
bitmap_page_bit = (uint32 *) data; |
||||
|
||||
SETBIT(freep, *bitmap_page_bit); |
||||
|
||||
PageSetLSN(mappage, lsn); |
||||
MarkBufferDirty(mapbuffer); |
||||
} |
||||
if (BufferIsValid(mapbuffer)) |
||||
UnlockReleaseBuffer(mapbuffer); |
||||
} |
||||
|
||||
if (XLogRecHasBlockRef(record, 3)) |
||||
{ |
||||
Buffer newmapbuf; |
||||
|
||||
newmapbuf = XLogInitBufferForRedo(record, 3); |
||||
|
||||
_hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true); |
||||
|
||||
new_bmpage = true; |
||||
newmapblk = BufferGetBlockNumber(newmapbuf); |
||||
|
||||
MarkBufferDirty(newmapbuf); |
||||
PageSetLSN(BufferGetPage(newmapbuf), lsn); |
||||
|
||||
UnlockReleaseBuffer(newmapbuf); |
||||
} |
||||
|
||||
if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
HashMetaPage metap; |
||||
Page page; |
||||
uint32 *firstfree_ovflpage; |
||||
|
||||
data = XLogRecGetBlockData(record, 4, &datalen); |
||||
firstfree_ovflpage = (uint32 *) data; |
||||
|
||||
page = BufferGetPage(metabuf); |
||||
metap = HashPageGetMeta(page); |
||||
metap->hashm_firstfree = *firstfree_ovflpage; |
||||
|
||||
if (!xlrec->bmpage_found) |
||||
{ |
||||
metap->hashm_spares[metap->hashm_ovflpoint]++; |
||||
|
||||
if (new_bmpage) |
||||
{ |
||||
Assert(BlockNumberIsValid(newmapblk)); |
||||
|
||||
metap->hashm_mapp[metap->hashm_nmaps] = newmapblk; |
||||
metap->hashm_nmaps++; |
||||
metap->hashm_spares[metap->hashm_ovflpoint]++; |
||||
} |
||||
} |
||||
|
||||
PageSetLSN(page, lsn); |
||||
MarkBufferDirty(metabuf); |
||||
} |
||||
if (BufferIsValid(metabuf)) |
||||
UnlockReleaseBuffer(metabuf); |
||||
} |
||||
|
||||
/*
|
||||
* replay allocation of page for split operation |
||||
*/ |
||||
static void |
||||
hash_xlog_split_allocate_page(XLogReaderState *record) |
||||
{ |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record); |
||||
Buffer oldbuf; |
||||
Buffer newbuf; |
||||
Buffer metabuf; |
||||
Size datalen PG_USED_FOR_ASSERTS_ONLY; |
||||
char *data; |
||||
XLogRedoAction action; |
||||
|
||||
/*
|
||||
* To be consistent with normal operation, here we take cleanup locks on |
||||
* both the old and new buckets even though there can't be any concurrent |
||||
* inserts. |
||||
*/ |
||||
|
||||
/* replay the record for old bucket */ |
||||
action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf); |
||||
|
||||
/*
|
||||
* Note that we still update the page even if it was restored from a full |
||||
* page image, because the special space is not included in the image. |
||||
*/ |
||||
if (action == BLK_NEEDS_REDO || action == BLK_RESTORED) |
||||
{ |
||||
Page oldpage; |
||||
HashPageOpaque oldopaque; |
||||
|
||||
oldpage = BufferGetPage(oldbuf); |
||||
oldopaque = (HashPageOpaque) PageGetSpecialPointer(oldpage); |
||||
|
||||
oldopaque->hasho_flag = xlrec->old_bucket_flag; |
||||
oldopaque->hasho_prevblkno = xlrec->new_bucket; |
||||
|
||||
PageSetLSN(oldpage, lsn); |
||||
MarkBufferDirty(oldbuf); |
||||
} |
||||
|
||||
/* replay the record for new bucket */ |
||||
newbuf = XLogInitBufferForRedo(record, 1); |
||||
_hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket, |
||||
xlrec->new_bucket_flag, true); |
||||
if (!IsBufferCleanupOK(newbuf)) |
||||
elog(PANIC, "hash_xlog_split_allocate_page: failed to acquire cleanup lock"); |
||||
MarkBufferDirty(newbuf); |
||||
PageSetLSN(BufferGetPage(newbuf), lsn); |
||||
|
||||
/*
|
||||
* We can release the lock on old bucket early as well but doing here to |
||||
* consistent with normal operation. |
||||
*/ |
||||
if (BufferIsValid(oldbuf)) |
||||
UnlockReleaseBuffer(oldbuf); |
||||
if (BufferIsValid(newbuf)) |
||||
UnlockReleaseBuffer(newbuf); |
||||
|
||||
/*
|
||||
* Note: in normal operation, we'd update the meta page while still |
||||
* holding lock on the old and new bucket pages. But during replay it's |
||||
* not necessary to hold those locks, since no other bucket splits can be |
||||
* happening concurrently. |
||||
*/ |
||||
|
||||
/* replay the record for metapage changes */ |
||||
if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
Page page; |
||||
HashMetaPage metap; |
||||
|
||||
page = BufferGetPage(metabuf); |
||||
metap = HashPageGetMeta(page); |
||||
metap->hashm_maxbucket = xlrec->new_bucket; |
||||
|
||||
data = XLogRecGetBlockData(record, 2, &datalen); |
||||
|
||||
if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS) |
||||
{ |
||||
uint32 lowmask; |
||||
uint32 *highmask; |
||||
|
||||
/* extract low and high masks. */ |
||||
memcpy(&lowmask, data, sizeof(uint32)); |
||||
highmask = (uint32 *) ((char *) data + sizeof(uint32)); |
||||
|
||||
/* update metapage */ |
||||
metap->hashm_lowmask = lowmask; |
||||
metap->hashm_highmask = *highmask; |
||||
|
||||
data += sizeof(uint32) * 2; |
||||
} |
||||
|
||||
if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT) |
||||
{ |
||||
uint32 ovflpoint; |
||||
uint32 *ovflpages; |
||||
|
||||
/* extract information of overflow pages. */ |
||||
memcpy(&ovflpoint, data, sizeof(uint32)); |
||||
ovflpages = (uint32 *) ((char *) data + sizeof(uint32)); |
||||
|
||||
/* update metapage */ |
||||
metap->hashm_spares[ovflpoint] = *ovflpages; |
||||
metap->hashm_ovflpoint = ovflpoint; |
||||
} |
||||
|
||||
MarkBufferDirty(metabuf); |
||||
PageSetLSN(BufferGetPage(metabuf), lsn); |
||||
} |
||||
|
||||
if (BufferIsValid(metabuf)) |
||||
UnlockReleaseBuffer(metabuf); |
||||
} |
||||
|
||||
/*
|
||||
* replay of split operation |
||||
*/ |
||||
static void |
||||
hash_xlog_split_page(XLogReaderState *record) |
||||
{ |
||||
Buffer buf; |
||||
|
||||
if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED) |
||||
elog(ERROR, "Hash split record did not contain a full-page image"); |
||||
|
||||
UnlockReleaseBuffer(buf); |
||||
} |
||||
|
||||
/*
|
||||
* replay completion of split operation |
||||
*/ |
||||
static void |
||||
hash_xlog_split_complete(XLogReaderState *record) |
||||
{ |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record); |
||||
Buffer oldbuf; |
||||
Buffer newbuf; |
||||
XLogRedoAction action; |
||||
|
||||
/* replay the record for old bucket */ |
||||
action = XLogReadBufferForRedo(record, 0, &oldbuf); |
||||
|
||||
/*
|
||||
* Note that we still update the page even if it was restored from a full |
||||
* page image, because the bucket flag is not included in the image. |
||||
*/ |
||||
if (action == BLK_NEEDS_REDO || action == BLK_RESTORED) |
||||
{ |
||||
Page oldpage; |
||||
HashPageOpaque oldopaque; |
||||
|
||||
oldpage = BufferGetPage(oldbuf); |
||||
oldopaque = (HashPageOpaque) PageGetSpecialPointer(oldpage); |
||||
|
||||
oldopaque->hasho_flag = xlrec->old_bucket_flag; |
||||
|
||||
PageSetLSN(oldpage, lsn); |
||||
MarkBufferDirty(oldbuf); |
||||
} |
||||
if (BufferIsValid(oldbuf)) |
||||
UnlockReleaseBuffer(oldbuf); |
||||
|
||||
/* replay the record for new bucket */ |
||||
action = XLogReadBufferForRedo(record, 1, &newbuf); |
||||
|
||||
/*
|
||||
* Note that we still update the page even if it was restored from a full |
||||
* page image, because the bucket flag is not included in the image. |
||||
*/ |
||||
if (action == BLK_NEEDS_REDO || action == BLK_RESTORED) |
||||
{ |
||||
Page newpage; |
||||
HashPageOpaque nopaque; |
||||
|
||||
newpage = BufferGetPage(newbuf); |
||||
nopaque = (HashPageOpaque) PageGetSpecialPointer(newpage); |
||||
|
||||
nopaque->hasho_flag = xlrec->new_bucket_flag; |
||||
|
||||
PageSetLSN(newpage, lsn); |
||||
MarkBufferDirty(newbuf); |
||||
} |
||||
if (BufferIsValid(newbuf)) |
||||
UnlockReleaseBuffer(newbuf); |
||||
} |
||||
|
||||
/*
|
||||
* replay move of page contents for squeeze operation of hash index |
||||
*/ |
||||
static void |
||||
hash_xlog_move_page_contents(XLogReaderState *record) |
||||
{ |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record); |
||||
Buffer bucketbuf = InvalidBuffer; |
||||
Buffer writebuf = InvalidBuffer; |
||||
Buffer deletebuf = InvalidBuffer; |
||||
XLogRedoAction action; |
||||
|
||||
/*
|
||||
* Ensure we have a cleanup lock on primary bucket page before we start |
||||
* with the actual replay operation. This is to ensure that neither a |
||||
* scan can start nor a scan can be already-in-progress during the replay |
||||
* of this operation. If we allow scans during this operation, then they |
||||
* can miss some records or show the same record multiple times. |
||||
*/ |
||||
if (xldata->is_prim_bucket_same_wrt) |
||||
action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf); |
||||
else |
||||
{ |
||||
/*
|
||||
* we don't care for return value as the purpose of reading bucketbuf |
||||
* is to ensure a cleanup lock on primary bucket page. |
||||
*/ |
||||
(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf); |
||||
|
||||
action = XLogReadBufferForRedo(record, 1, &writebuf); |
||||
} |
||||
|
||||
/* replay the record for adding entries in overflow buffer */ |
||||
if (action == BLK_NEEDS_REDO) |
||||
{ |
||||
Page writepage; |
||||
char *begin; |
||||
char *data; |
||||
Size datalen; |
||||
uint16 ninserted = 0; |
||||
|
||||
data = begin = XLogRecGetBlockData(record, 1, &datalen); |
||||
|
||||
writepage = (Page) BufferGetPage(writebuf); |
||||
|
||||
if (xldata->ntups > 0) |
||||
{ |
||||
OffsetNumber *towrite = (OffsetNumber *) data; |
||||
|
||||
data += sizeof(OffsetNumber) * xldata->ntups; |
||||
|
||||
while (data - begin < datalen) |
||||
{ |
||||
IndexTuple itup = (IndexTuple) data; |
||||
Size itemsz; |
||||
OffsetNumber l; |
||||
|
||||
itemsz = IndexTupleDSize(*itup); |
||||
itemsz = MAXALIGN(itemsz); |
||||
|
||||
data += itemsz; |
||||
|
||||
l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false); |
||||
if (l == InvalidOffsetNumber) |
||||
elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes", |
||||
(int) itemsz); |
||||
|
||||
ninserted++; |
||||
} |
||||
} |
||||
|
||||
/*
|
||||
* number of tuples inserted must be same as requested in REDO record. |
||||
*/ |
||||
Assert(ninserted == xldata->ntups); |
||||
|
||||
PageSetLSN(writepage, lsn); |
||||
MarkBufferDirty(writebuf); |
||||
} |
||||
|
||||
/* replay the record for deleting entries from overflow buffer */ |
||||
if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
Page page; |
||||
char *ptr; |
||||
Size len; |
||||
|
||||
ptr = XLogRecGetBlockData(record, 2, &len); |
||||
|
||||
page = (Page) BufferGetPage(deletebuf); |
||||
|
||||
if (len > 0) |
||||
{ |
||||
OffsetNumber *unused; |
||||
OffsetNumber *unend; |
||||
|
||||
unused = (OffsetNumber *) ptr; |
||||
unend = (OffsetNumber *) ((char *) ptr + len); |
||||
|
||||
if ((unend - unused) > 0) |
||||
PageIndexMultiDelete(page, unused, unend - unused); |
||||
} |
||||
|
||||
PageSetLSN(page, lsn); |
||||
MarkBufferDirty(deletebuf); |
||||
} |
||||
|
||||
/*
|
||||
* Replay is complete, now we can release the buffers. We release locks at |
||||
* end of replay operation to ensure that we hold lock on primary bucket |
||||
* page till end of operation. We can optimize by releasing the lock on |
||||
* write buffer as soon as the operation for same is complete, if it is |
||||
* not same as primary bucket page, but that doesn't seem to be worth |
||||
* complicating the code. |
||||
*/ |
||||
if (BufferIsValid(deletebuf)) |
||||
UnlockReleaseBuffer(deletebuf); |
||||
|
||||
if (BufferIsValid(writebuf)) |
||||
UnlockReleaseBuffer(writebuf); |
||||
|
||||
if (BufferIsValid(bucketbuf)) |
||||
UnlockReleaseBuffer(bucketbuf); |
||||
} |
||||
|
||||
/*
|
||||
* replay squeeze page operation of hash index |
||||
*/ |
||||
static void |
||||
hash_xlog_squeeze_page(XLogReaderState *record) |
||||
{ |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record); |
||||
Buffer bucketbuf = InvalidBuffer; |
||||
Buffer writebuf; |
||||
Buffer ovflbuf; |
||||
Buffer prevbuf = InvalidBuffer; |
||||
Buffer mapbuf; |
||||
XLogRedoAction action; |
||||
|
||||
/*
|
||||
* Ensure we have a cleanup lock on primary bucket page before we start |
||||
* with the actual replay operation. This is to ensure that neither a |
||||
* scan can start nor a scan can be already-in-progress during the replay |
||||
* of this operation. If we allow scans during this operation, then they |
||||
* can miss some records or show the same record multiple times. |
||||
*/ |
||||
if (xldata->is_prim_bucket_same_wrt) |
||||
action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf); |
||||
else |
||||
{ |
||||
/*
|
||||
* we don't care for return value as the purpose of reading bucketbuf |
||||
* is to ensure a cleanup lock on primary bucket page. |
||||
*/ |
||||
(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf); |
||||
|
||||
action = XLogReadBufferForRedo(record, 1, &writebuf); |
||||
} |
||||
|
||||
/* replay the record for adding entries in overflow buffer */ |
||||
if (action == BLK_NEEDS_REDO) |
||||
{ |
||||
Page writepage; |
||||
char *begin; |
||||
char *data; |
||||
Size datalen; |
||||
uint16 ninserted = 0; |
||||
|
||||
data = begin = XLogRecGetBlockData(record, 1, &datalen); |
||||
|
||||
writepage = (Page) BufferGetPage(writebuf); |
||||
|
||||
if (xldata->ntups > 0) |
||||
{ |
||||
OffsetNumber *towrite = (OffsetNumber *) data; |
||||
|
||||
data += sizeof(OffsetNumber) * xldata->ntups; |
||||
|
||||
while (data - begin < datalen) |
||||
{ |
||||
IndexTuple itup = (IndexTuple) data; |
||||
Size itemsz; |
||||
OffsetNumber l; |
||||
|
||||
itemsz = IndexTupleDSize(*itup); |
||||
itemsz = MAXALIGN(itemsz); |
||||
|
||||
data += itemsz; |
||||
|
||||
l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false); |
||||
if (l == InvalidOffsetNumber) |
||||
elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes", |
||||
(int) itemsz); |
||||
|
||||
ninserted++; |
||||
} |
||||
} |
||||
|
||||
/*
|
||||
* number of tuples inserted must be same as requested in REDO record. |
||||
*/ |
||||
Assert(ninserted == xldata->ntups); |
||||
|
||||
/*
|
||||
* if the page on which are adding tuples is a page previous to freed |
||||
* overflow page, then update its nextblno. |
||||
*/ |
||||
if (xldata->is_prev_bucket_same_wrt) |
||||
{ |
||||
HashPageOpaque writeopaque = (HashPageOpaque) PageGetSpecialPointer(writepage); |
||||
|
||||
writeopaque->hasho_nextblkno = xldata->nextblkno; |
||||
} |
||||
|
||||
PageSetLSN(writepage, lsn); |
||||
MarkBufferDirty(writebuf); |
||||
} |
||||
|
||||
/* replay the record for initializing overflow buffer */ |
||||
if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
Page ovflpage; |
||||
|
||||
ovflpage = BufferGetPage(ovflbuf); |
||||
|
||||
_hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf)); |
||||
|
||||
PageSetLSN(ovflpage, lsn); |
||||
MarkBufferDirty(ovflbuf); |
||||
} |
||||
if (BufferIsValid(ovflbuf)) |
||||
UnlockReleaseBuffer(ovflbuf); |
||||
|
||||
/* replay the record for page previous to the freed overflow page */ |
||||
if (!xldata->is_prev_bucket_same_wrt && |
||||
XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
Page prevpage = BufferGetPage(prevbuf); |
||||
HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage); |
||||
|
||||
prevopaque->hasho_nextblkno = xldata->nextblkno; |
||||
|
||||
PageSetLSN(prevpage, lsn); |
||||
MarkBufferDirty(prevbuf); |
||||
} |
||||
if (BufferIsValid(prevbuf)) |
||||
UnlockReleaseBuffer(prevbuf); |
||||
|
||||
/* replay the record for page next to the freed overflow page */ |
||||
if (XLogRecHasBlockRef(record, 4)) |
||||
{ |
||||
Buffer nextbuf; |
||||
|
||||
if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
Page nextpage = BufferGetPage(nextbuf); |
||||
HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage); |
||||
|
||||
nextopaque->hasho_prevblkno = xldata->prevblkno; |
||||
|
||||
PageSetLSN(nextpage, lsn); |
||||
MarkBufferDirty(nextbuf); |
||||
} |
||||
if (BufferIsValid(nextbuf)) |
||||
UnlockReleaseBuffer(nextbuf); |
||||
} |
||||
|
||||
if (BufferIsValid(writebuf)) |
||||
UnlockReleaseBuffer(writebuf); |
||||
|
||||
if (BufferIsValid(bucketbuf)) |
||||
UnlockReleaseBuffer(bucketbuf); |
||||
|
||||
/*
|
||||
* Note: in normal operation, we'd update the bitmap and meta page while |
||||
* still holding lock on the primary bucket page and overflow pages. But |
||||
* during replay it's not necessary to hold those locks, since no other |
||||
* index updates can be happening concurrently. |
||||
*/ |
||||
/* replay the record for bitmap page */ |
||||
if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
Page mappage = (Page) BufferGetPage(mapbuf); |
||||
uint32 *freep = NULL; |
||||
char *data; |
||||
uint32 *bitmap_page_bit; |
||||
Size datalen; |
||||
|
||||
freep = HashPageGetBitmap(mappage); |
||||
|
||||
data = XLogRecGetBlockData(record, 5, &datalen); |
||||
bitmap_page_bit = (uint32 *) data; |
||||
|
||||
CLRBIT(freep, *bitmap_page_bit); |
||||
|
||||
PageSetLSN(mappage, lsn); |
||||
MarkBufferDirty(mapbuf); |
||||
} |
||||
if (BufferIsValid(mapbuf)) |
||||
UnlockReleaseBuffer(mapbuf); |
||||
|
||||
/* replay the record for meta page */ |
||||
if (XLogRecHasBlockRef(record, 6)) |
||||
{ |
||||
Buffer metabuf; |
||||
|
||||
if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
HashMetaPage metap; |
||||
Page page; |
||||
char *data; |
||||
uint32 *firstfree_ovflpage; |
||||
Size datalen; |
||||
|
||||
data = XLogRecGetBlockData(record, 6, &datalen); |
||||
firstfree_ovflpage = (uint32 *) data; |
||||
|
||||
page = BufferGetPage(metabuf); |
||||
metap = HashPageGetMeta(page); |
||||
metap->hashm_firstfree = *firstfree_ovflpage; |
||||
|
||||
PageSetLSN(page, lsn); |
||||
MarkBufferDirty(metabuf); |
||||
} |
||||
if (BufferIsValid(metabuf)) |
||||
UnlockReleaseBuffer(metabuf); |
||||
} |
||||
} |
||||
|
||||
/*
|
||||
* replay delete operation of hash index |
||||
*/ |
||||
static void |
||||
hash_xlog_delete(XLogReaderState *record) |
||||
{ |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record); |
||||
Buffer bucketbuf = InvalidBuffer; |
||||
Buffer deletebuf; |
||||
Page page; |
||||
XLogRedoAction action; |
||||
|
||||
/*
|
||||
* Ensure we have a cleanup lock on primary bucket page before we start |
||||
* with the actual replay operation. This is to ensure that neither a |
||||
* scan can start nor a scan can be already-in-progress during the replay |
||||
* of this operation. If we allow scans during this operation, then they |
||||
* can miss some records or show the same record multiple times. |
||||
*/ |
||||
if (xldata->is_primary_bucket_page) |
||||
action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf); |
||||
else |
||||
{ |
||||
/*
|
||||
* we don't care for return value as the purpose of reading bucketbuf |
||||
* is to ensure a cleanup lock on primary bucket page. |
||||
*/ |
||||
(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf); |
||||
|
||||
action = XLogReadBufferForRedo(record, 1, &deletebuf); |
||||
} |
||||
|
||||
/* replay the record for deleting entries in bucket page */ |
||||
if (action == BLK_NEEDS_REDO) |
||||
{ |
||||
char *ptr; |
||||
Size len; |
||||
|
||||
ptr = XLogRecGetBlockData(record, 1, &len); |
||||
|
||||
page = (Page) BufferGetPage(deletebuf); |
||||
|
||||
if (len > 0) |
||||
{ |
||||
OffsetNumber *unused; |
||||
OffsetNumber *unend; |
||||
|
||||
unused = (OffsetNumber *) ptr; |
||||
unend = (OffsetNumber *) ((char *) ptr + len); |
||||
|
||||
if ((unend - unused) > 0) |
||||
PageIndexMultiDelete(page, unused, unend - unused); |
||||
} |
||||
|
||||
PageSetLSN(page, lsn); |
||||
MarkBufferDirty(deletebuf); |
||||
} |
||||
if (BufferIsValid(deletebuf)) |
||||
UnlockReleaseBuffer(deletebuf); |
||||
|
||||
if (BufferIsValid(bucketbuf)) |
||||
UnlockReleaseBuffer(bucketbuf); |
||||
} |
||||
|
||||
/*
|
||||
* replay split cleanup flag operation for primary bucket page. |
||||
*/ |
||||
static void |
||||
hash_xlog_split_cleanup(XLogReaderState *record) |
||||
{ |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
Buffer buffer; |
||||
Page page; |
||||
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) |
||||
{ |
||||
HashPageOpaque bucket_opaque; |
||||
|
||||
page = (Page) BufferGetPage(buffer); |
||||
|
||||
bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page); |
||||
bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP; |
||||
PageSetLSN(page, lsn); |
||||
MarkBufferDirty(buffer); |
||||
} |
||||
if (BufferIsValid(buffer)) |
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
|
||||
/*
|
||||
* replay for update meta page |
||||
*/ |
||||
static void |
||||
hash_xlog_update_meta_page(XLogReaderState *record) |
||||
{ |
||||
HashMetaPage metap; |
||||
XLogRecPtr lsn = record->EndRecPtr; |
||||
xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record); |
||||
Buffer metabuf; |
||||
Page page; |
||||
|
||||
if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO) |
||||
{ |
||||
page = BufferGetPage(metabuf); |
||||
metap = HashPageGetMeta(page); |
||||
|
||||
metap->hashm_ntuples = xldata->ntuples; |
||||
|
||||
PageSetLSN(page, lsn); |
||||
MarkBufferDirty(metabuf); |
||||
} |
||||
if (BufferIsValid(metabuf)) |
||||
UnlockReleaseBuffer(metabuf); |
||||
} |
||||
|
||||
void |
||||
hash_redo(XLogReaderState *record) |
||||
{ |
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; |
||||
|
||||
switch (info) |
||||
{ |
||||
case XLOG_HASH_INIT_META_PAGE: |
||||
hash_xlog_init_meta_page(record); |
||||
break; |
||||
case XLOG_HASH_INIT_BITMAP_PAGE: |
||||
hash_xlog_init_bitmap_page(record); |
||||
break; |
||||
case XLOG_HASH_INSERT: |
||||
hash_xlog_insert(record); |
||||
break; |
||||
case XLOG_HASH_ADD_OVFL_PAGE: |
||||
hash_xlog_add_ovfl_page(record); |
||||
break; |
||||
case XLOG_HASH_SPLIT_ALLOCATE_PAGE: |
||||
hash_xlog_split_allocate_page(record); |
||||
break; |
||||
case XLOG_HASH_SPLIT_PAGE: |
||||
hash_xlog_split_page(record); |
||||
break; |
||||
case XLOG_HASH_SPLIT_COMPLETE: |
||||
hash_xlog_split_complete(record); |
||||
break; |
||||
case XLOG_HASH_MOVE_PAGE_CONTENTS: |
||||
hash_xlog_move_page_contents(record); |
||||
break; |
||||
case XLOG_HASH_SQUEEZE_PAGE: |
||||
hash_xlog_squeeze_page(record); |
||||
break; |
||||
case XLOG_HASH_DELETE: |
||||
hash_xlog_delete(record); |
||||
break; |
||||
case XLOG_HASH_SPLIT_CLEANUP: |
||||
hash_xlog_split_cleanup(record); |
||||
break; |
||||
case XLOG_HASH_UPDATE_META_PAGE: |
||||
hash_xlog_update_meta_page(record); |
||||
break; |
||||
default: |
||||
elog(PANIC, "hash_redo: unknown op code %u", info); |
||||
} |
||||
} |
Loading…
Reference in new issue