Introduce WAL records to log reuse of btree pages, allowing conflict
resolution during Hot Standby. Page reuse interlock requested by Tom.
Analysis and patch by me.
REL9_0_ALPHA4_BRANCH
Simon Riggs 16 years ago
parent 4688869f41
commit fafa374f2d
  1. 58
      src/backend/access/nbtree/nbtpage.c
  2. 60
      src/backend/access/nbtree/nbtxlog.c
  3. 15
      src/include/access/nbtree.h

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.118 2010/02/08 04:33:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.119 2010/02/13 00:59:58 sriggs Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@ -446,6 +446,48 @@ _bt_checkpage(Relation rel, Buffer buf)
errhint("Please REINDEX it.")));
}
/*
 * _bt_log_reuse_page() -- Log the reuse of a page from the FSM.
 *
 * Emits an XLOG_BTREE_REUSE_PAGE record holding the relation's rd_node,
 * the block number being reused, and latestRemovedXid.  Nothing is logged
 * when rel->rd_istemp is set.
 *
 * The record is not attached to any buffer (rdata[0].buffer is
 * InvalidBuffer); it describes the reuse itself rather than a change to
 * the page contents.
 */
static void
_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
{
	xl_btree_reuse_page xlrec_reuse;
	XLogRecData rdata[1];

	if (rel->rd_istemp)
		return;

	/* No ereport(ERROR) until changes are logged */
	START_CRIT_SECTION();

	/*
	 * We don't do MarkBufferDirty here because we're about to initialise
	 * the page, and nobody else can see it yet.
	 */

	/* XLOG stuff */
	xlrec_reuse.node = rel->rd_node;
	xlrec_reuse.block = blkno;
	xlrec_reuse.latestRemovedXid = latestRemovedXid;

	rdata[0].data = (char *) &xlrec_reuse;
	rdata[0].len = SizeOfBtreeReusePage;
	rdata[0].buffer = InvalidBuffer;
	rdata[0].next = NULL;

	/*
	 * The record pointer returned by XLogInsert is deliberately not kept:
	 * we don't do PageSetLSN or PageSetTLI here because we're about to
	 * initialise the page, so there is nothing to stamp it onto.
	 */
	XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);

	END_CRIT_SECTION();
}
/*
* _bt_getbuf() -- Get a buffer by block number for read or write.
*
@ -510,7 +552,19 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
{
page = BufferGetPage(buf);
if (_bt_page_recyclable(page))
{
{
/*
* If we are generating WAL for Hot Standby then create
* a WAL record that will allow us to conflict with
* queries running on standby.
*/
if (XLogStandbyInfoActive())
{
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
_bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
}
/* Okay to use page. Re-initialize and return it */
_bt_pageinit(page, BufferGetPageSize(buf));
return buf;

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.60 2010/02/08 04:33:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.61 2010/02/13 00:59:58 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
@ -814,26 +814,48 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
{
uint8 info = record->xl_info & ~XLR_INFO_MASK;
/*
* Btree delete records can conflict with standby queries. You might
* think that vacuum records would conflict as well, but we've handled
* that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
* cleaned by the vacuum of the heap and so we can resolve any conflicts
* just once when that arrives. After that we know that no conflicts
* exist from individual btree vacuum records on that index.
*/
if (InHotStandby && info == XLOG_BTREE_DELETE)
if (InHotStandby)
{
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
switch (info)
{
case XLOG_BTREE_DELETE:
/*
* Btree delete records can conflict with standby queries. You might
* think that vacuum records would conflict as well, but we've handled
* that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
* cleaned by the vacuum of the heap and so we can resolve any conflicts
* just once when that arrives. After that we know that no conflicts
* exist from individual btree vacuum records on that index.
*/
{
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
/*
* XXX Currently we put everybody on death row, because
* currently _bt_delitems() supplies InvalidTransactionId.
* This can be fairly painful, so providing a better value
* here is worth some thought and possibly some effort to
* improve.
*/
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
/*
* XXX Currently we put everybody on death row, because
* currently _bt_delitems() supplies InvalidTransactionId.
* This can be fairly painful, so providing a better value
* here is worth some thought and possibly some effort to
* improve.
*/
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
}
break;
case XLOG_BTREE_REUSE_PAGE:
/*
* Btree reuse page records exist to provide a conflict point when we
* reuse pages in the index via the FSM. That's all they do, though.
*/
{
xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
}
return;
default:
break;
}
}
/*

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.128 2010/02/08 04:33:54 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.129 2010/02/13 00:59:58 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
@ -221,6 +221,7 @@ typedef struct BTMetaPageData
#define XLOG_BTREE_DELETE_PAGE_HALF 0xB0 /* page deletion that makes
* parent half-dead */
#define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during vacuum */
#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from FSM */
/*
* All that we need to find changed index tuple
@ -321,6 +322,18 @@ typedef struct xl_btree_delete
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId))
/*
* This is what we need to know about page reuse within btree.
*
* Logged as an XLOG_BTREE_REUSE_PAGE record by _bt_log_reuse_page(). On
* redo during Hot Standby, node and latestRemovedXid are handed to
* ResolveRecoveryConflictWithSnapshot().
*/
typedef struct xl_btree_reuse_page
{
RelFileNode node; /* rd_node of the relation owning the page */
BlockNumber block; /* block number of the page being reused */
TransactionId latestRemovedXid; /* xid supplied by the caller; _bt_getbuf
* passes the page's btpo.xact */
} xl_btree_reuse_page;
/* Length of the logged record data: the whole struct */
#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page))
/*
* This is what we need to know about vacuum of individual leaf index tuples.
* The WAL record can represent deletion of any number of index tuples on a

Loading…
Cancel
Save