Fix using too many LWLocks bug, reported by Craig Ringer

<craig@postnewspapers.com.au>.
It was my mistake: I missed the limit on the number of held locks. Now GIN doesn't
use continuous locks, but still holds buffers pinned to prevent interference
with vacuum's deletion algorithm.

Backpatch is needed.
REL8_5_ALPHA1_BRANCH
Teodor Sigaev 17 years ago
parent fc507eb0b6
commit cf23b75b4d
  1. 220
      src/backend/access/gin/ginget.c
  2. 12
      src/include/access/gin.h

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.13 2008/04/14 17:05:33 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.14 2008/04/22 17:52:43 teodor Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -29,58 +29,16 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
/* page was deleted by concurrent vacuum */ /* page was deleted by concurrent vacuum */
return false; return false;
if (*off > maxoff || *off == InvalidOffsetNumber)
res = -1;
else
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
if (res == 0)
{
/* page isn't changed */
return true;
}
else if (res > 0)
{
/* /*
* some items was added before our position, look further to find it * scan page to find equal or first greater value
* or first greater
*/
(*off)++;
for (; *off <= maxoff; (*off)++)
{
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
if (res == 0)
return true;
if (res < 0)
{
(*off)--;
return true;
}
}
}
else
{
/*
* some items was deleted before our position, look from begining to
* find it or first greater
*/ */
for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++) for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
{ {
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
if (res == 0) if (res <= 0)
return true; return true;
if (res < 0)
{
(*off)--;
return true;
}
}
} }
return false; return false;
@ -91,24 +49,23 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
* Stop* functions unlock buffer (but don't release!) * Stop* functions unlock buffer (but don't release!)
*/ */
static void static void
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall) startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
{ {
GinBtreeData btreeEntry;
GinBtreeStack *stackEntry;
Page page;
bool needUnlock = TRUE;
if (entry->master != NULL) if (entry->master != NULL)
{ {
entry->isFinished = entry->master->isFinished; entry->isFinished = entry->master->isFinished;
return; return;
} }
if (firstCall)
{
/* /*
* at first call we should find entry, and begin scan of posting tree * we should find entry, and begin scan of posting tree
* or just store posting list in memory * or just store posting list in memory
*/ */
GinBtreeData btreeEntry;
GinBtreeStack *stackEntry;
Page page;
bool needUnlock = TRUE;
prepareEntryScan(&btreeEntry, index, entry->entry, ginstate); prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
btreeEntry.searchMode = TRUE; btreeEntry.searchMode = TRUE;
@ -138,11 +95,25 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firs
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
entry->buffer = scanBeginPostingTree(gdi); entry->buffer = scanBeginPostingTree(gdi);
/*
* We keep buffer pinned because we need to prevent deletition
* page during scan. See GIN's vacuum implementation. RefCount
* is increased to keep buffer pinned after freeGinBtreeStack() call.
*/
IncrBufferRefCount(entry->buffer); IncrBufferRefCount(entry->buffer);
page = BufferGetPage(entry->buffer); page = BufferGetPage(entry->buffer);
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff; entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
/*
* Keep page content in memory to prevent durable page locking
*/
entry->list = (ItemPointerData *) palloc( BLCKSZ );
entry->nlist = GinPageGetOpaque(page)->maxoff;
memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
LockBuffer(entry->buffer, GIN_UNLOCK);
freeGinBtreeStack(gdi->stack); freeGinBtreeStack(gdi->stack);
pfree(gdi); pfree(gdi);
entry->isFinished = FALSE; entry->isFinished = FALSE;
@ -159,51 +130,6 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firs
if (needUnlock) if (needUnlock)
LockBuffer(stackEntry->buffer, GIN_UNLOCK); LockBuffer(stackEntry->buffer, GIN_UNLOCK);
freeGinBtreeStack(stackEntry); freeGinBtreeStack(stackEntry);
}
else if (entry->buffer != InvalidBuffer)
{
/* we should find place where we was stopped */
BlockNumber blkno;
Page page;
LockBuffer(entry->buffer, GIN_SHARE);
if (!ItemPointerIsValid(&entry->curItem))
/* start position */
return;
Assert(entry->offset != InvalidOffsetNumber);
page = BufferGetPage(entry->buffer);
/* try to find curItem in current buffer */
if (findItemInPage(page, &entry->curItem, &entry->offset))
return;
/* walk to right */
while ((blkno = GinPageGetOpaque(page)->rightlink) != InvalidBlockNumber)
{
LockBuffer(entry->buffer, GIN_UNLOCK);
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
LockBuffer(entry->buffer, GIN_SHARE);
page = BufferGetPage(entry->buffer);
entry->offset = InvalidOffsetNumber;
if (findItemInPage(page, &entry->curItem, &entry->offset))
return;
}
/*
* curItem and any greated items was deleted by concurrent vacuum, so
* we finished scan with currrent entry
*/
}
}
static void
stopScanEntry(GinScanEntry entry)
{
if (entry->buffer != InvalidBuffer)
LockBuffer(entry->buffer, GIN_UNLOCK);
} }
static void static void
@ -211,11 +137,12 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
{ {
uint32 i; uint32 i;
if (!key->firstCall)
return;
for (i = 0; i < key->nentries; i++) for (i = 0; i < key->nentries; i++)
startScanEntry(index, ginstate, key->scanEntry + i, key->firstCall); startScanEntry(index, ginstate, key->scanEntry + i);
if (key->firstCall)
{
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries); memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
key->isFinished = FALSE; key->isFinished = FALSE;
key->firstCall = FALSE; key->firstCall = FALSE;
@ -240,16 +167,6 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
key->scanEntry[i].reduceResult = TRUE; key->scanEntry[i].reduceResult = TRUE;
} }
} }
}
}
static void
stopScanKey(GinScanKey key)
{
uint32 i;
for (i = 0; i < key->nentries; i++)
stopScanEntry(key->scanEntry + i);
} }
static void static void
@ -262,44 +179,82 @@ startScan(IndexScanDesc scan)
startScanKey(scan->indexRelation, &so->ginstate, so->keys + i); startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
} }
static void /*
stopScan(IndexScanDesc scan) * Gets next ItemPointer from PostingTree. Note, that we copy
{ * page into GinScanEntry->list array and unlock page, but keep it pinned
uint32 i; * to prevent interference with vacuum
GinScanOpaque so = (GinScanOpaque) scan->opaque; */
for (i = 0; i < so->nkeys; i++)
stopScanKey(so->keys + i);
}
static void static void
entryGetNextItem(Relation index, GinScanEntry entry) entryGetNextItem(Relation index, GinScanEntry entry)
{ {
Page page = BufferGetPage(entry->buffer); Page page;
BlockNumber blkno;
for(;;)
{
entry->offset++; entry->offset++;
if (entry->offset <= GinPageGetOpaque(page)->maxoff && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
if (entry->offset <= entry->nlist)
{ {
entry->curItem = *(ItemPointerData *) GinDataPageGetItem(page, entry->offset); entry->curItem = entry->list[entry->offset - 1];
return;
} }
else
LockBuffer(entry->buffer, GIN_SHARE);
page = BufferGetPage(entry->buffer);
for(;;)
{ {
BlockNumber blkno = GinPageGetOpaque(page)->rightlink; /*
* It's needed to go by right link. During that we should refind
* first ItemPointer greater that stored
*/
blkno = GinPageGetOpaque(page)->rightlink;
LockBuffer(entry->buffer, GIN_UNLOCK); LockBuffer(entry->buffer, GIN_UNLOCK);
if (blkno == InvalidBlockNumber) if (blkno == InvalidBlockNumber)
{ {
ReleaseBuffer(entry->buffer); ReleaseBuffer(entry->buffer);
ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
entry->buffer = InvalidBuffer; entry->buffer = InvalidBuffer;
entry->isFinished = TRUE; entry->isFinished = TRUE;
return;
} }
else
{
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno); entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
LockBuffer(entry->buffer, GIN_SHARE); LockBuffer(entry->buffer, GIN_SHARE);
page = BufferGetPage(entry->buffer);
entry->offset = InvalidOffsetNumber; entry->offset = InvalidOffsetNumber;
entryGetNextItem(index, entry); if (!ItemPointerIsValid(&entry->curItem) || findItemInPage(page, &entry->curItem, &entry->offset))
{
/*
* Found position equal to or greater than stored
*/
entry->nlist = GinPageGetOpaque(page)->maxoff;
memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
LockBuffer(entry->buffer, GIN_UNLOCK);
if ( !ItemPointerIsValid(&entry->curItem) ||
compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 )
{
/*
* First pages are deleted or empty, or we found exact position,
* so break inner loop and continue outer one.
*/
break;
}
/*
* Find greater than entry->curItem position, store it.
*/
entry->curItem = entry->list[entry->offset - 1];
return;
}
} }
} }
} }
@ -319,7 +274,7 @@ entryGetItem(Relation index, GinScanEntry entry)
entry->isFinished = entry->master->isFinished; entry->isFinished = entry->master->isFinished;
entry->curItem = entry->master->curItem; entry->curItem = entry->master->curItem;
} }
else if (entry->list) else if (!BufferIsValid(entry->buffer))
{ {
entry->offset++; entry->offset++;
if (entry->offset <= entry->nlist) if (entry->offset <= entry->nlist)
@ -527,8 +482,6 @@ gingetbitmap(PG_FUNCTION_ARGS)
ntids++; ntids++;
} }
stopScan(scan);
PG_RETURN_INT64(ntids); PG_RETURN_INT64(ntids);
} }
@ -550,7 +503,6 @@ gingettuple(PG_FUNCTION_ARGS)
startScan(scan); startScan(scan);
res = scanGetItem(scan, &scan->xs_ctup.t_self, &scan->xs_recheck); res = scanGetItem(scan, &scan->xs_ctup.t_self, &scan->xs_recheck);
stopScan(scan);
PG_RETURN_BOOL(res); PG_RETURN_BOOL(res);
} }

@ -4,7 +4,7 @@
* *
* Copyright (c) 2006-2008, PostgreSQL Global Development Group * Copyright (c) 2006-2008, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.17 2008/04/10 22:25:25 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.18 2008/04/22 17:52:43 teodor Exp $
*-------------------------------------------------------------------------- *--------------------------------------------------------------------------
*/ */
@ -356,14 +356,16 @@ typedef struct GinScanEntryData
/* entry, got from extractQueryFn */ /* entry, got from extractQueryFn */
Datum entry; Datum entry;
/* current ItemPointer to heap, its offset in buffer and buffer */ /* Current page in posting tree */
ItemPointerData curItem;
OffsetNumber offset;
Buffer buffer; Buffer buffer;
/* in case of Posing list */ /* current ItemPointer to heap */
ItemPointerData curItem;
/* used for Posting list and one page in Posting tree */
ItemPointerData *list; ItemPointerData *list;
uint32 nlist; uint32 nlist;
OffsetNumber offset;
bool isFinished; bool isFinished;
bool reduceResult; bool reduceResult;

Loading…
Cancel
Save