mirror of https://github.com/postgres/postgres
When building a GiST index that doesn't fit in cache, buffers are attached to some internal nodes in the index. This speeds up the build by avoiding random I/O that would otherwise be needed to traverse all the way down the tree to the find right leaf page for tuple. Alexander Korotkovpull/1/head
parent
09b68c70af
commit
5edb24a898
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,787 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* gistbuildbuffers.c |
||||
* node buffer management functions for GiST buffering build algorithm. |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* src/backend/access/gist/gistbuildbuffers.c |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#include "postgres.h" |
||||
|
||||
#include "access/genam.h" |
||||
#include "access/gist_private.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/buffile.h" |
||||
#include "storage/bufmgr.h" |
||||
#include "utils/memutils.h" |
||||
#include "utils/rel.h" |
||||
|
||||
static GISTNodeBufferPage *gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb); |
||||
static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb, |
||||
GISTNodeBuffer *nodeBuffer); |
||||
static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb, |
||||
GISTNodeBuffer *nodeBuffer); |
||||
static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, |
||||
GISTNodeBuffer *nodeBuffer); |
||||
static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, |
||||
IndexTuple item); |
||||
static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, |
||||
IndexTuple *item); |
||||
static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb); |
||||
static void gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum); |
||||
|
||||
static void ReadTempFileBlock(BufFile *file, long blknum, void *ptr); |
||||
static void WriteTempFileBlock(BufFile *file, long blknum, void *ptr); |
||||
|
||||
|
||||
/*
|
||||
* Initialize GiST build buffers. |
||||
*/ |
||||
GISTBuildBuffers * |
||||
gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel) |
||||
{ |
||||
GISTBuildBuffers *gfbb; |
||||
HASHCTL hashCtl; |
||||
|
||||
gfbb = palloc(sizeof(GISTBuildBuffers)); |
||||
gfbb->pagesPerBuffer = pagesPerBuffer; |
||||
gfbb->levelStep = levelStep; |
||||
|
||||
/*
|
||||
* Create a temporary file to hold buffer pages that are swapped out of |
||||
* memory. |
||||
*/ |
||||
gfbb->pfile = BufFileCreateTemp(true); |
||||
gfbb->nFileBlocks = 0; |
||||
|
||||
/* Initialize free page management. */ |
||||
gfbb->nFreeBlocks = 0; |
||||
gfbb->freeBlocksLen = 32; |
||||
gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long)); |
||||
|
||||
/*
|
||||
* Current memory context will be used for all in-memory data structures |
||||
* of buffers which are persistent during buffering build. |
||||
*/ |
||||
gfbb->context = CurrentMemoryContext; |
||||
|
||||
/*
|
||||
* nodeBuffersTab hash is association between index blocks and it's |
||||
* buffers. |
||||
*/ |
||||
hashCtl.keysize = sizeof(BlockNumber); |
||||
hashCtl.entrysize = sizeof(GISTNodeBuffer); |
||||
hashCtl.hcxt = CurrentMemoryContext; |
||||
hashCtl.hash = tag_hash; |
||||
hashCtl.match = memcmp; |
||||
gfbb->nodeBuffersTab = hash_create("gistbuildbuffers", |
||||
1024, |
||||
&hashCtl, |
||||
HASH_ELEM | HASH_CONTEXT |
||||
| HASH_FUNCTION | HASH_COMPARE); |
||||
|
||||
gfbb->bufferEmptyingQueue = NIL; |
||||
|
||||
/*
|
||||
* Per-level node buffers lists for final buffers emptying process. Node |
||||
* buffers are inserted here when they are created. |
||||
*/ |
||||
gfbb->buffersOnLevelsLen = 1; |
||||
gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) * |
||||
gfbb->buffersOnLevelsLen); |
||||
gfbb->buffersOnLevels[0] = NIL; |
||||
|
||||
/*
|
||||
* Block numbers of node buffers which last pages are currently loaded |
||||
* into main memory. |
||||
*/ |
||||
gfbb->loadedBuffersLen = 32; |
||||
gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen * |
||||
sizeof(GISTNodeBuffer *)); |
||||
gfbb->loadedBuffersCount = 0; |
||||
|
||||
/*
|
||||
* Root path item of the tree. Updated on each root node split. |
||||
*/ |
||||
gfbb->rootitem = (GISTBufferingInsertStack *) MemoryContextAlloc( |
||||
gfbb->context, sizeof(GISTBufferingInsertStack)); |
||||
gfbb->rootitem->parent = NULL; |
||||
gfbb->rootitem->blkno = GIST_ROOT_BLKNO; |
||||
gfbb->rootitem->downlinkoffnum = InvalidOffsetNumber; |
||||
gfbb->rootitem->level = maxLevel; |
||||
gfbb->rootitem->refCount = 1; |
||||
|
||||
return gfbb; |
||||
} |
||||
|
||||
/*
|
||||
* Returns a node buffer for given block. The buffer is created if it |
||||
* doesn't exist yet. |
||||
*/ |
||||
GISTNodeBuffer * |
||||
gistGetNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate, |
||||
BlockNumber nodeBlocknum, |
||||
OffsetNumber downlinkoffnum, |
||||
GISTBufferingInsertStack *parent) |
||||
{ |
||||
GISTNodeBuffer *nodeBuffer; |
||||
bool found; |
||||
|
||||
/* Find node buffer in hash table */ |
||||
nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab, |
||||
(const void *) &nodeBlocknum, |
||||
HASH_ENTER, |
||||
&found); |
||||
if (!found) |
||||
{ |
||||
/*
|
||||
* Node buffer wasn't found. Initialize the new buffer as empty. |
||||
*/ |
||||
GISTBufferingInsertStack *path; |
||||
int level; |
||||
MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context); |
||||
|
||||
nodeBuffer->pageBuffer = NULL; |
||||
nodeBuffer->blocksCount = 0; |
||||
nodeBuffer->queuedForEmptying = false; |
||||
|
||||
/*
|
||||
* Create a path stack for the page. |
||||
*/ |
||||
if (nodeBlocknum != GIST_ROOT_BLKNO) |
||||
{ |
||||
path = (GISTBufferingInsertStack *) palloc( |
||||
sizeof(GISTBufferingInsertStack)); |
||||
path->parent = parent; |
||||
path->blkno = nodeBlocknum; |
||||
path->downlinkoffnum = downlinkoffnum; |
||||
path->level = parent->level - 1; |
||||
path->refCount = 0; /* initially unreferenced */ |
||||
parent->refCount++; /* this path references its parent */ |
||||
Assert(path->level > 0); |
||||
} |
||||
else |
||||
path = gfbb->rootitem; |
||||
|
||||
nodeBuffer->path = path; |
||||
path->refCount++; |
||||
|
||||
/*
|
||||
* Add this buffer to the list of buffers on this level. Enlarge |
||||
* buffersOnLevels array if needed. |
||||
*/ |
||||
level = path->level; |
||||
if (level >= gfbb->buffersOnLevelsLen) |
||||
{ |
||||
int i; |
||||
|
||||
gfbb->buffersOnLevels = |
||||
(List **) repalloc(gfbb->buffersOnLevels, |
||||
(level + 1) * sizeof(List *)); |
||||
|
||||
/* initialize the enlarged portion */ |
||||
for (i = gfbb->buffersOnLevelsLen; i <= level; i++) |
||||
gfbb->buffersOnLevels[i] = NIL; |
||||
gfbb->buffersOnLevelsLen = level + 1; |
||||
} |
||||
|
||||
/*
|
||||
* Prepend the new buffer to the list of buffers on this level. It's |
||||
* not arbitrary that the new buffer is put to the beginning of the |
||||
* list: in the final emptying phase we loop through all buffers at |
||||
* each level, and flush them. If a page is split during the emptying, |
||||
* it's more efficient to flush the new splitted pages first, before |
||||
* moving on to pre-existing pages on the level. The buffers just |
||||
* created during the page split are likely still in cache, so |
||||
* flushing them immediately is more efficient than putting them to |
||||
* the end of the queue. |
||||
*/ |
||||
gfbb->buffersOnLevels[level] = lcons(nodeBuffer, |
||||
gfbb->buffersOnLevels[level]); |
||||
|
||||
MemoryContextSwitchTo(oldcxt); |
||||
} |
||||
else |
||||
{ |
||||
if (parent != nodeBuffer->path->parent) |
||||
{ |
||||
/*
|
||||
* A different parent path item was provided than we've |
||||
* remembered. We trust caller to provide more correct parent than |
||||
* we have. Previous parent may be outdated by page split. |
||||
*/ |
||||
gistDecreasePathRefcount(nodeBuffer->path->parent); |
||||
nodeBuffer->path->parent = parent; |
||||
parent->refCount++; |
||||
} |
||||
} |
||||
|
||||
return nodeBuffer; |
||||
} |
||||
|
||||
/*
|
||||
* Allocate memory for a buffer page. |
||||
*/ |
||||
static GISTNodeBufferPage * |
||||
gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb) |
||||
{ |
||||
GISTNodeBufferPage *pageBuffer; |
||||
|
||||
pageBuffer = (GISTNodeBufferPage *) MemoryContextAlloc(gfbb->context, |
||||
BLCKSZ); |
||||
pageBuffer->prev = InvalidBlockNumber; |
||||
|
||||
/* Set page free space */ |
||||
PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET; |
||||
return pageBuffer; |
||||
} |
||||
|
||||
/*
|
||||
* Add specified block number into loadedBuffers array. |
||||
*/ |
||||
static void |
||||
gistAddLoadedBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer) |
||||
{ |
||||
/* Enlarge the array if needed */ |
||||
if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen) |
||||
{ |
||||
gfbb->loadedBuffersLen *= 2; |
||||
gfbb->loadedBuffers = (GISTNodeBuffer **) |
||||
repalloc(gfbb->loadedBuffers, |
||||
gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *)); |
||||
} |
||||
|
||||
gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer; |
||||
gfbb->loadedBuffersCount++; |
||||
} |
||||
|
||||
/*
|
||||
* Load last page of node buffer into main memory. |
||||
*/ |
||||
static void |
||||
gistLoadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer) |
||||
{ |
||||
/* Check if we really should load something */ |
||||
if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0) |
||||
{ |
||||
/* Allocate memory for page */ |
||||
nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb); |
||||
|
||||
/* Read block from temporary file */ |
||||
ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum, |
||||
nodeBuffer->pageBuffer); |
||||
|
||||
/* Mark file block as free */ |
||||
gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum); |
||||
|
||||
/* Mark node buffer as loaded */ |
||||
gistAddLoadedBuffer(gfbb, nodeBuffer); |
||||
nodeBuffer->pageBlocknum = InvalidBlockNumber; |
||||
} |
||||
} |
||||
|
||||
/*
|
||||
* Write last page of node buffer to the disk. |
||||
*/ |
||||
static void |
||||
gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer) |
||||
{ |
||||
/* Check if we have something to write */ |
||||
if (nodeBuffer->pageBuffer) |
||||
{ |
||||
BlockNumber blkno; |
||||
|
||||
/* Get free file block */ |
||||
blkno = gistBuffersGetFreeBlock(gfbb); |
||||
|
||||
/* Write block to the temporary file */ |
||||
WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer); |
||||
|
||||
/* Free memory of that page */ |
||||
pfree(nodeBuffer->pageBuffer); |
||||
nodeBuffer->pageBuffer = NULL; |
||||
|
||||
/* Save block number */ |
||||
nodeBuffer->pageBlocknum = blkno; |
||||
} |
||||
} |
||||
|
||||
/*
|
||||
* Write last pages of all node buffers to the disk. |
||||
*/ |
||||
void |
||||
gistUnloadNodeBuffers(GISTBuildBuffers *gfbb) |
||||
{ |
||||
int i; |
||||
|
||||
/* Unload all the buffers that have a page loaded in memory. */ |
||||
for (i = 0; i < gfbb->loadedBuffersCount; i++) |
||||
gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]); |
||||
|
||||
/* Now there are no node buffers with loaded last page */ |
||||
gfbb->loadedBuffersCount = 0; |
||||
} |
||||
|
||||
/*
|
||||
* Add index tuple to buffer page. |
||||
*/ |
||||
static void |
||||
gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup) |
||||
{ |
||||
Size itupsz = IndexTupleSize(itup); |
||||
char *ptr; |
||||
|
||||
/* There should be enough of space. */ |
||||
Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz)); |
||||
|
||||
/* Reduce free space value of page to reserve a spot for the tuple. */ |
||||
PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz); |
||||
|
||||
/* Get pointer to the spot we reserved (ie. end of free space). */ |
||||
ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET |
||||
+ PAGE_FREE_SPACE(pageBuffer); |
||||
|
||||
/* Copy the index tuple there. */ |
||||
memcpy(ptr, itup, itupsz); |
||||
} |
||||
|
||||
/*
|
||||
* Get last item from buffer page and remove it from page. |
||||
*/ |
||||
static void |
||||
gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, IndexTuple *itup) |
||||
{ |
||||
IndexTuple ptr; |
||||
Size itupsz; |
||||
|
||||
Assert(!PAGE_IS_EMPTY(pageBuffer)); /* Page shouldn't be empty */ |
||||
|
||||
/* Get pointer to last index tuple */ |
||||
ptr = (IndexTuple) ((char *) pageBuffer |
||||
+ BUFFER_PAGE_DATA_OFFSET |
||||
+ PAGE_FREE_SPACE(pageBuffer)); |
||||
itupsz = IndexTupleSize(ptr); |
||||
|
||||
/* Make a copy of the tuple */ |
||||
*itup = (IndexTuple) palloc(itupsz); |
||||
memcpy(*itup, ptr, itupsz); |
||||
|
||||
/* Mark the space used by the tuple as free */ |
||||
PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz); |
||||
} |
||||
|
||||
/*
|
||||
* Push an index tuple to node buffer. |
||||
*/ |
||||
void |
||||
gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer, |
||||
IndexTuple itup) |
||||
{ |
||||
/*
|
||||
* Most part of memory operations will be in buffering build persistent |
||||
* context. So, let's switch to it. |
||||
*/ |
||||
MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context); |
||||
|
||||
/*
|
||||
* If the buffer is currently empty, create the first page. |
||||
*/ |
||||
if (nodeBuffer->blocksCount == 0) |
||||
{ |
||||
nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb); |
||||
nodeBuffer->blocksCount = 1; |
||||
gistAddLoadedBuffer(gfbb, nodeBuffer); |
||||
} |
||||
|
||||
/* Load last page of node buffer if it wasn't in memory already */ |
||||
if (!nodeBuffer->pageBuffer) |
||||
gistLoadNodeBuffer(gfbb, nodeBuffer); |
||||
|
||||
/*
|
||||
* Check if there is enough space on the last page for the tuple. |
||||
*/ |
||||
if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup)) |
||||
{ |
||||
/*
|
||||
* Nope. Swap previous block to disk and allocate a new one. |
||||
*/ |
||||
BlockNumber blkno; |
||||
|
||||
/* Write filled page to the disk */ |
||||
blkno = gistBuffersGetFreeBlock(gfbb); |
||||
WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer); |
||||
|
||||
/*
|
||||
* Reset the in-memory page as empty, and link the previous block to |
||||
* the new page by storing its block number in the prev-link. |
||||
*/ |
||||
PAGE_FREE_SPACE(nodeBuffer->pageBuffer) = |
||||
BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata)); |
||||
nodeBuffer->pageBuffer->prev = blkno; |
||||
|
||||
/* We've just added one more page */ |
||||
nodeBuffer->blocksCount++; |
||||
} |
||||
|
||||
gistPlaceItupToPage(nodeBuffer->pageBuffer, itup); |
||||
|
||||
/*
|
||||
* If the buffer just overflowed, add it to the emptying queue. |
||||
*/ |
||||
if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying) |
||||
{ |
||||
gfbb->bufferEmptyingQueue = lcons(nodeBuffer, |
||||
gfbb->bufferEmptyingQueue); |
||||
nodeBuffer->queuedForEmptying = true; |
||||
} |
||||
|
||||
/* Restore memory context */ |
||||
MemoryContextSwitchTo(oldcxt); |
||||
} |
||||
|
||||
/*
|
||||
* Removes one index tuple from node buffer. Returns true if success and false |
||||
* if node buffer is empty. |
||||
*/ |
||||
bool |
||||
gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer, |
||||
IndexTuple *itup) |
||||
{ |
||||
/*
|
||||
* If node buffer is empty then return false. |
||||
*/ |
||||
if (nodeBuffer->blocksCount <= 0) |
||||
return false; |
||||
|
||||
/* Load last page of node buffer if needed */ |
||||
if (!nodeBuffer->pageBuffer) |
||||
gistLoadNodeBuffer(gfbb, nodeBuffer); |
||||
|
||||
/*
|
||||
* Get index tuple from last non-empty page. |
||||
*/ |
||||
gistGetItupFromPage(nodeBuffer->pageBuffer, itup); |
||||
|
||||
/*
|
||||
* If we just removed the last tuple from the page, fetch previous page on |
||||
* this node buffer (if any). |
||||
*/ |
||||
if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer)) |
||||
{ |
||||
BlockNumber prevblkno; |
||||
|
||||
/*
|
||||
* blocksCount includes the page in pageBuffer, so decrease it now. |
||||
*/ |
||||
nodeBuffer->blocksCount--; |
||||
|
||||
/*
|
||||
* If there's more pages, fetch previous one. |
||||
*/ |
||||
prevblkno = nodeBuffer->pageBuffer->prev; |
||||
if (prevblkno != InvalidBlockNumber) |
||||
{ |
||||
/* There is a previous page. Fetch it. */ |
||||
Assert(nodeBuffer->blocksCount > 0); |
||||
ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer); |
||||
|
||||
/*
|
||||
* Now that we've read the block in memory, we can release its |
||||
* on-disk block for reuse. |
||||
*/ |
||||
gistBuffersReleaseBlock(gfbb, prevblkno); |
||||
} |
||||
else |
||||
{ |
||||
/* No more pages. Free memory. */ |
||||
Assert(nodeBuffer->blocksCount == 0); |
||||
pfree(nodeBuffer->pageBuffer); |
||||
nodeBuffer->pageBuffer = NULL; |
||||
} |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
/*
|
||||
* Select a currently unused block for writing to. |
||||
*/ |
||||
static long |
||||
gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb) |
||||
{ |
||||
/*
|
||||
* If there are multiple free blocks, we select the one appearing last in |
||||
* freeBlocks[]. If there are none, assign the next block at the end of |
||||
* the file (causing the file to be extended). |
||||
*/ |
||||
if (gfbb->nFreeBlocks > 0) |
||||
return gfbb->freeBlocks[--gfbb->nFreeBlocks]; |
||||
else |
||||
return gfbb->nFileBlocks++; |
||||
} |
||||
|
||||
/*
|
||||
* Return a block# to the freelist. |
||||
*/ |
||||
static void |
||||
gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum) |
||||
{ |
||||
int ndx; |
||||
|
||||
/* Enlarge freeBlocks array if full. */ |
||||
if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen) |
||||
{ |
||||
gfbb->freeBlocksLen *= 2; |
||||
gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks, |
||||
gfbb->freeBlocksLen * |
||||
sizeof(long)); |
||||
} |
||||
|
||||
/* Add blocknum to array */ |
||||
ndx = gfbb->nFreeBlocks++; |
||||
gfbb->freeBlocks[ndx] = blocknum; |
||||
} |
||||
|
||||
/*
|
||||
* Free buffering build data structure. |
||||
*/ |
||||
void |
||||
gistFreeBuildBuffers(GISTBuildBuffers *gfbb) |
||||
{ |
||||
/* Close buffers file. */ |
||||
BufFileClose(gfbb->pfile); |
||||
|
||||
/* All other things will be freed on memory context release */ |
||||
} |
||||
|
||||
/*
|
||||
* Data structure representing information about node buffer for index tuples |
||||
* relocation from splitted node buffer. |
||||
*/ |
||||
typedef struct |
||||
{ |
||||
GISTENTRY entry[INDEX_MAX_KEYS]; |
||||
bool isnull[INDEX_MAX_KEYS]; |
||||
GISTPageSplitInfo *splitinfo; |
||||
GISTNodeBuffer *nodeBuffer; |
||||
} RelocationBufferInfo; |
||||
|
||||
/*
|
||||
* At page split, distribute tuples from the buffer of the split page to |
||||
* new buffers for the created page halves. This also adjusts the downlinks |
||||
* in 'splitinfo' to include the tuples in the buffers. |
||||
*/ |
||||
void |
||||
gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, |
||||
Relation r, GISTBufferingInsertStack *path, |
||||
Buffer buffer, List *splitinfo) |
||||
{ |
||||
RelocationBufferInfo *relocationBuffersInfos; |
||||
bool found; |
||||
GISTNodeBuffer *nodeBuffer; |
||||
BlockNumber blocknum; |
||||
IndexTuple itup; |
||||
int splitPagesCount = 0, |
||||
i; |
||||
GISTENTRY entry[INDEX_MAX_KEYS]; |
||||
bool isnull[INDEX_MAX_KEYS]; |
||||
GISTNodeBuffer nodebuf; |
||||
ListCell *lc; |
||||
|
||||
/* If the splitted page doesn't have buffers, we have nothing to do. */ |
||||
if (!LEVEL_HAS_BUFFERS(path->level, gfbb)) |
||||
return; |
||||
|
||||
/*
|
||||
* Get the node buffer of the splitted page. |
||||
*/ |
||||
blocknum = BufferGetBlockNumber(buffer); |
||||
nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum, |
||||
HASH_FIND, &found); |
||||
if (!found) |
||||
{ |
||||
/*
|
||||
* Node buffer should exist at this point. If it didn't exist before, |
||||
* the insertion that caused the page to split should've created it. |
||||
*/ |
||||
elog(ERROR, "node buffer of page being split (%u) does not exist", |
||||
blocknum); |
||||
} |
||||
|
||||
/*
|
||||
* Make a copy of the old buffer, as we're going reuse it as the buffer |
||||
* for the new left page, which is on the same block as the old page. |
||||
* That's not true for the root page, but that's fine because we never |
||||
* have a buffer on the root page anyway. The original algorithm as |
||||
* described by Arge et al did, but it's of no use, as you might as well |
||||
* read the tuples straight from the heap instead of the root buffer. |
||||
*/ |
||||
Assert(blocknum != GIST_ROOT_BLKNO); |
||||
memcpy(&nodebuf, nodeBuffer, sizeof(GISTNodeBuffer)); |
||||
|
||||
/* Reset the old buffer, used for the new left page from now on */ |
||||
nodeBuffer->blocksCount = 0; |
||||
nodeBuffer->pageBuffer = NULL; |
||||
nodeBuffer->pageBlocknum = InvalidBlockNumber; |
||||
|
||||
/* Reassign pointer to the saved copy. */ |
||||
nodeBuffer = &nodebuf; |
||||
|
||||
/*
|
||||
* Allocate memory for information about relocation buffers. |
||||
*/ |
||||
splitPagesCount = list_length(splitinfo); |
||||
relocationBuffersInfos = |
||||
(RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) * |
||||
splitPagesCount); |
||||
|
||||
/*
|
||||
* Fill relocation buffers information for node buffers of pages produced |
||||
* by split. |
||||
*/ |
||||
i = 0; |
||||
foreach(lc, splitinfo) |
||||
{ |
||||
GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc); |
||||
GISTNodeBuffer *newNodeBuffer; |
||||
|
||||
/* Decompress parent index tuple of node buffer page. */ |
||||
gistDeCompressAtt(giststate, r, |
||||
si->downlink, NULL, (OffsetNumber) 0, |
||||
relocationBuffersInfos[i].entry, |
||||
relocationBuffersInfos[i].isnull); |
||||
|
||||
/*
|
||||
* Create a node buffer for the page. The leftmost half is on the same |
||||
* block as the old page before split, so for the leftmost half this |
||||
* will return the original buffer, which was emptied earlier in this |
||||
* function. |
||||
*/ |
||||
newNodeBuffer = gistGetNodeBuffer(gfbb, |
||||
giststate, |
||||
BufferGetBlockNumber(si->buf), |
||||
path->downlinkoffnum, |
||||
path->parent); |
||||
|
||||
relocationBuffersInfos[i].nodeBuffer = newNodeBuffer; |
||||
relocationBuffersInfos[i].splitinfo = si; |
||||
|
||||
i++; |
||||
} |
||||
|
||||
/*
|
||||
* Loop through all index tuples on the buffer on the splitted page, |
||||
* moving them to buffers on the new pages. |
||||
*/ |
||||
while (gistPopItupFromNodeBuffer(gfbb, nodeBuffer, &itup)) |
||||
{ |
||||
float sum_grow, |
||||
which_grow[INDEX_MAX_KEYS]; |
||||
int i, |
||||
which; |
||||
IndexTuple newtup; |
||||
RelocationBufferInfo *targetBufferInfo; |
||||
|
||||
/*
|
||||
* Choose which page this tuple should go to. |
||||
*/ |
||||
gistDeCompressAtt(giststate, r, |
||||
itup, NULL, (OffsetNumber) 0, entry, isnull); |
||||
|
||||
which = -1; |
||||
*which_grow = -1.0f; |
||||
sum_grow = 1.0f; |
||||
|
||||
for (i = 0; i < splitPagesCount && sum_grow; i++) |
||||
{ |
||||
int j; |
||||
RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i]; |
||||
|
||||
sum_grow = 0.0f; |
||||
for (j = 0; j < r->rd_att->natts; j++) |
||||
{ |
||||
float usize; |
||||
|
||||
usize = gistpenalty(giststate, j, |
||||
&splitPageInfo->entry[j], |
||||
splitPageInfo->isnull[j], |
||||
&entry[j], isnull[j]); |
||||
|
||||
if (which_grow[j] < 0 || usize < which_grow[j]) |
||||
{ |
||||
which = i; |
||||
which_grow[j] = usize; |
||||
if (j < r->rd_att->natts - 1 && i == 0) |
||||
which_grow[j + 1] = -1; |
||||
sum_grow += which_grow[j]; |
||||
} |
||||
else if (which_grow[j] == usize) |
||||
sum_grow += usize; |
||||
else |
||||
{ |
||||
sum_grow = 1; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
targetBufferInfo = &relocationBuffersInfos[which]; |
||||
|
||||
/* Push item to selected node buffer */ |
||||
gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup); |
||||
|
||||
/* Adjust the downlink for this page, if needed. */ |
||||
newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink, |
||||
itup, giststate); |
||||
if (newtup) |
||||
{ |
||||
gistDeCompressAtt(giststate, r, |
||||
newtup, NULL, (OffsetNumber) 0, |
||||
targetBufferInfo->entry, |
||||
targetBufferInfo->isnull); |
||||
|
||||
targetBufferInfo->splitinfo->downlink = newtup; |
||||
} |
||||
} |
||||
|
||||
pfree(relocationBuffersInfos); |
||||
} |
||||
|
||||
|
||||
/*
|
||||
* Wrappers around BufFile operations. The main difference is that these |
||||
* wrappers report errors with ereport(), so that the callers don't need |
||||
* to check the return code. |
||||
*/ |
||||
|
||||
static void |
||||
ReadTempFileBlock(BufFile *file, long blknum, void *ptr) |
||||
{ |
||||
if (BufFileSeekBlock(file, blknum) != 0) |
||||
elog(ERROR, "could not seek temporary file: %m"); |
||||
if (BufFileRead(file, ptr, BLCKSZ) != BLCKSZ) |
||||
elog(ERROR, "could not read temporary file: %m"); |
||||
} |
||||
|
||||
static void |
||||
WriteTempFileBlock(BufFile *file, long blknum, void *ptr) |
||||
{ |
||||
if (BufFileSeekBlock(file, blknum) != 0) |
||||
elog(ERROR, "could not seek temporary file: %m"); |
||||
if (BufFileWrite(file, ptr, BLCKSZ) != BLCKSZ) |
||||
{ |
||||
/*
|
||||
* the other errors in Read/WriteTempFileBlock shouldn't happen, but |
||||
* an error at write can easily happen if you run out of disk space. |
||||
*/ |
||||
ereport(ERROR, |
||||
(errcode_for_file_access(), |
||||
errmsg("could not write block %ld of temporary file: %m", |
||||
blknum))); |
||||
} |
||||
} |
||||
Loading…
Reference in new issue