diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 9b968aa0f29..f4f79f27062 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -66,7 +66,9 @@ typedef enum */ typedef struct BTParallelScanDescData { - BlockNumber btps_scanPage; /* latest or next page to be scanned */ + BlockNumber btps_nextScanPage; /* next page to be scanned */ + BlockNumber btps_lastCurrPage; /* page whose sibling link was copied into + * btps_nextScanPage */ BTPS_State btps_pageStatus; /* indicates whether next page is * available for scan. see above for * possible states of parallel scan. */ @@ -550,7 +552,8 @@ btinitparallelscan(void *target) BTParallelScanDesc bt_target = (BTParallelScanDesc) target; SpinLockInit(&bt_target->btps_mutex); - bt_target->btps_scanPage = InvalidBlockNumber; + bt_target->btps_nextScanPage = InvalidBlockNumber; + bt_target->btps_lastCurrPage = InvalidBlockNumber; bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED; ConditionVariableInit(&bt_target->btps_cv); } @@ -575,7 +578,8 @@ btparallelrescan(IndexScanDesc scan) * consistency. */ SpinLockAcquire(&btscan->btps_mutex); - btscan->btps_scanPage = InvalidBlockNumber; + btscan->btps_nextScanPage = InvalidBlockNumber; + btscan->btps_lastCurrPage = InvalidBlockNumber; btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED; SpinLockRelease(&btscan->btps_mutex); } @@ -591,18 +595,20 @@ btparallelrescan(IndexScanDesc scan) * start just yet (only backends that call from _bt_first are capable of * starting primitive index scans, which they indicate by passing first=true). * - * If the return value is true, *pageno returns the next or current page - * of the scan (depending on the scan direction). An invalid block number - * means the scan hasn't yet started, or that caller needs to start the next - * primitive index scan (if it's the latter case we'll set so.needPrimScan). - * The first time a participating process reaches the last page, it will return - * true and set *pageno to P_NONE; after that, further attempts to seize the - * scan will return false. + * If the return value is true, *next_scan_page returns the next page of the + * scan, and *last_curr_page returns the page that *next_scan_page came from. + * An invalid *next_scan_page means the scan hasn't yet started, or that + * caller needs to start the next primitive index scan (if it's the latter + * case we'll set so.needPrimScan). The first time a participating process + * reaches the last page, it will return true and set *next_scan_page to + * P_NONE; after that, further attempts to seize the scan will return false. * - * Callers should ignore the value of pageno if the return value is false. + * Callers should ignore the value of *next_scan_page and *last_curr_page if + * the return value is false. 
*/ bool -_bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first) +_bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page, + BlockNumber *last_curr_page, bool first) { BTScanOpaque so = (BTScanOpaque) scan->opaque; bool exit_loop = false; @@ -610,7 +616,17 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first) ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; - *pageno = P_NONE; + *next_scan_page = P_NONE; + *last_curr_page = InvalidBlockNumber; + + /* + * Reset so->currPos, and initialize moreLeft/moreRight such that the next + * call to _bt_readnextpage treats this backend similarly to a serial + * backend that steps from *last_curr_page to *next_scan_page (unless this + * backend's so->currPos is initialized by _bt_readfirstpage before then). + */ + BTScanPosInvalidate(so->currPos); + so->currPos.moreLeft = so->currPos.moreRight = true; if (first) { @@ -660,7 +676,7 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first) array->cur_elem = btscan->btps_arrElems[i]; skey->sk_argument = array->elem_values[array->cur_elem]; } - *pageno = InvalidBlockNumber; + *next_scan_page = InvalidBlockNumber; exit_loop = true; } else @@ -688,7 +704,8 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first) * of advancing it to a new page! */ btscan->btps_pageStatus = BTPARALLEL_ADVANCING; - *pageno = btscan->btps_scanPage; + *next_scan_page = btscan->btps_nextScanPage; + *last_curr_page = btscan->btps_lastCurrPage; exit_loop = true; } SpinLockRelease(&btscan->btps_mutex); @@ -703,17 +720,21 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first) /* * _bt_parallel_release() -- Complete the process of advancing the scan to a - * new page. We now have the new value btps_scanPage; some other backend + * new page. We now have the new value btps_nextScanPage; another backend * can now begin advancing the scan. * - * Callers whose scan uses array keys must save their scan_page argument so + * Callers whose scan uses array keys must save their curr_page argument so * that it can be passed to _bt_parallel_primscan_schedule, should caller - * determine that another primitive index scan is required. If that happens, - * scan_page won't be scanned by any backend (unless the next primitive index - * scan lands on scan_page). + * determine that another primitive index scan is required. + * + * Note: unlike the serial case, parallel scans don't need to remember both + * sibling links. next_scan_page is whichever link is next given the scan's + * direction. That's all we'll ever need, since the direction of a parallel + * scan can never change. */ void -_bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page) +_bt_parallel_release(IndexScanDesc scan, BlockNumber next_scan_page, + BlockNumber curr_page) { ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; @@ -722,7 +743,8 @@ _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page) parallel_scan->ps_offset); SpinLockAcquire(&btscan->btps_mutex); - btscan->btps_scanPage = scan_page; + btscan->btps_nextScanPage = next_scan_page; + btscan->btps_lastCurrPage = curr_page; btscan->btps_pageStatus = BTPARALLEL_IDLE; SpinLockRelease(&btscan->btps_mutex); ConditionVariableSignal(&btscan->btps_cv); @@ -778,13 +800,13 @@ _bt_parallel_done(IndexScanDesc scan) /* * _bt_parallel_primscan_schedule() -- Schedule another primitive index scan. 
 * - * Caller passes the block number most recently passed to _bt_parallel_release + * Caller passes the curr_page most recently passed to _bt_parallel_release * by its backend. Caller successfully schedules the next primitive index scan * if the shared parallel state hasn't been seized since caller's backend last * advanced the scan. */ void -_bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber prev_scan_page) +_bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber curr_page) { BTScanOpaque so = (BTScanOpaque) scan->opaque; ParallelIndexScanDesc parallel_scan = scan->parallel_scan; @@ -796,10 +818,11 @@ _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber prev_scan_page) parallel_scan->ps_offset); SpinLockAcquire(&btscan->btps_mutex); - if (btscan->btps_scanPage == prev_scan_page && + if (btscan->btps_lastCurrPage == curr_page && btscan->btps_pageStatus == BTPARALLEL_IDLE) { - btscan->btps_scanPage = InvalidBlockNumber; + btscan->btps_nextScanPage = InvalidBlockNumber; + btscan->btps_lastCurrPage = InvalidBlockNumber; btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN; /* Serialize scan's current array keys */ diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index a662665b55f..2275553be78 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -43,12 +43,13 @@ static inline void _bt_savepostingitem(BTScanOpaque so, int itemIndex, OffsetNumber offnum, ItemPointer heapTid, int tupleOffset); static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir); -static bool _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir); -static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, - ScanDirection dir); -static Buffer _bt_walk_left(Relation rel, Buffer buf); +static bool _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, + ScanDirection dir); +static bool _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, + BlockNumber lastcurrblkno, ScanDirection dir); +static Buffer _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno, + BlockNumber lastcurrblkno); static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir); -static inline void _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir); /* @@ -880,7 +881,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) { Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; - Buffer buf; BTStack stack; OffsetNumber offnum; StrategyNumber strat; @@ -889,10 +889,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) ScanKeyData notnullkeys[INDEX_MAX_KEYS]; int keysz = 0; int i; - bool status; StrategyNumber strat_total; BTScanPosItem *currItem; - BlockNumber blkno; Assert(!BTScanPosIsValid(so->currPos)); @@ -924,7 +922,11 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) */ if (scan->parallel_scan != NULL) { - status = _bt_parallel_seize(scan, &blkno, true); + BlockNumber blkno, + lastcurrblkno; + bool status; + + status = _bt_parallel_seize(scan, &blkno, &lastcurrblkno, true); /* * Initialize arrays (when _bt_parallel_seize didn't already set up @@ -942,7 +944,13 @@ } else if (blkno != InvalidBlockNumber) { - if (!_bt_parallel_readpage(scan, blkno, dir)) + Assert(!so->needPrimScan); + + /* + * We anticipated starting another primitive scan, but some other + * worker beat us to it + */ + if (!_bt_readnextpage(scan, blkno, lastcurrblkno, dir)) return false; goto readcomplete; } @@ -1392,12 +1400,12 @@
_bt_first(IndexScanDesc scan, ScanDirection dir) * position ourselves on the target leaf page. */ Assert(ScanDirectionIsBackward(dir) == inskey.backward); - stack = _bt_search(rel, NULL, &inskey, &buf, BT_READ); + stack = _bt_search(rel, NULL, &inskey, &so->currPos.buf, BT_READ); /* don't need to keep the stack around... */ _bt_freestack(stack); - if (!BufferIsValid(buf)) + if (!BufferIsValid(so->currPos.buf)) { /* * We only get here if the index is completely empty. Lock relation @@ -1411,11 +1419,11 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (IsolationIsSerializable()) { PredicateLockRelation(rel, scan->xs_snapshot); - stack = _bt_search(rel, NULL, &inskey, &buf, BT_READ); + stack = _bt_search(rel, NULL, &inskey, &so->currPos.buf, BT_READ); _bt_freestack(stack); } - if (!BufferIsValid(buf)) + if (!BufferIsValid(so->currPos.buf)) { /* * Mark parallel scan as done, so that all the workers can finish @@ -1427,17 +1435,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) } } - PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot); - - _bt_initialize_more_data(so, dir); - /* position to the precise item on the page */ - offnum = _bt_binsrch(rel, &inskey, buf); - Assert(!BTScanPosIsValid(so->currPos)); - so->currPos.buf = buf; + offnum = _bt_binsrch(rel, &inskey, so->currPos.buf); /* - * Now load data from the first page of the scan. + * Now load data from the first page of the scan (usually the page + * currently in so->currPos.buf). * * If inskey.nextkey = false and inskey.backward = false, offnum is * positioned at the first non-pivot tuple >= inskey.scankeys. @@ -1455,24 +1458,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * for the page. For example, when inskey is both < the leaf page's high * key and > all of its non-pivot tuples, offnum will be "maxoff + 1". */ - if (!_bt_readpage(scan, dir, offnum, true)) - { - /* - * There's no actually-matching data on this page. Try to advance to - * the next page. Return false if there's no matching data at all. - */ - _bt_unlockbuf(scan->indexRelation, so->currPos.buf); - if (!_bt_steppage(scan, dir)) - return false; - } - else - { - /* We have at least one item to return as scan's first item */ - _bt_drop_lock_and_maybe_pin(scan, &so->currPos); - } + if (!_bt_readfirstpage(scan, offnum, dir)) + return false; readcomplete: /* OK, itemIndex says what to return */ + Assert(BTScanPosIsValid(so->currPos)); currItem = &so->currPos.items[so->currPos.itemIndex]; scan->xs_heaptid = currItem->heapTid; if (scan->xs_want_itup) @@ -1492,8 +1483,9 @@ readcomplete: * heap tuple, and if requested, scan->xs_itup points to a copy of the * index tuple. so->currPos is updated as needed. * - * On failure exit (no more tuples), we release pin and set - * so->currPos.buf to InvalidBuffer. + * On failure exit (no more tuples), we invalidate so->currPos. It'll + * still be possible for the scan to return tuples by changing direction, + * though we'll need to call _bt_first anew in that other direction. 
 */ bool _bt_next(IndexScanDesc scan, ScanDirection dir) { @@ -1523,6 +1515,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) } /* OK, itemIndex says what to return */ + Assert(BTScanPosIsValid(so->currPos)); currItem = &so->currPos.items[so->currPos.itemIndex]; scan->xs_heaptid = currItem->heapTid; if (scan->xs_want_itup) @@ -1545,14 +1538,6 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) * that there can be no more matching tuples in the current scan direction * (could just be for the current primitive index scan when scan has arrays). * - * _bt_first caller passes us an offnum returned by _bt_binsrch, which might - * be an out of bounds offnum such as "maxoff + 1" in certain corner cases. - * _bt_checkkeys will stop the scan as soon as an equality qual fails (when - * its scan key was marked required), so _bt_first _must_ pass us an offnum - * exactly at the beginning of where equal tuples are to be found. When we're - * passed an offnum past the end of the page, we might still manage to stop - * the scan on this page by calling _bt_checkkeys against the high key. - * * In the case of a parallel scan, caller must have called _bt_parallel_seize * prior to calling this function; this function will invoke * _bt_parallel_release before returning. @@ -1563,6 +1548,7 @@ static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, bool firstPage) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; Page page; BTPageOpaque opaque; @@ -1573,27 +1559,36 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, int itemIndex, indnatts; - /* - * We must have the buffer pinned and locked, but the usual macro can't be - * used here; this function is what makes it good for currPos. - */ - Assert(BufferIsValid(so->currPos.buf)); - + /* save the page/buffer block number, along with its sibling links */ page = BufferGetPage(so->currPos.buf); opaque = BTPageGetOpaque(page); + so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf); + so->currPos.prevPage = opaque->btpo_prev; + so->currPos.nextPage = opaque->btpo_next; + + Assert(!P_IGNORE(opaque)); + Assert(BTScanPosIsPinned(so->currPos)); - /* allow next page be processed by parallel worker */ if (scan->parallel_scan) { + /* allow next/prev page to be read by other worker without delay */ if (ScanDirectionIsForward(dir)) - pstate.prev_scan_page = opaque->btpo_next; + _bt_parallel_release(scan, so->currPos.nextPage, + so->currPos.currPage); else - pstate.prev_scan_page = BufferGetBlockNumber(so->currPos.buf); - - _bt_parallel_release(scan, pstate.prev_scan_page); + _bt_parallel_release(scan, so->currPos.prevPage, + so->currPos.currPage); } - indnatts = IndexRelationGetNumberOfAttributes(scan->indexRelation); + /* initialize remaining currPos fields (before moreLeft/moreRight) */ + so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf); + so->currPos.dir = dir; + so->currPos.nextTupleOffset = 0; + + PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot); + + /* initialize local variables */ + indnatts = IndexRelationGetNumberOfAttributes(rel); arrayKeys = so->numArrayKeys != 0; minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); @@ -1612,35 +1607,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, pstate.rechecks = 0; pstate.targetdistance = 0; - /* - * We note the buffer's block number so that we can release the pin later. - * This allows us to re-read the buffer if it is needed again for hinting.
- */ - so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf); - - /* - * We save the LSN of the page as we read it, so that we know whether it - * safe to apply LP_DEAD hints to the page later. This allows us to drop - * the pin for MVCC scans, which allows vacuum to avoid blocking. - */ - so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf); - - /* - * we must save the page's right-link while scanning it; this tells us - * where to step right to after we're done with these items. There is no - * corresponding need for the left-link, since splits always go right. - */ - so->currPos.nextPage = opaque->btpo_next; - - /* initialize tuple workspace to empty */ - so->currPos.nextTupleOffset = 0; - - /* - * Now that the current page has been made consistent, the macro should be - * good. - */ - Assert(BTScanPosIsPinned(so->currPos)); - /* * Prechecking the value of the continuescan flag for the last item on the * page (for backwards scan it will be the first item on a page). If we @@ -1829,7 +1795,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, IndexTuple itup = (IndexTuple) PageGetItem(page, iid); int truncatt; - truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation); + truncatt = BTreeTupleGetNAtts(itup, rel); pstate.prechecked = false; /* precheck didn't cover HIKEY */ _bt_checkkeys(scan, &pstate, arrayKeys, itup, truncatt); } @@ -1959,7 +1925,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, * We don't need to visit page to the left when no more matches will * be found there */ - if (!pstate.continuescan || P_LEFTMOST(opaque)) + if (!pstate.continuescan) so->currPos.moreLeft = false; Assert(itemIndex >= 0); @@ -2060,20 +2026,25 @@ _bt_savepostingitem(BTScanOpaque so, int itemIndex, OffsetNumber offnum, /* * _bt_steppage() -- Step to next page containing valid data for scan * - * On entry, if so->currPos.buf is valid the buffer is pinned but not locked; - * if pinned, we'll drop the pin before moving to next page. The buffer is - * not locked on entry. + * On entry, if so->currPos.buf is valid the buffer is pinned but not locked. + * If there's no pin held, it's because _bt_drop_lock_and_maybe_pin dropped + * the pin eagerly earlier on. The scan must have so->currPos.currPage set to + * a valid block, in any case. * - * For success on a scan using a non-MVCC snapshot we hold a pin, but not a - * read lock, on that page. If we do not hold the pin, we set so->currPos.buf - * to InvalidBuffer. We return true to indicate success. + * This is a wrapper on _bt_readnextpage that performs final steps for the + * current page. It sets up the _bt_readnextpage call using either local + * state saved in so->currPos by the most recent _bt_readpage call, or using + * shared parallel scan state (obtained by seizing the parallel scan here). + * + * Parallel scan callers that have already seized the scan should directly + * call _bt_readnextpage, rather than calling here. */ static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir) { BTScanOpaque so = (BTScanOpaque) scan->opaque; - BlockNumber blkno = InvalidBlockNumber; - bool status; + BlockNumber blkno, + lastcurrblkno; Assert(BTScanPosIsValid(so->currPos)); @@ -2115,7 +2086,6 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir) * In effect, btrestpos leaves advancing the arrays up to the first * _bt_readpage call (that takes place after it has restored markPos). 
*/ - Assert(so->markPos.dir == dir); if (so->needPrimScan) { if (ScanDirectionIsForward(dir)) @@ -2123,319 +2093,288 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir) else so->markPos.moreLeft = true; } + + /* mark/restore not supported by parallel scans */ + Assert(!scan->parallel_scan); } - if (ScanDirectionIsForward(dir)) + BTScanPosUnpinIfPinned(so->currPos); + + /* Walk to the next page with data */ + if (!scan->parallel_scan) { - /* Walk right to the next page with data */ - if (scan->parallel_scan != NULL) - { - /* - * Seize the scan to get the next block number; if the scan has - * ended already, bail out. - */ - status = _bt_parallel_seize(scan, &blkno, false); - if (!status) - { - /* release the previous buffer, if pinned */ - BTScanPosUnpinIfPinned(so->currPos); - BTScanPosInvalidate(so->currPos); - return false; - } - } - else - { - /* Not parallel, so use the previously-saved nextPage link. */ + /* Not parallel, so use local state set by the last _bt_readpage */ + if (ScanDirectionIsForward(dir)) blkno = so->currPos.nextPage; - } + else + blkno = so->currPos.prevPage; + lastcurrblkno = so->currPos.currPage; + } + else + { + /* + * Seize the scan to get the nextPage and currPage from shared + * parallel state (saved from parallel scan's last _bt_readpage) + */ + if (!_bt_parallel_seize(scan, &blkno, &lastcurrblkno, false)) + return false; + } - /* Remember we left a page with data */ - so->currPos.moreLeft = true; + return _bt_readnextpage(scan, blkno, lastcurrblkno, dir); +} + +/* + * _bt_readfirstpage() -- Read first page containing valid data for _bt_first + * + * _bt_first caller passes us an offnum returned by _bt_binsrch, which might + * be an out of bounds offnum such as "maxoff + 1" in certain corner cases. + * _bt_checkkeys will stop the scan as soon as an equality qual fails (when + * its scan key was marked required), so _bt_first _must_ pass us an offnum + * exactly at the beginning of where equal tuples are to be found. When we're + * passed an offnum past the end of the page, we might still manage to stop + * the scan on this page by calling _bt_checkkeys against the high key. See + * _bt_readpage for full details. + * + * On entry, so->currPos must be pinned and locked (so offnum stays valid). + * Parallel scan callers must have seized the scan before calling here. + * + * On exit, we'll have updated so->currPos and retained locks and pins + * according to the same rules as those laid out for _bt_readnextpage exit. + * Like _bt_readnextpage, our return value indicates if there are any matching + * records in the given direction. + * + * We always release the scan for a parallel scan caller, regardless of + * success or failure; we'll call _bt_parallel_release as soon as possible. 
 + */ +static bool +_bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; - /* release the previous buffer, if pinned */ - BTScanPosUnpinIfPinned(so->currPos); + so->numKilled = 0; /* just paranoia */ + so->markItemIndex = -1; /* ditto */ + + /* Initialize so->currPos's moreLeft/moreRight for the scan's first page */ + if (so->needPrimScan) + { + Assert(so->numArrayKeys); + + so->currPos.moreLeft = true; + so->currPos.moreRight = true; + so->needPrimScan = false; } - else + else if (ScanDirectionIsForward(dir)) { - /* Remember we left a page with data */ + so->currPos.moreLeft = false; so->currPos.moreRight = true; + } + else + { + so->currPos.moreLeft = true; + so->currPos.moreRight = false; + } - if (scan->parallel_scan != NULL) - { - /* - * Seize the scan to get the current block number; if the scan has - * ended already, bail out. - */ - status = _bt_parallel_seize(scan, &blkno, false); - BTScanPosUnpinIfPinned(so->currPos); - if (!status) - { - BTScanPosInvalidate(so->currPos); - return false; - } - } - else - { - /* Not parallel, so just use our own notion of the current page */ - blkno = so->currPos.currPage; - } + /* + * Attempt to load matching tuples from the page in so->currPos.buf. + * + * Note that _bt_readpage will finish initializing the so->currPos fields. + * _bt_readpage also releases parallel scan (even when it returns false). + */ + if (_bt_readpage(scan, dir, offnum, true)) + { + /* + * _bt_readpage succeeded. Drop the lock (and maybe the pin) on + * so->currPos.buf in preparation for btgettuple returning tuples. + */ + Assert(BTScanPosIsPinned(so->currPos)); + _bt_drop_lock_and_maybe_pin(scan, &so->currPos); + return true; } - if (!_bt_readnextpage(scan, blkno, dir)) - return false; + /* There's no actually-matching data on the page in so->currPos.buf */ + _bt_unlockbuf(scan->indexRelation, so->currPos.buf); - /* We have at least one item to return as scan's next item */ - _bt_drop_lock_and_maybe_pin(scan, &so->currPos); + /* Call _bt_readnextpage using its _bt_steppage wrapper function */ + if (!_bt_steppage(scan, dir)) + return false; + /* _bt_readpage for a later page (now in so->currPos) succeeded */ return true; } /* - * _bt_readnextpage() -- Read next page containing valid data for scan + * _bt_readnextpage() -- Read next page containing valid data for _bt_next + * + * Caller's blkno is the next interesting page's link, taken from either the + * previously-saved right link or left link. lastcurrblkno is the page that + * was current at the point where the blkno link was saved, which we use to + * reason about concurrent page splits/page deletions during backwards scans + * (_bt_parallel_seize also requires it, regardless of scan direction). + * + * On entry, caller shouldn't hold any locks or pins on any page (we work + * directly off of blkno and lastcurrblkno instead). Parallel scan callers + * must have seized the scan before calling here (blkno and lastcurrblkno + * arguments should come from the seized scan). * * On success exit, so->currPos is updated to contain data from the next - * interesting page, and we return true. Caller must release the lock (and - * maybe the pin) on the buffer on success exit. + * interesting page, and we return true (parallel scan callers should not use + * so->currPos to determine which page to scan next, though). We hold a pin + * on the buffer on success exit, except when _bt_drop_lock_and_maybe_pin + * decided it was safe to eagerly drop the pin (to avoid blocking VACUUM).
* * If there are no more matching records in the given direction, we drop all - * locks and pins, set so->currPos.buf to InvalidBuffer, and return false. + * locks and pins, invalidate so->currPos, and return false. + * + * We always release the scan for a parallel scan caller, regardless of + * success or failure; we'll call _bt_parallel_release as soon as possible. */ static bool -_bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir) +_bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, + BlockNumber lastcurrblkno, ScanDirection dir) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; - Relation rel; Page page; BTPageOpaque opaque; - bool status; - rel = scan->indexRelation; + Assert(so->currPos.currPage == lastcurrblkno || scan->parallel_scan != NULL); + Assert(!BTScanPosIsPinned(so->currPos)); + /* + * Remember that the scan already read lastcurrblkno, a page to the left + * of blkno (or remember reading a page to the right, for backwards scans) + */ if (ScanDirectionIsForward(dir)) - { - for (;;) - { - /* - * if we're at end of scan, give up and mark parallel scan as - * done, so that all the workers can finish their scan - */ - if (blkno == P_NONE || !so->currPos.moreRight) - { - _bt_parallel_done(scan); - BTScanPosInvalidate(so->currPos); - return false; - } - /* check for interrupts while we're not holding any buffer lock */ - CHECK_FOR_INTERRUPTS(); - /* step right one page */ - so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ); - page = BufferGetPage(so->currPos.buf); - opaque = BTPageGetOpaque(page); - /* check for deleted page */ - if (!P_IGNORE(opaque)) - { - PredicateLockPage(rel, blkno, scan->xs_snapshot); - /* see if there are any matches on this page */ - /* note that this will clear moreRight if we can stop */ - if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque), false)) - break; - } - else if (scan->parallel_scan != NULL) - { - /* allow next page be processed by parallel worker */ - _bt_parallel_release(scan, opaque->btpo_next); - } - - /* nope, keep going */ - if (scan->parallel_scan != NULL) - { - _bt_relbuf(rel, so->currPos.buf); - status = _bt_parallel_seize(scan, &blkno, false); - if (!status) - { - BTScanPosInvalidate(so->currPos); - return false; - } - } - else - { - blkno = opaque->btpo_next; - _bt_relbuf(rel, so->currPos.buf); - } - } - } + so->currPos.moreLeft = true; else + so->currPos.moreRight = true; + + for (;;) { /* - * Should only happen in parallel cases, when some other backend - * advanced the scan. + * if we're at end of scan, give up and mark parallel scan as done, so + * that all the workers can finish their scan */ - if (so->currPos.currPage != blkno) - { - BTScanPosUnpinIfPinned(so->currPos); - so->currPos.currPage = blkno; - } - - /* Done if we know that the left sibling link isn't of interest */ - if (!so->currPos.moreLeft) + if (blkno == P_NONE || + (ScanDirectionIsForward(dir) ? + !so->currPos.moreRight : !so->currPos.moreLeft)) { - BTScanPosUnpinIfPinned(so->currPos); _bt_parallel_done(scan); BTScanPosInvalidate(so->currPos); return false; } - /* - * Walk left to the next page with data. This is much more complex - * than the walk-right case because of the possibility that the page - * to our left splits while we are in flight to it, plus the - * possibility that the page we were on gets deleted after we leave - * it. See nbtree/README for details. 
- * - * It might be possible to rearrange this code to have less overhead - * in pinning and locking, but that would require capturing the left - * sibling block number when the page is initially read, and then - * optimistically starting there (rather than pinning the page twice). - * It is not clear that this would be worth the complexity. - */ - if (BTScanPosIsPinned(so->currPos)) - _bt_lockbuf(rel, so->currPos.buf, BT_READ); + if (ScanDirectionIsForward(dir)) + { + /* read blkno, but check for interrupts first */ + CHECK_FOR_INTERRUPTS(); + so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ); + } else - so->currPos.buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ); - - for (;;) { - /* Done if we know that the left sibling link isn't of interest */ - if (!so->currPos.moreLeft) + /* read blkno, avoiding race (also checks for interrupts) */ + so->currPos.buf = _bt_lock_and_validate_left(rel, &blkno, + lastcurrblkno); + if (so->currPos.buf == InvalidBuffer) { - _bt_relbuf(rel, so->currPos.buf); + /* must have been a concurrent deletion of leftmost page */ _bt_parallel_done(scan); BTScanPosInvalidate(so->currPos); return false; } + } - /* Step to next physical page */ - so->currPos.buf = _bt_walk_left(rel, so->currPos.buf); - - /* if we're physically at end of index, return failure */ - if (so->currPos.buf == InvalidBuffer) + page = BufferGetPage(so->currPos.buf); + opaque = BTPageGetOpaque(page); + lastcurrblkno = blkno; + if (likely(!P_IGNORE(opaque))) + { + /* see if there are any matches on this page */ + if (ScanDirectionIsForward(dir)) { - _bt_parallel_done(scan); - BTScanPosInvalidate(so->currPos); - return false; + /* note that this will clear moreRight if we can stop */ + if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque), false)) + break; + blkno = so->currPos.nextPage; } - - /* - * Okay, we managed to move left to a non-deleted page. Done if - * it's not half-dead and contains matching tuples. Else loop back - * and do it all again. - */ - page = BufferGetPage(so->currPos.buf); - opaque = BTPageGetOpaque(page); - if (!P_IGNORE(opaque)) + else { - PredicateLockPage(rel, BufferGetBlockNumber(so->currPos.buf), scan->xs_snapshot); - /* see if there are any matches on this page */ /* note that this will clear moreLeft if we can stop */ if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page), false)) break; + blkno = so->currPos.prevPage; } - else if (scan->parallel_scan != NULL) - { - /* allow next page be processed by parallel worker */ - _bt_parallel_release(scan, BufferGetBlockNumber(so->currPos.buf)); - } - - /* - * For parallel scans, get the last page scanned as it is quite - * possible that by the time we try to seize the scan, some other - * worker has already advanced the scan to a different page. We - * must continue based on the latest page scanned by any worker. 
- */ + } + else + { + /* _bt_readpage not called, so do all this for ourselves */ + if (ScanDirectionIsForward(dir)) + blkno = opaque->btpo_next; + else + blkno = opaque->btpo_prev; if (scan->parallel_scan != NULL) - { - _bt_relbuf(rel, so->currPos.buf); - status = _bt_parallel_seize(scan, &blkno, false); - if (!status) - { - BTScanPosInvalidate(so->currPos); - return false; - } - so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ); - } + _bt_parallel_release(scan, blkno, lastcurrblkno); } - } - return true; -} + /* no matching tuples on this page */ + _bt_relbuf(rel, so->currPos.buf); -/* - * _bt_parallel_readpage() -- Read current page containing valid data for scan - * - * On success, release lock and maybe pin on buffer. We return true to - * indicate success. - */ -static bool -_bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir) -{ - BTScanOpaque so = (BTScanOpaque) scan->opaque; - - Assert(!so->needPrimScan); - - _bt_initialize_more_data(so, dir); - - if (!_bt_readnextpage(scan, blkno, dir)) - return false; + /* parallel scan seizes another page (won't use so->currPos blkno) */ + if (scan->parallel_scan != NULL && + !_bt_parallel_seize(scan, &blkno, &lastcurrblkno, false)) + { + BTScanPosInvalidate(so->currPos); + return false; + } + } - /* We have at least one item to return as scan's next item */ + /* + * _bt_readpage succeeded. Drop the lock (and maybe the pin) on + * so->currPos.buf in preparation for btgettuple returning tuples. + */ + Assert(so->currPos.currPage == blkno); + Assert(BTScanPosIsPinned(so->currPos)); _bt_drop_lock_and_maybe_pin(scan, &so->currPos); return true; } /* - * _bt_walk_left() -- step left one page, if possible + * _bt_lock_and_validate_left() -- lock caller's left sibling blkno, + * recovering from concurrent page splits/page deletions when necessary + * + * Called during backwards scans, to deal with their unique concurrency rules. * - * The given buffer must be pinned and read-locked. This will be dropped - * before stepping left. On return, we have pin and read lock on the - * returned page, instead. + * blkno points to the block number of the page that we expect to move the + * scan to. We'll successfully move the scan there when we find that its + * right sibling link still points to lastcurrblkno (the page we just read). + * Otherwise, we have to figure out which page is the correct one for the scan + * to now read the hard way, reasoning about concurrent splits and deletions. + * See nbtree/README. * - * Returns InvalidBuffer if there is no page to the left (no lock is held - * in that case). + * On return, we have both a pin and a read lock on the returned page, whose + * block number will be set in *blkno. Returns InvalidBuffer if there is no + * page to the left (no lock or pin is held in that case). * * It is possible for the returned leaf page to be half-dead; caller must * check that condition and step left again when required. 
*/ static Buffer -_bt_walk_left(Relation rel, Buffer buf) +_bt_lock_and_validate_left(Relation rel, BlockNumber *blkno, + BlockNumber lastcurrblkno) { - Page page; - BTPageOpaque opaque; - - page = BufferGetPage(buf); - opaque = BTPageGetOpaque(page); + BlockNumber origblkno = *blkno; /* detects circular links */ for (;;) { - BlockNumber obknum; - BlockNumber lblkno; - BlockNumber blkno; + Buffer buf; + Page page; + BTPageOpaque opaque; int tries; - /* if we're at end of tree, release buf and return failure */ - if (P_LEFTMOST(opaque)) - { - _bt_relbuf(rel, buf); - break; - } - /* remember original page we are stepping left from */ - obknum = BufferGetBlockNumber(buf); - /* step left */ - blkno = lblkno = opaque->btpo_prev; - _bt_relbuf(rel, buf); /* check for interrupts while we're not holding any buffer lock */ CHECK_FOR_INTERRUPTS(); - buf = _bt_getbuf(rel, blkno, BT_READ); + buf = _bt_getbuf(rel, *blkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); @@ -2443,7 +2382,7 @@ _bt_walk_left(Relation rel, Buffer buf) * If this isn't the page we want, walk right till we find what we * want --- but go no more than four hops (an arbitrary limit). If we * don't find the correct page by then, the most likely bet is that - * the original page got deleted and isn't in the sibling chain at all + * lastcurrblkno got deleted and isn't in the sibling chain at all * anymore, not that its left sibling got split more than four times. * * Note that it is correct to test P_ISDELETED not P_IGNORE here, @@ -2452,21 +2391,27 @@ _bt_walk_left(Relation rel, Buffer buf) tries = 0; for (;;) { - if (!P_ISDELETED(opaque) && opaque->btpo_next == obknum) + if (likely(!P_ISDELETED(opaque) && + opaque->btpo_next == lastcurrblkno)) { /* Found desired page, return it */ return buf; } if (P_RIGHTMOST(opaque) || ++tries > 4) break; - blkno = opaque->btpo_next; - buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); + /* step right */ + *blkno = opaque->btpo_next; + buf = _bt_relandgetbuf(rel, buf, *blkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); } - /* Return to the original page to see what's up */ - buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ); + /* + * Return to the original page (usually the page most recently read by + * _bt_readpage, which is passed by caller as lastcurrblkno) to see + * what's up with its prev sibling link + */ + buf = _bt_relandgetbuf(rel, buf, lastcurrblkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); if (P_ISDELETED(opaque)) @@ -2482,31 +2427,42 @@ _bt_walk_left(Relation rel, Buffer buf) if (P_RIGHTMOST(opaque)) elog(ERROR, "fell off the end of index \"%s\"", RelationGetRelationName(rel)); - blkno = opaque->btpo_next; - buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); + lastcurrblkno = opaque->btpo_next; + buf = _bt_relandgetbuf(rel, buf, lastcurrblkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); if (!P_ISDELETED(opaque)) break; } - - /* - * Now return to top of loop, resetting obknum to point to this - * nondeleted page, and try again. - */ } else { /* - * It wasn't deleted; the explanation had better be that the page - * to the left got split or deleted. Without this check, we'd go - * into an infinite loop if there's anything wrong. + * Original lastcurrblkno wasn't deleted; the explanation had + * better be that the page to the left got split or deleted. + * Without this check, we risk going into an infinite loop. 
*/ - if (opaque->btpo_prev == lblkno) + if (opaque->btpo_prev == origblkno) elog(ERROR, "could not find left sibling of block %u in index \"%s\"", - obknum, RelationGetRelationName(rel)); - /* Okay to try again with new lblkno value */ + lastcurrblkno, RelationGetRelationName(rel)); + /* Okay to try again, since left sibling link changed */ } + + /* + * Original lastcurrblkno from caller was concurrently deleted (could + * also have been a great many concurrent left sibling page splits). + * Found a non-deleted page that should now act as our lastcurrblkno. + */ + if (P_LEFTMOST(opaque)) + { + /* New lastcurrblkno has no left sibling (concurrently deleted) */ + _bt_relbuf(rel, buf); + break; + } + + /* Start from scratch with new lastcurrblkno's blkno/prev link */ + *blkno = origblkno = opaque->btpo_prev; + _bt_relbuf(rel, buf); } return InvalidBuffer; @@ -2606,7 +2562,6 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) { Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; - Buffer buf; Page page; BTPageOpaque opaque; OffsetNumber start; @@ -2617,9 +2572,9 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) * version of _bt_search(). We don't maintain a stack since we know we * won't need it. */ - buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir)); + so->currPos.buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir)); - if (!BufferIsValid(buf)) + if (!BufferIsValid(so->currPos.buf)) { /* * Empty index. Lock the whole relation, as nothing finer to lock @@ -2630,8 +2585,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) return false; } - PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot); - page = BufferGetPage(buf); + page = BufferGetPage(so->currPos.buf); opaque = BTPageGetOpaque(page); Assert(P_ISLEAF(opaque)); @@ -2654,31 +2608,14 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) start = 0; /* keep compiler quiet */ } - /* remember which buffer we have pinned */ - so->currPos.buf = buf; - - _bt_initialize_more_data(so, dir); - /* * Now load data from the first page of the scan. */ - if (!_bt_readpage(scan, dir, start, true)) - { - /* - * There's no actually-matching data on this page. Try to advance to - * the next page. Return false if there's no matching data at all. 
- */ - _bt_unlockbuf(scan->indexRelation, so->currPos.buf); - if (!_bt_steppage(scan, dir)) - return false; - } - else - { - /* We have at least one item to return as scan's first item */ - _bt_drop_lock_and_maybe_pin(scan, &so->currPos); - } + if (!_bt_readfirstpage(scan, start, dir)) + return false; /* OK, itemIndex says what to return */ + Assert(BTScanPosIsValid(so->currPos)); currItem = &so->currPos.items[so->currPos.itemIndex]; scan->xs_heaptid = currItem->heapTid; if (scan->xs_want_itup) @@ -2686,33 +2623,3 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) return true; } - -/* - * _bt_initialize_more_data() -- initialize moreLeft, moreRight and scan dir - * from currPos - */ -static inline void -_bt_initialize_more_data(BTScanOpaque so, ScanDirection dir) -{ - so->currPos.dir = dir; - if (so->needPrimScan) - { - Assert(so->numArrayKeys); - - so->currPos.moreLeft = true; - so->currPos.moreRight = true; - so->needPrimScan = false; - } - else if (ScanDirectionIsForward(dir)) - { - so->currPos.moreLeft = false; - so->currPos.moreRight = true; - } - else - { - so->currPos.moreLeft = true; - so->currPos.moreRight = false; - } - so->numKilled = 0; /* just paranoia */ - so->markItemIndex = -1; /* ditto */ -} diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 61ded00ca24..76be65123c8 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -2407,7 +2407,7 @@ new_prim_scan: so->needPrimScan = true; /* ...but call _bt_first again */ if (scan->parallel_scan) - _bt_parallel_primscan_schedule(scan, pstate->prev_scan_page); + _bt_parallel_primscan_schedule(scan, so->currPos.currPage); /* Caller's tuple doesn't match the new qual */ return false; diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 90a68375a2e..5fb523ecec3 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -925,13 +925,13 @@ typedef BTVacuumPostingData *BTVacuumPosting; * Index scans work a page at a time: we pin and read-lock the page, identify * all the matching items on the page and save them in BTScanPosData, then * release the read-lock while returning the items to the caller for - * processing. This approach minimizes lock/unlock traffic. Note that we - * keep the pin on the index page until the caller is done with all the items - * (this is needed for VACUUM synchronization, see nbtree/README). When we - * are ready to step to the next page, if the caller has told us any of the - * items were killed, we re-lock the page to mark them killed, then unlock. - * Finally we drop the pin and step to the next page in the appropriate - * direction. + * processing. This approach minimizes lock/unlock traffic. We must always + * drop the lock to make it okay for caller to process the returned items. + * Whether or not we can also release the pin during this window will vary. + * We drop the pin eagerly (when safe) to avoid blocking progress by VACUUM + * (see nbtree/README section about making concurrent TID recycling safe). + * We'll always release both the lock and the pin on the current page before + * moving on to its sibling page. * * If we are doing an index-only scan, we save the entire IndexTuple for each * matched item, otherwise only its heap TID and offset. 
The IndexTuples go @@ -950,28 +950,15 @@ typedef struct BTScanPosItem /* what we remember about each match */ typedef struct BTScanPosData { - Buffer buf; /* if valid, the buffer is pinned */ + Buffer buf; /* currPage buf (invalid means unpinned) */ - XLogRecPtr lsn; /* pos in the WAL stream when page was read */ + /* page details as of the saved position's call to _bt_readpage */ BlockNumber currPage; /* page referenced by items array */ - BlockNumber nextPage; /* page's right link when we scanned it */ + BlockNumber prevPage; /* currPage's left link */ + BlockNumber nextPage; /* currPage's right link */ + XLogRecPtr lsn; /* currPage's LSN */ - /* - * moreLeft and moreRight track whether we think there may be matching - * index entries to the left and right of the current page, respectively. - * We can clear the appropriate one of these flags when _bt_checkkeys() - * sets BTReadPageState.continuescan = false. - */ - bool moreLeft; - bool moreRight; - - /* - * Direction of the scan at the time that _bt_readpage was called. - * - * Used by btrestrpos to "restore" the scan's array keys by resetting each - * array to its first element's value (first in this scan direction). This - * avoids the need to directly track the array keys in btmarkpos. - */ + /* scan direction for the saved position's call to _bt_readpage */ ScanDirection dir; /* @@ -980,6 +967,13 @@ typedef struct BTScanPosData */ int nextTupleOffset; + /* + * moreLeft and moreRight track whether we think there may be matching + * index entries to the left and right of the current page, respectively. + */ + bool moreLeft; + bool moreRight; + /* * The items array is always ordered in index order (ie, increasing * indexoffset). When scanning backwards it is convenient to fill the @@ -1021,11 +1015,8 @@ typedef BTScanPosData *BTScanPos; ) #define BTScanPosInvalidate(scanpos) \ do { \ - (scanpos).currPage = InvalidBlockNumber; \ - (scanpos).nextPage = InvalidBlockNumber; \ (scanpos).buf = InvalidBuffer; \ - (scanpos).lsn = InvalidXLogRecPtr; \ - (scanpos).nextTupleOffset = 0; \ + (scanpos).currPage = InvalidBlockNumber; \ } while (0) /* We need one of these for each equality-type SK_SEARCHARRAY scan key */ @@ -1091,7 +1082,6 @@ typedef struct BTReadPageState OffsetNumber minoff; /* Lowest non-pivot tuple's offset */ OffsetNumber maxoff; /* Highest non-pivot tuple's offset */ IndexTuple finaltup; /* Needed by scans with array keys */ - BlockNumber prev_scan_page; /* previous _bt_parallel_release block */ Page page; /* Page being read */ /* Per-tuple input parameters, set by _bt_readpage for _bt_checkkeys */ @@ -1192,12 +1182,14 @@ extern int btgettreeheight(Relation rel); /* * prototypes for internal functions in nbtree.c */ -extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, - bool first); -extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page); +extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page, + BlockNumber *last_curr_page, bool first); +extern void _bt_parallel_release(IndexScanDesc scan, + BlockNumber next_scan_page, + BlockNumber curr_page); extern void _bt_parallel_done(IndexScanDesc scan); extern void _bt_parallel_primscan_schedule(IndexScanDesc scan, - BlockNumber prev_scan_page); + BlockNumber curr_page); /* * prototypes for functions in nbtdedup.c