diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index be0206d58ed..1a605f99442 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -698,6 +698,9 @@ nextpage: * "real" data item on the page to the right (if such a first item is * available). * + * - That tuples report that they have the expected number of attributes. + * INCLUDE index pivot tuples should not contain non-key attributes. + * * Furthermore, when state passed shows ShareLock held, and target page is * internal page, function also checks: * @@ -722,43 +725,35 @@ bt_target_page_check(BtreeCheckState *state) elog(DEBUG2, "verifying %u items on %s block %u", max, P_ISLEAF(topaque) ? "leaf" : "internal", state->targetblock); - - /* Check the number of attributes in high key if any */ - if (!P_RIGHTMOST(topaque)) + /* + * Check the number of attributes in the high key. Note that the rightmost + * page doesn't contain a high key, so there is nothing to check. + */ + if (!P_RIGHTMOST(topaque) && + !_bt_check_natts(state->rel, state->target, P_HIKEY)) { - if (!_bt_check_natts(state->rel, state->target, P_HIKEY)) - { - ItemId itemid; - IndexTuple itup; - char *itid, - *htid; + ItemId itemid; + IndexTuple itup; - itemid = PageGetItemId(state->target, P_HIKEY); - itup = (IndexTuple) PageGetItem(state->target, itemid); - itid = psprintf("(%u,%u)", state->targetblock, P_HIKEY); - htid = psprintf("(%u,%u)", - ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), - ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); + itemid = PageGetItemId(state->target, P_HIKEY); + itup = (IndexTuple) PageGetItem(state->target, itemid); - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("wrong number of index tuple attributes for index \"%s\"", - RelationGetRelationName(state->rel)), - errdetail_internal("Index tid=%s natts=%u points to %s tid=%s page lsn=%X/%X.", - itid, - BTreeTupGetNAtts(itup, state->rel), - P_ISLEAF(topaque) ? "heap" : "index", - htid, - (uint32) (state->targetlsn >> 32), - (uint32) state->targetlsn))); - } + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("wrong number of high key index tuple attributes in index \"%s\"", + RelationGetRelationName(state->rel)), + errdetail_internal("Index block=%u natts=%u block type=%s page lsn=%X/%X.", + state->targetblock, + BTreeTupleGetNAtts(itup, state->rel), + P_ISLEAF(topaque) ? "heap" : "index", + (uint32) (state->targetlsn >> 32), + (uint32) state->targetlsn))); } - /* * Loop over page items, starting from first non-highkey item, not high - * key (if any). Also, immediately skip "negative infinity" real item (if - * any). + * key (if any). Most tests are not performed for the "negative infinity" + * real item (if any). 
*/ for (offset = P_FIRSTDATAKEY(topaque); offset <= max; @@ -791,7 +786,7 @@ bt_target_page_check(BtreeCheckState *state) tupsize, ItemIdGetLength(itemid), (uint32) (state->targetlsn >> 32), (uint32) state->targetlsn), - errhint("This could be a torn page problem"))); + errhint("This could be a torn page problem."))); /* Check the number of index tuple attributes */ if (!_bt_check_natts(state->rel, state->target, offset)) @@ -806,11 +801,11 @@ bt_target_page_check(BtreeCheckState *state) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("wrong number of index tuple attributes for index \"%s\"", + errmsg("wrong number of index tuple attributes in index \"%s\"", RelationGetRelationName(state->rel)), errdetail_internal("Index tid=%s natts=%u points to %s tid=%s page lsn=%X/%X.", itid, - BTreeTupGetNAtts(itup, state->rel), + BTreeTupleGetNAtts(itup, state->rel), P_ISLEAF(topaque) ? "heap" : "index", htid, (uint32) (state->targetlsn >> 32), @@ -818,8 +813,8 @@ bt_target_page_check(BtreeCheckState *state) } /* - * Don't try to generate scankey using "negative infinity" garbage - * data on internal pages + * Don't try to generate scankey using "negative infinity" item on + * internal pages. They are always truncated to zero attributes. */ if (offset_is_negative_infinity(topaque, offset)) continue; @@ -1430,9 +1425,9 @@ offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset) * infinity item is either first or second line item, or there is none * within page. * - * "Negative infinity" tuple is a special corner case of pivot tuples, - * it has zero attributes while rest of pivot tuples have nkeyatts number - * of attributes. + * Negative infinity items are a special case among pivot tuples. They + * always have zero attributes, while all other pivot tuples always have + * nkeyatts attributes. * * Right-most pages don't have a high key, but could be said to * conceptually have a "positive infinity" high key. Thus, there is a diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c index 9b3e0a2e6ea..ca690e522f7 100644 --- a/src/backend/access/common/indextuple.c +++ b/src/backend/access/common/indextuple.c @@ -19,7 +19,6 @@ #include "access/heapam.h" #include "access/itup.h" #include "access/tuptoaster.h" -#include "utils/rel.h" /* ---------------------------------------------------------------- @@ -32,6 +31,9 @@ * * This shouldn't leak any memory; otherwise, callers such as * tuplesort_putindextuplevalues() will be very unhappy. + * + * This shouldn't perform external table access provided caller + * does not pass values that are stored EXTERNAL. * ---------------- */ IndexTuple @@ -448,30 +450,49 @@ CopyIndexTuple(IndexTuple source) } /* - * Truncate tailing attributes from given index tuple leaving it with - * new_indnatts number of attributes. + * Create a palloc'd copy of an index tuple, leaving only the first + * leavenatts attributes remaining. + * + * Truncation is guaranteed to result in an index tuple that is no + * larger than the original. It is safe to use the IndexTuple with + * the original tuple descriptor, but caller must avoid actually + * accessing truncated attributes from returned tuple! In practice + * this means that index_getattr() must be called with special care, + * and that the truncated tuple should only ever be accessed by code + * under caller's direct control. + * + * It's safe to call this function with a buffer lock held, since it + * never performs external table access. 
If it ever became possible + * for index tuples to contain EXTERNAL TOAST values, then this would + * have to be revisited. */ IndexTuple -index_truncate_tuple(TupleDesc tupleDescriptor, IndexTuple olditup, - int new_indnatts) +index_truncate_tuple(TupleDesc sourceDescriptor, IndexTuple source, + int leavenatts) { - TupleDesc itupdesc = CreateTupleDescCopyConstr(tupleDescriptor); + TupleDesc truncdesc; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; - IndexTuple newitup; + IndexTuple truncated; - Assert(tupleDescriptor->natts <= INDEX_MAX_KEYS); - Assert(new_indnatts > 0); - Assert(new_indnatts < tupleDescriptor->natts); + Assert(leavenatts < sourceDescriptor->natts); - index_deform_tuple(olditup, tupleDescriptor, values, isnull); + /* Create temporary descriptor to scribble on */ + truncdesc = palloc(TupleDescSize(sourceDescriptor)); + TupleDescCopy(truncdesc, sourceDescriptor); + truncdesc->natts = leavenatts; - /* form new tuple that will contain only key attributes */ - itupdesc->natts = new_indnatts; - newitup = index_form_tuple(itupdesc, values, isnull); - newitup->t_tid = olditup->t_tid; + /* Deform, form copy of tuple with fewer attributes */ + index_deform_tuple(source, truncdesc, values, isnull); + truncated = index_form_tuple(truncdesc, values, isnull); + truncated->t_tid = source->t_tid; + Assert(IndexTupleSize(truncated) <= IndexTupleSize(source)); + + /* + * Cannot leak memory here: TupleDescCopy() doesn't allocate any inner + * structure, so a plain pfree() releases all allocated memory. + */ + pfree(truncdesc); - FreeTupleDesc(itupdesc); - Assert(IndexTupleSize(newitup) <= IndexTupleSize(olditup)); - return newitup; + return truncated; } diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 10509cfe8a5..dbd5c9238c5 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -84,7 +84,7 @@ static void _bt_checksplitloc(FindSplitData *state, int dataitemstoleft, Size firstoldonrightsz); static bool _bt_pgaddtup(Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off); -static bool _bt_isequal(Relation idxrel, Page page, OffsetNumber offnum, +static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey); static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel); @@ -343,6 +343,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, IndexUniqueCheck checkUnique, bool *is_unique, uint32 *speculativeToken) { + TupleDesc itupdesc = RelationGetDescr(rel); int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); SnapshotData SnapshotDirty; OffsetNumber maxoff; @@ -402,7 +403,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, * in real comparison, but only for ordering/finding items on * pages. - vadim 03/24/97 */ - if (!_bt_isequal(rel, page, offset, indnkeyatts, itup_scankey)) + if (!_bt_isequal(itupdesc, page, offset, indnkeyatts, itup_scankey)) break; /* we're past all the equal tuples */ /* okay, we gotta fetch the heap tuple ... 
*/ @@ -566,7 +567,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, /* If scankey == hikey we gotta check the next page too */ if (P_RIGHTMOST(opaque)) break; - if (!_bt_isequal(rel, page, P_HIKEY, + if (!_bt_isequal(itupdesc, page, P_HIKEY, indnkeyatts, itup_scankey)) break; /* Advance to next non-dead page --- there must be one */ @@ -849,6 +850,13 @@ _bt_insertonpg(Relation rel, /* child buffer must be given iff inserting on an internal page */ Assert(P_ISLEAF(lpageop) == !BufferIsValid(cbuf)); + /* tuple must have appropriate number of attributes */ + Assert(!P_ISLEAF(lpageop) || + BTreeTupleGetNAtts(itup, rel) == + IndexRelationGetNumberOfAttributes(rel)); + Assert(P_ISLEAF(lpageop) || + BTreeTupleGetNAtts(itup, rel) == + IndexRelationGetNumberOfKeyAttributes(rel)); /* The caller should've finished any incomplete splits already. */ if (P_INCOMPLETE_SPLIT(lpageop)) @@ -956,6 +964,18 @@ _bt_insertonpg(Relation rel, } } + /* + * Every internal page should have exactly one negative infinity item + * at all times. Only _bt_split() and _bt_newroot() should add items + * that become negative infinity items through truncation, since + * they're the only routines that allocate new internal pages. Do not + * allow a retail insertion of a new item at the negative infinity + * offset. + */ + if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop)) + elog(ERROR, "cannot insert second negative infinity item in block %u of index \"%s\"", + itup_blkno, RelationGetRelationName(rel)); + /* Do the update. No ereport(ERROR) until changes are logged */ START_CRIT_SECTION(); @@ -1002,7 +1022,6 @@ _bt_insertonpg(Relation rel, xl_btree_metadata xlmeta; uint8 xlinfo; XLogRecPtr recptr; - IndexTupleData trunctuple; xlrec.offnum = itup_off; @@ -1038,17 +1057,8 @@ _bt_insertonpg(Relation rel, xlinfo = XLOG_BTREE_INSERT_META; } - /* Read comments in _bt_pgaddtup */ XLogRegisterBuffer(0, buf, REGBUF_STANDARD); - if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop)) - { - trunctuple = *itup; - trunctuple.t_info = sizeof(IndexTupleData); - XLogRegisterBufData(0, (char *) &trunctuple, - sizeof(IndexTupleData)); - } - else - XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup)); + XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup)); recptr = XLogInsert(RM_BTREE_ID, xlinfo); @@ -1203,6 +1213,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, itemid = PageGetItemId(origpage, P_HIKEY); itemsz = ItemIdGetLength(itemid); item = (IndexTuple) PageGetItem(origpage, itemid); + Assert(BTreeTupleGetNAtts(item, rel) == indnkeyatts); if (PageAddItem(rightpage, (Item) item, itemsz, rightoff, false, false) == InvalidOffsetNumber) { @@ -1235,20 +1246,25 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, } /* - * We must truncate included attributes of the "high key" item, before - * insert it onto the leaf page. It's the only point in insertion - * process, where we perform truncation. All other functions work with - * this high key and do not change it. + * Truncate non-key (INCLUDE) attributes of the high key item before + * inserting it on the left page. This only needs to happen at the leaf + * level, since in general all pivot tuple values originate from leaf + * level high keys. This isn't just about avoiding unnecessary work, + * though; truncating unneeded key attributes (more aggressive suffix + * truncation) can only be performed at the leaf level anyway. 
This is + * because a pivot tuple in a grandparent page must guide a search not + * only to the correct parent page, but also to the correct leaf page. */ if (indnatts != indnkeyatts && isleaf) { - lefthikey = _bt_truncate_tuple(rel, item); + lefthikey = _bt_nonkey_truncate(rel, item); itemsz = IndexTupleSize(lefthikey); itemsz = MAXALIGN(itemsz); } else lefthikey = item; + Assert(BTreeTupleGetNAtts(lefthikey, rel) == indnkeyatts); if (PageAddItem(leftpage, (Item) lefthikey, itemsz, leftoff, false, false) == InvalidOffsetNumber) { @@ -1258,6 +1274,9 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, origpagenumber, RelationGetRelationName(rel)); } leftoff = OffsetNumberNext(leftoff); + /* be tidy */ + if (lefthikey != item) + pfree(lefthikey); /* * Now transfer all the data items to the appropriate page. @@ -2143,7 +2162,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) left_item = (IndexTuple) palloc(left_item_sz); left_item->t_info = left_item_sz; BTreeInnerTupleSetDownLink(left_item, lbkno); - BTreeTupSetNAtts(left_item, 0); + BTreeTupleSetNAtts(left_item, 0); /* * Create downlink item for right page. The key for it is obtained from @@ -2180,6 +2199,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) * Note: we *must* insert the two items in item-number order, for the * benefit of _bt_restore_page(). */ + Assert(BTreeTupleGetNAtts(left_item, rel) == 0); if (PageAddItem(rootpage, (Item) left_item, left_item_sz, P_HIKEY, false, false) == InvalidOffsetNumber) elog(PANIC, "failed to add leftkey to new root page" @@ -2189,6 +2209,8 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) /* * insert the right page pointer into the new root page. */ + Assert(BTreeTupleGetNAtts(right_item, rel) == + IndexRelationGetNumberOfKeyAttributes(rel)); if (PageAddItem(rootpage, (Item) right_item, right_item_sz, P_FIRSTKEY, false, false) == InvalidOffsetNumber) elog(PANIC, "failed to add rightkey to new root page" @@ -2284,7 +2306,7 @@ _bt_pgaddtup(Page page, { trunctuple = *itup; trunctuple.t_info = sizeof(IndexTupleData); - BTreeTupSetNAtts(&trunctuple, 0); + BTreeTupleSetNAtts(&trunctuple, 0); itup = &trunctuple; itemsize = sizeof(IndexTupleData); } @@ -2303,10 +2325,9 @@ _bt_pgaddtup(Page page, * Rule is simple: NOT_NULL not equal NULL, NULL not equal NULL too. */ static bool -_bt_isequal(Relation idxrel, Page page, OffsetNumber offnum, +_bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey) { - TupleDesc itupdesc = RelationGetDescr(idxrel); IndexTuple itup; int i; @@ -2316,16 +2337,11 @@ _bt_isequal(Relation idxrel, Page page, OffsetNumber offnum, itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); /* - * Index tuple shouldn't be truncated. Despite we technically could - * compare truncated tuple as well, this function should be only called - * for regular non-truncated leaf tuples and P_HIKEY tuple on - * rightmost leaf page. + * It's okay that we might perform a comparison against a truncated page + * high key when caller needs to determine if _bt_check_unique scan must + * continue on to the next page. Caller never asks us to compare non-key + * attributes within an INCLUDE index. */ - Assert((P_RIGHTMOST((BTPageOpaque) PageGetSpecialPointer(page)) || - offnum != P_HIKEY) - ? 
BTreeTupGetNAtts(itup, idxrel) == itupdesc->natts - : true); - for (i = 1; i <= keysz; i++) { AttrNumber attno; diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index ba689259127..beef089ba86 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -1605,6 +1605,8 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) ItemPointerSetBlockNumber(&trunctuple.t_tid, target); else ItemPointerSetInvalid(&trunctuple.t_tid); + BTreeTupleSetNAtts(&trunctuple, 0); + if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) elog(ERROR, "could not add dummy high key to half-dead page"); diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 4c6fdcdd8aa..0bcfa10b864 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -154,7 +154,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, * We need to save the location of the index entry we chose in the * parent page on a stack. In case we split the tree, we'll use the * stack to work back up to the parent page. We also save the actual - * downlink (TID) to uniquely identify the index entry, in case it + * downlink (block) to uniquely identify the index entry, in case it * moves right while we're working lower in the tree. See the paper * by Lehman and Yao for how this is detected and handled. (We use the * child link to disambiguate duplicate keys in the index -- Lehman @@ -436,14 +436,7 @@ _bt_compare(Relation rel, IndexTuple itup; int i; - /* - * Check tuple has correct number of attributes. - */ - if (unlikely(!_bt_check_natts(rel, page, offnum))) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("tuple has wrong number of attributes in index \"%s\"", - RelationGetRelationName(rel)))); + Assert(_bt_check_natts(rel, page, offnum)); /* * Force result ">" if target item is first data item on an internal page @@ -1968,51 +1961,3 @@ _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir) so->numKilled = 0; /* just paranoia */ so->markItemIndex = -1; /* ditto */ } - -/* - * Check if index tuple have appropriate number of attributes. - */ -bool -_bt_check_natts(Relation index, Page page, OffsetNumber offnum) -{ - int16 natts = IndexRelationGetNumberOfAttributes(index); - int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(index); - ItemId itemid; - IndexTuple itup; - BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); - - /* - * Assert that mask allocated for number of keys in index tuple can fit - * maximum number of index keys. - */ - StaticAssertStmt(BT_N_KEYS_OFFSET_MASK >= INDEX_MAX_KEYS, - "BT_N_KEYS_OFFSET_MASK can't fit INDEX_MAX_KEYS"); - - itemid = PageGetItemId(page, offnum); - itup = (IndexTuple) PageGetItem(page, itemid); - - if (P_ISLEAF(opaque) && offnum >= P_FIRSTDATAKEY(opaque)) - { - /* - * Regular leaf tuples have as every index attributes - */ - return (BTreeTupGetNAtts(itup, index) == natts); - } - else if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque)) - { - /* - * Leftmost tuples on non-leaf pages have no attributes, or haven't - * INDEX_ALT_TID_MASK set in pg_upgraded indexes. 
- */ - return (BTreeTupGetNAtts(itup, index) == 0 || - ((itup->t_info & INDEX_ALT_TID_MASK) == 0)); - } - else - { - /* - * Pivot tuples stored in non-leaf pages and hikeys of leaf pages - * contain only key attributes - */ - return (BTreeTupGetNAtts(itup, index) == nkeyatts); - } -} diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index feba5e1c8fb..7deda9acac9 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -752,7 +752,7 @@ _bt_sortaddtup(Page page, { trunctuple = *itup; trunctuple.t_info = sizeof(IndexTupleData); - BTreeTupSetNAtts(&trunctuple, 0); + BTreeTupleSetNAtts(&trunctuple, 0); itup = &trunctuple; itemsize = sizeof(IndexTupleData); } @@ -790,7 +790,9 @@ _bt_sortaddtup(Page page, * placeholder for the pointer to the "high key" item; when we have * filled up the page, we will set linp0 to point to itemN and clear * linpN. On the other hand, if we find this is the last (rightmost) - * page, we leave the items alone and slide the linp array over. + * page, we leave the items alone and slide the linp array over. If + * the high key is to be truncated, offset 1 is deleted, and we insert + * the truncated high key at offset 1. * * 'last' pointer indicates the last offset added to the page. *---------- @@ -803,7 +805,6 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) OffsetNumber last_off; Size pgspc; Size itupsz; - BTPageOpaque pageop; int indnatts = IndexRelationGetNumberOfAttributes(wstate->index); int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(wstate->index); @@ -860,7 +861,6 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) ItemId ii; ItemId hii; IndexTuple oitup; - IndexTuple keytup; BTPageOpaque opageop = (BTPageOpaque) PageGetSpecialPointer(opage); /* Create new page of same level */ @@ -891,25 +891,38 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) if (indnkeyatts != indnatts && P_ISLEAF(opageop)) { + IndexTuple truncated; + Size truncsz; + /* - * We truncate included attributes of high key here. Subsequent - * insertions assume that hikey is already truncated, and so they - * need not worry about it, when copying the high key into the - * parent page as a downlink. + * Truncate any non-key attributes from high key on leaf level + * (i.e. truncate on leaf level if we're building an INCLUDE + * index). This is only done at the leaf level because + * downlinks in internal pages are either negative infinity + * items, or get their contents from copying from one level + * down. See also: _bt_split(). + * + * Since the truncated tuple is probably smaller than the + * original, it cannot just be copied in place (besides, we want + * to actually save space on the leaf page). We delete the + * original high key, and add our own truncated high key at the + * same offset. * - * The code above have just rearranged item pointers, but it - * didn't save any space. In order to save the space on page we - * have to truly shift index tuples on the page. But that's not - * so bad for performance, because we operating pd_upper and don't - * have to shift much of tuples memory. Shift of ItemId's is - * rather cheap, because they are small. + * Note that the page layout won't be changed very much. oitup + * is already located at the physical beginning of tuple space, + * so we only shift the line pointer array back and forth, and + * overwrite the latter portion of the space occupied by the + * original tuple. 
This is fairly cheap. */ - keytup = _bt_truncate_tuple(wstate->index, oitup); - - /* delete "wrong" high key, insert keytup as P_HIKEY. */ + truncated = _bt_nonkey_truncate(wstate->index, oitup); + truncsz = IndexTupleSize(truncated); PageIndexTupleDelete(opage, P_HIKEY); + _bt_sortaddtup(opage, truncsz, truncated, P_HIKEY); + pfree(truncated); - _bt_sortaddtup(opage, IndexTupleSize(keytup), keytup, P_HIKEY); + /* oitup should continue to point to the page's high key */ + hii = PageGetItemId(opage, P_HIKEY); + oitup = (IndexTuple) PageGetItem(opage, hii); } /* @@ -920,7 +933,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) if (state->btps_next == NULL) state->btps_next = _bt_pagestate(wstate, state->btps_level + 1); - Assert(state->btps_minkey != NULL); + Assert(BTreeTupleGetNAtts(state->btps_minkey, wstate->index) == + IndexRelationGetNumberOfKeyAttributes(wstate->index)); BTreeInnerTupleSetDownLink(state->btps_minkey, oblkno); _bt_buildadd(wstate, state->btps_next, state->btps_minkey); pfree(state->btps_minkey); @@ -928,11 +942,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) /* * Save a copy of the minimum key for the new page. We have to copy * it off the old page, not the new one, in case we are not at leaf - * level. Despite oitup is already initialized, it's important to get - * high key from the page, since we could have replaced it with - * truncated copy. See comment above. + * level. */ - oitup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, P_HIKEY)); state->btps_minkey = CopyIndexTuple(oitup); /* @@ -959,8 +970,6 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) last_off = P_FIRSTKEY; } - pageop = (BTPageOpaque) PageGetSpecialPointer(npage); - /* * If the new item is the first for its page, stash a copy for later. 
Note * this will only happen for the first item on a level; on later pages, @@ -969,14 +978,18 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) */ if (last_off == P_HIKEY) { + BTPageOpaque npageop; + Assert(state->btps_minkey == NULL); + npageop = (BTPageOpaque) PageGetSpecialPointer(npage); + /* * Truncate included attributes of the tuple that we're going to * insert into the parent page as a downlink */ - if (indnkeyatts != indnatts && P_ISLEAF(pageop)) - state->btps_minkey = _bt_truncate_tuple(wstate->index, itup); + if (indnkeyatts != indnatts && P_ISLEAF(npageop)) + state->btps_minkey = _bt_nonkey_truncate(wstate->index, itup); else state->btps_minkey = CopyIndexTuple(itup); } @@ -1030,7 +1043,8 @@ _bt_uppershutdown(BTWriteState *wstate, BTPageState *state) } else { - Assert(s->btps_minkey != NULL); + Assert(BTreeTupleGetNAtts(s->btps_minkey, wstate->index) == + IndexRelationGetNumberOfKeyAttributes(wstate->index)); BTreeInnerTupleSetDownLink(s->btps_minkey, blkno); _bt_buildadd(wstate, s->btps_next, s->btps_minkey); pfree(s->btps_minkey); diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 76ffa6b0d47..0cecbf8e389 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -73,14 +73,14 @@ _bt_mkscankey(Relation rel, IndexTuple itup) indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); indoption = rel->rd_indoption; - Assert(indnkeyatts != 0); + Assert(indnkeyatts > 0); Assert(indnkeyatts <= indnatts); - Assert(BTreeTupGetNAtts(itup, rel) == indnatts || - BTreeTupGetNAtts(itup, rel) == indnkeyatts); + Assert(BTreeTupleGetNAtts(itup, rel) == indnatts || + BTreeTupleGetNAtts(itup, rel) == indnkeyatts); /* - * We'll execute search using ScanKey constructed on key columns. Non key - * (included) columns must be omitted. + * We'll execute search using scan key constructed on key columns. Non-key + * (INCLUDE index) columns are always omitted from scan keys. */ skey = (ScanKey) palloc(indnkeyatts * sizeof(ScanKeyData)); @@ -1427,6 +1427,7 @@ _bt_checkkeys(IndexScanDesc scan, bool isNull; Datum test; + Assert(key->sk_attno <= BTreeTupleGetNAtts(tuple, scan->indexRelation)); /* row-comparison keys need special processing */ if (key->sk_flags & SK_ROW_HEADER) { @@ -2082,29 +2083,133 @@ btproperty(Oid index_oid, int attno, } /* - * _bt_truncate_tuple() -- remove non-key (INCLUDE) attributes from index - * tuple. + * _bt_nonkey_truncate() -- create tuple without non-key suffix attributes. * - * Transforms an ordinal B-tree leaf index tuple into pivot tuple to be used - * as hikey or non-leaf page tuple with downlink. Note that t_tid offset - * will be overwritten in order to represent number of present tuple - * attributes. + * Returns truncated index tuple allocated in caller's memory context, with key + * attributes copied from caller's itup argument. Currently, suffix truncation + * is only performed to create pivot tuples in INCLUDE indexes, but some day it + * could be generalized to remove suffix attributes after the first + * distinguishing key attribute. + * + * Truncated tuple is guaranteed to be no larger than the original, which is + * important for staying under the 1/3 of a page restriction on tuple size. + * + * Note that returned tuple's t_tid offset will hold the number of attributes + * present, so the original item pointer offset is not represented. Caller + * should only change truncated tuple's downlink. 
*/ IndexTuple -_bt_truncate_tuple(Relation idxrel, IndexTuple olditup) +_bt_nonkey_truncate(Relation rel, IndexTuple itup) { - IndexTuple newitup; - int nkeyattrs = IndexRelationGetNumberOfKeyAttributes(idxrel); + int nkeyattrs = IndexRelationGetNumberOfKeyAttributes(rel); + IndexTuple truncated; /* - * We're assuming to truncate only regular leaf index tuples which have - * both key and non-key attributes. + * We should only ever truncate leaf index tuples, which must have both key + * and non-key attributes. It's never okay to truncate a second time. */ - Assert(BTreeTupGetNAtts(olditup, idxrel) == IndexRelationGetNumberOfAttributes(idxrel)); + Assert(BTreeTupleGetNAtts(itup, rel) == + IndexRelationGetNumberOfAttributes(rel)); + + truncated = index_truncate_tuple(RelationGetDescr(rel), itup, nkeyattrs); + BTreeTupleSetNAtts(truncated, nkeyattrs); - newitup = index_truncate_tuple(RelationGetDescr(idxrel), - olditup, nkeyattrs); - BTreeTupSetNAtts(newitup, nkeyattrs); + return truncated; +} + +/* + * _bt_check_natts() -- Verify tuple has expected number of attributes. + * + * Returns value indicating if the expected number of attributes were found + * for a particular offset on page. This can be used as a general purpose + * sanity check. + * + * Testing a tuple directly with BTreeTupleGetNAtts() should generally be + * preferred to calling here. That's usually more convenient, and is always + * more explicit. Call here instead when offnum's tuple may be a negative + * infinity tuple that uses the pre-v11 on-disk representation, or when a low + * context check is appropriate. + */ +bool +_bt_check_natts(Relation rel, Page page, OffsetNumber offnum) +{ + int16 natts = IndexRelationGetNumberOfAttributes(rel); + int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + IndexTuple itup; - return newitup; + /* + * We cannot reliably test a deleted or half-deleted page, since they have + * dummy high keys + */ + if (P_IGNORE(opaque)) + return true; + + Assert(offnum >= FirstOffsetNumber && + offnum <= PageGetMaxOffsetNumber(page)); + /* + * Mask allocated for number of keys in index tuple must be able to fit + * maximum possible number of index attributes + */ + StaticAssertStmt(BT_N_KEYS_OFFSET_MASK >= INDEX_MAX_KEYS, + "BT_N_KEYS_OFFSET_MASK can't fit INDEX_MAX_KEYS"); + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + + if (P_ISLEAF(opaque)) + { + if (offnum >= P_FIRSTDATAKEY(opaque)) + { + /* + * Leaf tuples that are not the page high key (non-pivot tuples) + * should never be truncated + */ + return BTreeTupleGetNAtts(itup, rel) == natts; + } + else + { + /* + * Rightmost page doesn't contain a page high key, so tuple was + * checked above as ordinary leaf tuple + */ + Assert(!P_RIGHTMOST(opaque)); + + /* Page high key tuple contains only key attributes */ + return BTreeTupleGetNAtts(itup, rel) == nkeyatts; + } + } + else /* !P_ISLEAF(opaque) */ + { + if (offnum == P_FIRSTDATAKEY(opaque)) + { + /* + * The first tuple on any internal page (possibly the first after + * its high key) is its negative infinity tuple. Negative infinity + * tuples are always truncated to zero attributes. They are a + * particular kind of pivot tuple. + * + * The number of attributes won't be explicitly represented if the + * negative infinity tuple was generated during a page split that + * occurred with a version of Postgres before v11. 
There must be a + * problem when there is an explicit representation that is + * non-zero, or when there is no explicit representation and the + * tuple is evidently not a pre-pg_upgrade tuple. + * + * Prior to v11, downlinks always had P_HIKEY as their offset. Use + * that to decide if the tuple is a pre-v11 tuple. + */ + return BTreeTupleGetNAtts(itup, rel) == 0 || + ((itup->t_info & INDEX_ALT_TID_MASK) == 0 && + ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY); + } + else + { + /* + * Tuple contains only key attributes, regardless of whether or not + * it is the page high key + */ + return BTreeTupleGetNAtts(itup, rel) == nkeyatts; + } + + } } diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 0986ef07cf3..fb8c769df9a 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -248,17 +248,16 @@ btree_xlog_split(bool onleft, bool lhighkey, XLogReaderState *record) _bt_restore_page(rpage, datapos, datalen); - /* Non-leaf page should always have its high key logged. */ - Assert(isleaf || lhighkey); - /* * When the high key isn't present is the wal record, then we assume it to - * be equal to the first key on the right page. + * be equal to the first key on the right page. It must be from the leaf + * level. */ if (!lhighkey) { ItemId hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque)); + Assert(isleaf); left_hikey = (IndexTuple) PageGetItem(rpage, hiItemId); left_hikeysz = ItemIdGetLength(hiItemId); } @@ -620,7 +619,7 @@ btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record) * heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer. * Note that we are not looking at tuple data here, just headers. */ - hoffnum = ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)); + hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid)); hitemid = PageGetItemId(hpage, hoffnum); /* @@ -805,6 +804,8 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent); else ItemPointerSetInvalid(&trunctuple.t_tid); + BTreeTupleSetNAtts(&trunctuple, 0); + if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) elog(ERROR, "could not add dummy high key to half-dead page"); @@ -915,6 +916,8 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record) ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent); else ItemPointerSetInvalid(&trunctuple.t_tid); + BTreeTupleSetNAtts(&trunctuple, 0); + if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) elog(ERROR, "could not add dummy high key to half-dead page"); diff --git a/src/include/access/itup.h b/src/include/access/itup.h index 555434ca803..bd3a7023809 100644 --- a/src/include/access/itup.h +++ b/src/include/access/itup.h @@ -155,7 +155,7 @@ extern Datum nocache_index_getattr(IndexTuple tup, int attnum, extern void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor, Datum *values, bool *isnull); extern IndexTuple CopyIndexTuple(IndexTuple source); -extern IndexTuple index_truncate_tuple(TupleDesc tupleDescriptor, - IndexTuple olditup, int new_indnatts); +extern IndexTuple index_truncate_tuple(TupleDesc sourceDescriptor, + IndexTuple source, int leavenatts); #endif /* ITUP_H */ diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 36619b220f1..7aa6afbbb80 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -186,59 +186,51 @@ typedef 
struct BTMetaPageData #define P_FIRSTKEY ((OffsetNumber) 2) #define P_FIRSTDATAKEY(opaque) (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY) - /* - * B-tree index with INCLUDE clause has non-key (included) attributes, which - * are used solely in index-only scans. Those non-key attributes are present - * in leaf index tuples which point to corresponding heap tuples. However, - * tree also contains "pivot" tuples. Pivot tuples are used for navigation - * during tree traversal. Pivot tuples include tuples on non-leaf pages and - * high key tuples. Such, tuples don't need to included attributes, because - * they have no use during tree traversal. This is why we truncate them in - * order to save some space. Therefore, B-tree index with INCLUDE clause - * contain tuples with variable number of attributes. - * - * In order to keep on-disk compatibility with upcoming suffix truncation of - * pivot tuples, we store number of attributes present inside tuple itself. - * Thankfully, offset number is always unused in pivot tuple. So, we use free - * bit of index tuple flags as sign that offset have alternative meaning: it - * stores number of keys present in index tuple (12 bit is far enough for that). - * And we have 4 bits reserved for future usage. + * INCLUDE B-Tree indexes have non-key attributes. These are extra + * attributes that may be returned by index-only scans, but do not influence + * the order of items in the index (formally, non-key attributes are not + * considered to be part of the key space). Non-key attributes are only + * present in leaf index tuples whose item pointers actually point to heap + * tuples. All other types of index tuples (collectively, "pivot" tuples) + * only have key attributes, since pivot tuples only ever need to represent + * how the key space is separated. In general, any B-Tree index that has + * more than one level (i.e. any index that does not just consist of a + * metapage and a single leaf root page) must have some number of pivot + * tuples, since pivot tuples are used for traversing the tree. * - * Right now INDEX_ALT_TID_MASK is set only on truncation of non-key - * attributes of included indexes. But potentially every pivot index tuple - * might have INDEX_ALT_TID_MASK set. Then this tuple should have number of - * attributes correctly set in BT_N_KEYS_OFFSET_MASK, and in future it might - * use some bits of BT_RESERVED_OFFSET_MASK. + * We store the number of attributes present inside pivot tuples by abusing + * their item pointer offset field, since pivot tuples never need to store a + * real offset (downlinks only need to store a block number). The offset + * field only stores the number of attributes when the INDEX_ALT_TID_MASK + * bit is set (we never assume that pivot tuples must explicitly store the + * number of attributes, and currently do not bother storing the number of + * attributes unless indnkeyatts actually differs from indnatts). + * INDEX_ALT_TID_MASK is only used for pivot tuples at present, though it's + * possible that it will be used within non-pivot tuples in the future. Do + * not assume that a tuple with INDEX_ALT_TID_MASK set must be a pivot + * tuple. * - * Non-pivot tuples might also use bit of BT_RESERVED_OFFSET_MASK. Despite - * they store heap tuple offset, higher bits of offset are always free. + * The 12 least significant offset bits are used to represent the number of + * attributes in INDEX_ALT_TID_MASK tuples, leaving 4 bits that are reserved + * for future use (BT_RESERVED_OFFSET_MASK bits). 
BT_N_KEYS_OFFSET_MASK should + * be large enough to store any number <= INDEX_MAX_KEYS. */ -#define INDEX_ALT_TID_MASK INDEX_AM_RESERVED_BIT /* flag indicating t_tid - * offset has an - * alternative meaning */ -#define BT_RESERVED_OFFSET_MASK 0xF000 /* mask of bits in t_tid offset - * reserved for future usage */ -#define BT_N_KEYS_OFFSET_MASK 0x0FFF /* mask of bits in t_tid offset - * holding number of attributes - * actually present in index tuple */ - -/* Acess to downlink block number */ +#define INDEX_ALT_TID_MASK INDEX_AM_RESERVED_BIT +#define BT_RESERVED_OFFSET_MASK 0xF000 +#define BT_N_KEYS_OFFSET_MASK 0x0FFF + +/* Get/set downlink block number */ #define BTreeInnerTupleGetDownLink(itup) \ ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid)) - #define BTreeInnerTupleSetDownLink(itup, blkno) \ ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)) -/* Set number of attributes to B-tree index tuple overriding t_tid offset */ -#define BTreeTupSetNAtts(itup, n) \ - do { \ - (itup)->t_info |= INDEX_ALT_TID_MASK; \ - ItemPointerSetOffsetNumber(&(itup)->t_tid, n); \ - } while(0) - -/* Get number of attributes in B-tree index tuple */ -#define BTreeTupGetNAtts(itup, index) \ +/* + * Get/set number of attributes within B-tree index tuple. Asserts should be + * removed when BT_RESERVED_OFFSET_MASK bits will be used. + */ +#define BTreeTupleGetNAtts(itup, rel) \ ( \ (itup)->t_info & INDEX_ALT_TID_MASK ? \ ( \ @@ -246,8 +238,14 @@ typedef struct BTMetaPageData ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_N_KEYS_OFFSET_MASK \ ) \ : \ - IndexRelationGetNumberOfAttributes(index) \ + IndexRelationGetNumberOfAttributes(rel) \ ) +#define BTreeTupleSetNAtts(itup, n) \ + do { \ + (itup)->t_info |= INDEX_ALT_TID_MASK; \ + Assert(((n) & BT_RESERVED_OFFSET_MASK) == 0); \ + ItemPointerSetOffsetNumber(&(itup)->t_tid, (n) & BT_N_KEYS_OFFSET_MASK); \ + } while(0) /* * Operator strategy numbers for B-tree have been moved to access/stratnum.h, @@ -561,7 +559,6 @@ extern bool _bt_first(IndexScanDesc scan, ScanDirection dir); extern bool _bt_next(IndexScanDesc scan, ScanDirection dir); extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost, Snapshot snapshot); -extern bool _bt_check_natts(Relation index, Page page, OffsetNumber offnum); /* * prototypes for functions in nbtutils.c @@ -590,7 +587,8 @@ extern bytea *btoptions(Datum reloptions, bool validate); extern bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull); -extern IndexTuple _bt_truncate_tuple(Relation idxrel, IndexTuple olditup); +extern IndexTuple _bt_nonkey_truncate(Relation rel, IndexTuple itup); +extern bool _bt_check_natts(Relation rel, Page page, OffsetNumber offnum); /* * prototypes for functions in nbtvalidate.c diff --git a/src/test/regress/expected/index_including.out b/src/test/regress/expected/index_including.out index 1d253ee77d6..b7d1812ec5c 100644 --- a/src/test/regress/expected/index_including.out +++ b/src/test/regress/expected/index_including.out @@ -1,81 +1,78 @@ /* * 1.test CREATE INDEX + * + * Deliberately avoid dropping objects in this section, to get some pg_dump + * coverage. 
*/ -- Regular index with included columns -CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c3,c4); +CREATE TABLE tbl_include_reg (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_reg SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c3,c4); -- must fail because of intersection of key and included columns -CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c1,c3); +CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c1,c3); ERROR: included columns must not intersect with key columns SELECT pg_get_indexdef(i.indexrelid) FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid -WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; - pg_get_indexdef --------------------------------------------------------------------------- - CREATE INDEX tbl_idx ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +WHERE i.indrelid = 'tbl_include_reg'::regclass ORDER BY c.relname; + pg_get_indexdef +-------------------------------------------------------------------------------------------------- + CREATE INDEX tbl_include_reg_idx ON public.tbl_include_reg USING btree (c1, c2) INCLUDE (c3, c4) (1 row) -DROP TABLE tbl; -- Unique index and unique constraint -CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); -ALTER TABLE tbl add UNIQUE USING INDEX tbl_idx_unique; -ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4); +CREATE TABLE tbl_include_unique1 (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_unique1 SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_include_unique1_idx_unique ON tbl_include_unique1 using btree (c1, c2) INCLUDE (c3, c4); +ALTER TABLE tbl_include_unique1 add UNIQUE USING INDEX tbl_include_unique1_idx_unique; +ALTER TABLE tbl_include_unique1 add UNIQUE (c1, c2) INCLUDE (c3, c4); SELECT pg_get_indexdef(i.indexrelid) FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid -WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; - pg_get_indexdef ---------------------------------------------------------------------------------------------- - CREATE UNIQUE INDEX tbl_c1_c2_c3_c4_key ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) - CREATE UNIQUE INDEX tbl_idx_unique ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +WHERE i.indrelid = 'tbl_include_unique1'::regclass ORDER BY c.relname; + pg_get_indexdef +----------------------------------------------------------------------------------------------------------------------------- + CREATE UNIQUE INDEX tbl_include_unique1_c1_c2_c3_c4_key ON public.tbl_include_unique1 USING btree (c1, c2) INCLUDE (c3, c4) + CREATE UNIQUE INDEX tbl_include_unique1_idx_unique ON public.tbl_include_unique1 USING btree (c1, c2) INCLUDE (c3, c4) (2 rows) -DROP TABLE tbl; -- Unique index and unique constraint. Both must fail. 
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); -ERROR: could not create unique index "tbl_idx_unique" +CREATE TABLE tbl_include_unique2 (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_unique2 SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_include_unique2_idx_unique ON tbl_include_unique2 using btree (c1, c2) INCLUDE (c3, c4); +ERROR: could not create unique index "tbl_include_unique2_idx_unique" DETAIL: Key (c1, c2)=(1, 2) is duplicated. -ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4); -ERROR: could not create unique index "tbl_c1_c2_c3_c4_key" +ALTER TABLE tbl_include_unique2 add UNIQUE (c1, c2) INCLUDE (c3, c4); +ERROR: could not create unique index "tbl_include_unique2_c1_c2_c3_c4_key" DETAIL: Key (c1, c2)=(1, 2) is duplicated. -DROP TABLE tbl; -- PK constraint -CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); +CREATE TABLE tbl_include_pk (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_pk SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ALTER TABLE tbl_include_pk add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); SELECT pg_get_indexdef(i.indexrelid) FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid -WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; - pg_get_indexdef ----------------------------------------------------------------------------------- - CREATE UNIQUE INDEX tbl_pkey ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +WHERE i.indrelid = 'tbl_include_pk'::regclass ORDER BY c.relname; + pg_get_indexdef +-------------------------------------------------------------------------------------------------------- + CREATE UNIQUE INDEX tbl_include_pk_pkey ON public.tbl_include_pk USING btree (c1, c2) INCLUDE (c3, c4) (1 row) -DROP TABLE tbl; -CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); -ALTER TABLE tbl add PRIMARY KEY USING INDEX tbl_idx_unique; +CREATE TABLE tbl_include_box (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_box SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_include_box_idx_unique ON tbl_include_box using btree (c1, c2) INCLUDE (c3, c4); +ALTER TABLE tbl_include_box add PRIMARY KEY USING INDEX tbl_include_box_idx_unique; SELECT pg_get_indexdef(i.indexrelid) FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid -WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; - pg_get_indexdef ----------------------------------------------------------------------------------------- - CREATE UNIQUE INDEX tbl_idx_unique ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4) +WHERE i.indrelid = 'tbl_include_box'::regclass ORDER BY c.relname; + pg_get_indexdef +---------------------------------------------------------------------------------------------------------------- + CREATE UNIQUE INDEX tbl_include_box_idx_unique ON public.tbl_include_box USING btree (c1, c2) INCLUDE (c3, c4) (1 row) -DROP TABLE tbl; -- PK constraint. Must fail. 
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); -ERROR: could not create unique index "tbl_pkey" +CREATE TABLE tbl_include_box_pk (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_box_pk SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ALTER TABLE tbl_include_box_pk add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); +ERROR: could not create unique index "tbl_include_box_pk_pkey" DETAIL: Key (c1, c2)=(1, 2) is duplicated. -DROP TABLE tbl; /* * 2. Test CREATE TABLE with constraint */ diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index ac0fb539e98..8afb1f2f7e3 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -185,6 +185,12 @@ sql_sizing|f sql_sizing_profiles|f stud_emp|f student|f +tbl_include_box|t +tbl_include_box_pk|f +tbl_include_pk|t +tbl_include_reg|t +tbl_include_unique1|t +tbl_include_unique2|f tenk1|t tenk2|t test_range_excl|t diff --git a/src/test/regress/sql/index_including.sql b/src/test/regress/sql/index_including.sql index caedc9866d3..f83b2c64ac6 100644 --- a/src/test/regress/sql/index_including.sql +++ b/src/test/regress/sql/index_including.sql @@ -1,59 +1,56 @@ /* * 1.test CREATE INDEX + * + * Deliberately avoid dropping objects in this section, to get some pg_dump + * coverage. */ -- Regular index with included columns -CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c3,c4); +CREATE TABLE tbl_include_reg (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_reg SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c3,c4); -- must fail because of intersection of key and included columns -CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c1,c3); +CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c1,c3); SELECT pg_get_indexdef(i.indexrelid) FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid -WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; -DROP TABLE tbl; +WHERE i.indrelid = 'tbl_include_reg'::regclass ORDER BY c.relname; -- Unique index and unique constraint -CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); -ALTER TABLE tbl add UNIQUE USING INDEX tbl_idx_unique; -ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4); +CREATE TABLE tbl_include_unique1 (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_unique1 SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_include_unique1_idx_unique ON tbl_include_unique1 using btree (c1, c2) INCLUDE (c3, c4); +ALTER TABLE tbl_include_unique1 add UNIQUE USING INDEX tbl_include_unique1_idx_unique; +ALTER TABLE tbl_include_unique1 add UNIQUE (c1, c2) INCLUDE (c3, c4); SELECT pg_get_indexdef(i.indexrelid) FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid -WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; -DROP TABLE tbl; +WHERE i.indrelid = 'tbl_include_unique1'::regclass ORDER BY c.relname; -- Unique index and unique constraint. Both must fail. 
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); -ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4); -DROP TABLE tbl; +CREATE TABLE tbl_include_unique2 (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_unique2 SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_include_unique2_idx_unique ON tbl_include_unique2 using btree (c1, c2) INCLUDE (c3, c4); +ALTER TABLE tbl_include_unique2 add UNIQUE (c1, c2) INCLUDE (c3, c4); -- PK constraint -CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); +CREATE TABLE tbl_include_pk (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_pk SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ALTER TABLE tbl_include_pk add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); SELECT pg_get_indexdef(i.indexrelid) FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid -WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; -DROP TABLE tbl; +WHERE i.indrelid = 'tbl_include_pk'::regclass ORDER BY c.relname; -CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4); -ALTER TABLE tbl add PRIMARY KEY USING INDEX tbl_idx_unique; +CREATE TABLE tbl_include_box (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_box SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +CREATE UNIQUE INDEX tbl_include_box_idx_unique ON tbl_include_box using btree (c1, c2) INCLUDE (c3, c4); +ALTER TABLE tbl_include_box add PRIMARY KEY USING INDEX tbl_include_box_idx_unique; SELECT pg_get_indexdef(i.indexrelid) FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid -WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname; -DROP TABLE tbl; +WHERE i.indrelid = 'tbl_include_box'::regclass ORDER BY c.relname; -- PK constraint. Must fail. -CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box); -INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; -ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); -DROP TABLE tbl; +CREATE TABLE tbl_include_box_pk (c1 int, c2 int, c3 int, c4 box); +INSERT INTO tbl_include_box_pk SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x; +ALTER TABLE tbl_include_box_pk add PRIMARY KEY (c1, c2) INCLUDE (c3, c4); /*