@ -32,11 +32,8 @@
typedef struct GinPageOpaqueData
{
BlockNumber rightlink ; /* next page if any */
OffsetNumber maxoff ; /* number entries on GIN_DATA page: number of
* heap ItemPointers on GIN_DATA | GIN_LEAF page
* or number of PostingItems on GIN_DATA &
* ~ GIN_LEAF page . On GIN_LIST page , number of
* heap tuples . */
OffsetNumber maxoff ; /* number of PostingItems on GIN_DATA & ~GIN_LEAF page.
* On GIN_LIST page , number of heap tuples . */
uint16 flags ; /* see bit definitions below */
} GinPageOpaqueData ;
@ -49,6 +46,7 @@ typedef GinPageOpaqueData *GinPageOpaque;
# define GIN_LIST (1 << 4)
# define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
# define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not updated */
# define GIN_COMPRESSED (1 << 7)
/* Page numbers of fixed-location pages */
# define GIN_METAPAGE_BLKNO (0)
@ -88,7 +86,12 @@ typedef struct GinMetaPageData
* GIN version number ( ideally this should have been at the front , but too
* late now . Don ' t move it ! )
*
* Currently 1 ( for indexes initialized in 9.1 or later )
* Currently 2 ( for indexes initialized in 9.4 or later )
*
* Version 1 ( indexes initialized in version 9.1 , 9.2 or 9.3 ) , is
* compatible , but may contain uncompressed posting tree ( leaf ) pages and
* posting lists . They will be converted to compressed format when
* modified .
*
* Version 0 ( indexes initialized in 9.0 or before ) is compatible but may
* be missing null entries , including both null keys and placeholders .
@ -97,7 +100,7 @@ typedef struct GinMetaPageData
int32 ginVersion ;
} GinMetaPageData ;
# define GIN_CURRENT_VERSION 1
# define GIN_CURRENT_VERSION 2
# define GinPageGetMeta(p) \
( ( GinMetaPageData * ) PageGetContents ( p ) )
@ -116,6 +119,8 @@ typedef struct GinMetaPageData
# define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST )
# define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
# define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
# define GinPageIsCompressed(page) ( GinPageGetOpaque(page)->flags & GIN_COMPRESSED )
# define GinPageSetCompressed(page) ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED )
# define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
# define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
@ -213,13 +218,16 @@ typedef signed char GinNullCategory;
# define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) )
# define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
# define GinGetPostingOffset(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
# define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,n)
# define GinGetPosting(itup) ((ItemPointer) ((char*)(itup) + GinGetPostingOffset(itup)))
# define GIN_ITUP_COMPRESSED (1 << 31)
# define GinGetPostingOffset(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED))
# define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED)
# define GinGetPosting(itup) ((Pointer) ((char*)(itup) + GinGetPostingOffset(itup)))
# define GinItupIsCompressed(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED)
# define GinMaxItemSize \
MAXALIGN_DOWN ( ( ( BLCKSZ - SizeOfPageHeaderData - \
MAXALIGN ( sizeof ( GinPageOpaqueData ) ) ) / 3 - sizeof ( ItemIdData ) ) )
Min ( INDEX_SIZE_MASK , \
MAXALIGN_DOWN ( ( ( BLCKSZ - SizeOfPageHeaderData - \
MAXALIGN ( sizeof ( GinPageOpaqueData ) ) ) / 6 - sizeof ( ItemIdData ) ) ) )
/*
* Access macros for non - leaf entry tuples
@ -230,30 +238,59 @@ typedef signed char GinNullCategory;
/*
* Data ( posting tree ) pages
*
* Posting tree pages don ' t store regular tuples . Non - leaf pages contain
* PostingItems , which are pairs of ItemPointers and child block numbers .
* Leaf pages contain GinPostingLists and an uncompressed array of item
* pointers .
*
* In a leaf page , the compressed posting lists are stored after the regular
* page header , one after each other . Although we don ' t store regular tuples ,
* pd_lower is used to indicate the end of the posting lists . After that , free
* space follows . This layout is compatible with the " standard " heap and
* index page layout described in bufpage . h , so that we can e . g set buffer_std
* when writing WAL records .
*
* In the special space is the GinPageOpaque struct .
*/
# define GinDataLeafPageGetPostingList(page) \
( GinPostingList * ) ( ( PageGetContents ( page ) + MAXALIGN ( sizeof ( ItemPointerData ) ) ) )
# define GinDataLeafPageGetPostingListSize(page) \
( ( ( PageHeader ) page ) - > pd_lower - MAXALIGN ( SizeOfPageHeaderData ) - MAXALIGN ( sizeof ( ItemPointerData ) ) )
# define GinDataLeafPageSetPostingListSize(page, size) \
{ \
Assert ( size < = GinDataLeafMaxContentSize ) ; \
( ( PageHeader ) page ) - > pd_lower = ( size ) + MAXALIGN ( SizeOfPageHeaderData ) + MAXALIGN ( sizeof ( ItemPointerData ) ) ; \
}
# define GinDataLeafPageIsEmpty(page) \
( GinPageIsCompressed ( page ) ? ( GinDataLeafPageGetPostingListSize ( page ) = = 0 ) : ( GinPageGetOpaque ( page ) - > maxoff < FirstOffsetNumber ) )
# define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page)
# define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page))
/*
* Pointer to the data portion of a posting tree page . For internal pages ,
* that ' s the beginning of the array of PostingItems . For compressed leaf
* pages , the first compressed posting list . For uncompressed ( pre - 9.4 ) leaf
* pages , it ' s the beginning of the ItemPointer array .
*/
# define GinDataPageGetData(page) \
( PageGetContents ( page ) + MAXALIGN ( sizeof ( ItemPointerData ) ) )
/* non-leaf pages contain PostingItems */
# define GinDataPageGetPostingItem(page, i) \
( ( PostingItem * ) ( GinDataPageGetData ( page ) + ( ( i ) - 1 ) * sizeof ( PostingItem ) ) )
/* leaf pages contain ItemPointers */
# define GinDataPageGetItemPointer(page, i) \
( ( ItemPointer ) ( GinDataPageGetData ( page ) + ( ( i ) - 1 ) * sizeof ( ItemPointerData ) ) )
# define GinSizeOfDataPageItem(page) \
( GinPageIsLeaf ( page ) ? sizeof ( ItemPointerData ) : sizeof ( PostingItem ) )
# define GinDataPageGetFreeSpace(page) \
# define GinNonLeafDataPageGetFreeSpace(page) \
( BLCKSZ - MAXALIGN ( SizeOfPageHeaderData ) \
- MAXALIGN ( sizeof ( ItemPointerData ) ) \
- GinPageGetOpaque ( page ) - > maxoff * GinSizeOfDataPageItem ( page ) \
- GinPageGetOpaque ( page ) - > maxoff * sizeof ( PostingItem ) \
- MAXALIGN ( sizeof ( GinPageOpaqueData ) ) )
# define GinMaxLeafDataItems \
( ( BLCKSZ - MAXALIGN ( SizeOfPageHeaderData ) - \
MAXALIGN ( sizeof ( ItemPointerData ) ) - \
MAXALIGN ( sizeof ( GinPageOpaqueData ) ) ) \
/ sizeof ( ItemPointerData ) )
# define GinDataLeafMaxContentSize \
( BLCKSZ - MAXALIGN ( SizeOfPageHeaderData ) \
- MAXALIGN ( sizeof ( ItemPointerData ) ) \
- MAXALIGN ( sizeof ( GinPageOpaqueData ) ) )
/*
* List pages
@ -318,6 +355,23 @@ typedef struct GinState
Oid supportCollation [ INDEX_MAX_KEYS ] ;
} GinState ;
/*
* A compressed posting list .
*
* Note : This requires 2 - byte alignment .
*/
typedef struct
{
ItemPointerData first ; /* first item in this posting list (unpacked) */
uint16 nbytes ; /* number of bytes that follow */
unsigned char bytes [ 1 ] ; /* varbyte encoded items (variable length) */
} GinPostingList ;
# define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) )
# define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur))))
/* XLog stuff */
# define XLOG_GIN_CREATE_INDEX 0x00
@ -328,18 +382,21 @@ typedef struct ginxlogCreatePostingTree
{
RelFileNode node ;
BlockNumber blkno ;
uint32 nitem ;
/* follows list of heap's ItemPointer */
uint32 size ;
/* A compressed posting list follows */
} ginxlogCreatePostingTree ;
# define XLOG_GIN_INSERT 0x20
typedef struct ginxlogInsert
/*
* The format of the insertion record varies depending on the page type .
* ginxlogInsert is the common part between all variants .
*/
typedef struct
{
RelFileNode node ;
BlockNumber blkno ;
uint16 flags ; /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */
OffsetNumber offset ;
/*
* FOLLOWS :
@ -358,17 +415,25 @@ typedef struct ginxlogInsert
typedef struct
{
OffsetNumber offset ;
bool isDelete ;
IndexTupleData tuple ; /* variable length */
} ginxlogInsertEntry ;
typedef struct
{
OffsetNumber nitem ;
ItemPointerData items [ 1 ] ; /* variable length */
} ginxlogInsertDataLeaf ;
uint16 length ;
uint16 unmodifiedsize ;
/* In an insert to an internal data page, the payload is a PostingItem */
/* compressed segments, variable length */
char newdata [ 1 ] ;
} ginxlogRecompressDataLeaf ;
typedef struct
{
OffsetNumber offset ;
PostingItem newitem ;
} ginxlogInsertDataInternal ;
# define XLOG_GIN_SPLIT 0x30
@ -401,25 +466,58 @@ typedef struct
/* FOLLOWS: IndexTuples */
} ginxlogSplitEntry ;
typedef struct
{
uint16 lsize ;
uint16 rsize ;
ItemPointerData lrightbound ; /* new right bound of left page */
ItemPointerData rrightbound ; /* new right bound of right page */
/* FOLLOWS: new compressed posting lists of left and right page */
char newdata [ 1 ] ;
} ginxlogSplitDataLeaf ;
typedef struct
{
OffsetNumber separator ;
OffsetNumber nitem ;
ItemPointerData rightbound ;
/* FOLLOWS: array of ItemPointers (for leaf) or PostingItems (non-leaf) */
} ginxlogSplitData ;
/* FOLLOWS: array of PostingItems */
} ginxlogSplitDataInternal ;
/*
* Vacuum simply WAL - logs the whole page , when anything is modified . This
* functionally identical heap_newpage records , but is kept separate for
* debugging purposes . ( When inspecting the WAL stream , it ' s easier to see
* what ' s going on when GIN vacuum records are marked as such , not as heap
* records . ) This is currently only used for entry tree leaf pages .
*/
# define XLOG_GIN_VACUUM_PAGE 0x40
typedef struct ginxlogVacuumPage
{
RelFileNode node ;
BlockNumber blkno ;
OffsetNumber nitem ;
/* follows content of page */
uint16 hole_offset ; /* number of bytes before "hole" */
uint16 hole_length ; /* number of bytes in "hole" */
/* entire page contents (minus the hole) follow at end of record */
} ginxlogVacuumPage ;
/*
* Vacuuming posting tree leaf page is WAL - logged like recompression caused
* by insertion .
*/
# define XLOG_GIN_VACUUM_DATA_LEAF_PAGE 0x90
typedef struct ginxlogVacuumDataLeafPage
{
RelFileNode node ;
BlockNumber blkno ;
ginxlogRecompressDataLeaf data ;
} ginxlogVacuumDataLeafPage ;
# define XLOG_GIN_DELETE_PAGE 0x50
typedef struct ginxlogDeletePage
@ -506,6 +604,7 @@ typedef struct GinBtreeStack
BlockNumber blkno ;
Buffer buffer ;
OffsetNumber off ;
ItemPointerData iptr ;
/* predictNumber contains predicted number of pages on current level */
uint32 predictNumber ;
struct GinBtreeStack * parent ;
@ -513,6 +612,14 @@ typedef struct GinBtreeStack
typedef struct GinBtreeData * GinBtree ;
/* Return codes for GinBtreeData.placeToPage method */
typedef enum
{
UNMODIFIED ,
INSERTED ,
SPLIT
} GinPlaceToPageRC ;
typedef struct GinBtreeData
{
/* search methods */
@ -523,8 +630,7 @@ typedef struct GinBtreeData
/* insert methods */
OffsetNumber ( * findChildPtr ) ( GinBtree , Page , BlockNumber , OffsetNumber ) ;
bool ( * placeToPage ) ( GinBtree , Buffer , OffsetNumber , void * , BlockNumber , XLogRecData * * ) ;
Page ( * splitPage ) ( GinBtree , Buffer , Buffer , OffsetNumber , void * , BlockNumber , XLogRecData * * ) ;
GinPlaceToPageRC ( * placeToPage ) ( GinBtree , Buffer , GinBtreeStack * , void * , BlockNumber , XLogRecData * * , Page * , Page * ) ;
void * ( * prepareDownlink ) ( GinBtree , Buffer ) ;
void ( * fillRoot ) ( GinBtree , Page , BlockNumber , Page , BlockNumber , Page ) ;
@ -577,14 +683,17 @@ extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
/* ginentrypage.c */
extern IndexTuple GinFormTuple ( GinState * ginstate ,
OffsetNumber attnum , Datum key , GinNullCategory category ,
ItemPointerData * ipd , uint32 nipd , bool errorTooBig ) ;
extern void GinShortenTuple ( IndexTuple itup , uint32 nipd ) ;
Pointer data , Size dataSize , int nipd , bool errorTooBig ) ;
extern void ginPrepareEntryScan ( GinBtree btree , OffsetNumber attnum ,
Datum key , GinNullCategory category ,
GinState * ginstate ) ;
extern void ginEntryFillRoot ( GinBtree btree , Page root , BlockNumber lblkno , Page lpage , BlockNumber rblkno , Page rpage ) ;
extern ItemPointer ginReadTuple ( GinState * ginstate , OffsetNumber attnum ,
IndexTuple itup , int * nitems ) ;
/* gindatapage.c */
extern ItemPointer GinDataLeafPageGetItems ( Page page , int * nitems ) ;
extern int GinDataLeafPageGetItemsToTbm ( Page page , TIDBitmap * tbm ) ;
extern BlockNumber createPostingTree ( Relation index ,
ItemPointerData * items , uint32 nitems ,
GinStatsData * buildStats ) ;
@ -598,6 +707,15 @@ extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBl
extern void ginDataFillRoot ( GinBtree btree , Page root , BlockNumber lblkno , Page lpage , BlockNumber rblkno , Page rpage ) ;
extern void ginPrepareDataScan ( GinBtree btree , Relation index , BlockNumber rootBlkno ) ;
/*
* This is declared in ginvacuum . c , but is passed between ginVacuumItemPointers
* and ginVacuumPostingTreeLeaf and as an opaque struct , so we need a forward
* declaration for it .
*/
typedef struct GinVacuumState GinVacuumState ;
extern void ginVacuumPostingTreeLeaf ( Relation rel , Buffer buf , GinVacuumState * gvs ) ;
/* ginscan.c */
/*
@ -679,7 +797,7 @@ typedef struct GinScanEntryData
/* used for Posting list and one page in Posting tree */
ItemPointerData * list ;
uint32 nlist ;
int nlist ;
OffsetNumber offset ;
bool isFinished ;
@ -717,6 +835,8 @@ extern Datum gingetbitmap(PG_FUNCTION_ARGS);
/* ginvacuum.c */
extern Datum ginbulkdelete ( PG_FUNCTION_ARGS ) ;
extern Datum ginvacuumcleanup ( PG_FUNCTION_ARGS ) ;
extern ItemPointer ginVacuumItemPointers ( GinVacuumState * gvs ,
ItemPointerData * items , int nitem , int * nremaining ) ;
/* ginbulk.c */
typedef struct GinEntryAccumulator
@ -770,11 +890,17 @@ extern void ginInsertCleanup(GinState *ginstate,
bool vac_delay , IndexBulkDeleteResult * stats ) ;
/* ginpostinglist.c */
extern uint32 ginMergeItemPointers ( ItemPointerData * dst ,
extern GinPostingList * ginCompressPostingList ( const ItemPointer ptrs , int nptrs ,
int maxsize , int * nwritten ) ;
extern int ginPostingListDecodeAllSegmentsToTbm ( GinPostingList * ptr , int totalsize , TIDBitmap * tbm ) ;
extern ItemPointer ginPostingListDecodeAllSegments ( GinPostingList * ptr , int len , int * ndecoded ) ;
extern ItemPointer ginPostingListDecode ( GinPostingList * ptr , int * ndecoded ) ;
extern int ginMergeItemPointers ( ItemPointerData * dst ,
ItemPointerData * a , uint32 na ,
ItemPointerData * b , uint32 nb ) ;
/*
* Merging the results of several gin scans compares item pointers a lot ,
* so we want this to be inlined . But if the compiler doesn ' t support that ,