mirror of https://github.com/postgres/postgres
parent
427c6b5b98
commit
8a3631f8d8
@ -0,0 +1,145 @@ |
||||
#include "postgres.h" |
||||
|
||||
#include <float.h> |
||||
|
||||
#include "access/gist.h" |
||||
#include "access/itup.h" |
||||
#include "access/tuptoaster.h" |
||||
#include "storage/bufpage.h" |
||||
#include "utils/array.h" |
||||
#include "utils/builtins.h" |
||||
|
||||
#include "tsvector.h" |
||||
#include "query.h" |
||||
#include "query_cleanup.h" |
||||
|
||||
PG_FUNCTION_INFO_V1(gin_extract_tsvector); |
||||
Datum gin_extract_tsvector(PG_FUNCTION_ARGS); |
||||
|
||||
Datum |
||||
gin_extract_tsvector(PG_FUNCTION_ARGS) { |
||||
tsvector *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); |
||||
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1); |
||||
Datum *entries = NULL; |
||||
|
||||
*nentries = 0; |
||||
if ( vector->size > 0 ) { |
||||
int i; |
||||
WordEntry *we = ARRPTR( vector ); |
||||
|
||||
*nentries = (uint32)vector->size; |
||||
entries = (Datum*)palloc( sizeof(Datum) * vector->size ); |
||||
|
||||
for(i=0;i<vector->size;i++) { |
||||
text *txt = (text*)palloc( VARHDRSZ + we->len ); |
||||
|
||||
VARATT_SIZEP(txt) = VARHDRSZ + we->len;
|
||||
memcpy( VARDATA(txt), STRPTR( vector ) + we->pos, we->len );
|
||||
|
||||
entries[i] = PointerGetDatum( txt ); |
||||
|
||||
we++; |
||||
} |
||||
} |
||||
|
||||
PG_FREE_IF_COPY(vector, 0); |
||||
PG_RETURN_POINTER(entries); |
||||
} |
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_extract_tsquery); |
||||
Datum gin_extract_tsquery(PG_FUNCTION_ARGS); |
||||
|
||||
Datum |
||||
gin_extract_tsquery(PG_FUNCTION_ARGS) { |
||||
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); |
||||
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1); |
||||
StrategyNumber strategy = DatumGetUInt16( PG_GETARG_DATUM(2) );
|
||||
Datum *entries = NULL; |
||||
|
||||
*nentries = 0; |
||||
if ( query->size > 0 ) { |
||||
int4 i, j=0, len; |
||||
ITEM *item; |
||||
|
||||
item = clean_NOT_v2(GETQUERY(query), &len); |
||||
if ( !item ) |
||||
elog(ERROR,"Query requires full scan, GIN doesn't support it"); |
||||
|
||||
item = GETQUERY(query); |
||||
|
||||
for(i=0; i<query->size; i++) |
||||
if ( item[i].type == VAL ) |
||||
(*nentries)++; |
||||
|
||||
entries = (Datum*)palloc( sizeof(Datum) * (*nentries) ); |
||||
|
||||
for(i=0; i<query->size; i++) |
||||
if ( item[i].type == VAL ) { |
||||
text *txt; |
||||
|
||||
txt = (text*)palloc( VARHDRSZ + item[i].length ); |
||||
|
||||
VARATT_SIZEP(txt) = VARHDRSZ + item[i].length;
|
||||
memcpy( VARDATA(txt), GETOPERAND( query ) + item[i].distance, item[i].length ); |
||||
|
||||
entries[j++] = PointerGetDatum( txt ); |
||||
|
||||
if ( strategy == 1 && item[i].weight != 0 ) |
||||
elog(ERROR,"With class of lexeme restrictions use @@@ operation"); |
||||
} |
||||
|
||||
} |
||||
|
||||
PG_FREE_IF_COPY(query, 0); |
||||
PG_RETURN_POINTER(entries); |
||||
} |
||||
|
||||
typedef struct { |
||||
ITEM *frst; |
||||
bool *mapped_check; |
||||
} GinChkVal;
|
||||
|
||||
static bool |
||||
checkcondition_gin(void *checkval, ITEM * val) { |
||||
GinChkVal *gcv = (GinChkVal*)checkval; |
||||
|
||||
return gcv->mapped_check[ val - gcv->frst ]; |
||||
} |
||||
|
||||
PG_FUNCTION_INFO_V1(gin_ts_consistent); |
||||
Datum gin_ts_consistent(PG_FUNCTION_ARGS); |
||||
|
||||
Datum |
||||
gin_ts_consistent(PG_FUNCTION_ARGS) { |
||||
bool *check = (bool*)PG_GETARG_POINTER(0); |
||||
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(2)); |
||||
bool res = FALSE; |
||||
|
||||
if ( query->size > 0 ) { |
||||
int4 i, j=0; |
||||
ITEM *item; |
||||
GinChkVal gcv; |
||||
|
||||
gcv.frst = item = GETQUERY(query);
|
||||
gcv.mapped_check= (bool*)palloc( sizeof(bool) * query->size ); |
||||
|
||||
for(i=0; i<query->size; i++) |
||||
if ( item[i].type == VAL ) |
||||
gcv.mapped_check[ i ] = check[ j++ ]; |
||||
|
||||
|
||||
res = TS_execute( |
||||
GETQUERY(query), |
||||
&gcv, |
||||
true, |
||||
checkcondition_gin |
||||
); |
||||
|
||||
} |
||||
|
||||
PG_FREE_IF_COPY(query, 2); |
||||
PG_RETURN_BOOL(res); |
||||
} |
||||
|
||||
|
@ -0,0 +1,32 @@ |
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# Makefile--
|
||||
# Makefile for access/gin
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $PostgreSQL: pgsql/src/backend/access/gin/Makefile,v 1.1 2006/05/02 11:28:54 teodor Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
subdir = src/backend/access/gin
|
||||
top_builddir = ../../../..
|
||||
include $(top_builddir)/src/Makefile.global |
||||
|
||||
OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
|
||||
ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
|
||||
ginbulk.o
|
||||
|
||||
all: SUBSYS.o |
||||
|
||||
SUBSYS.o: $(OBJS) |
||||
$(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
|
||||
|
||||
depend dep: |
||||
$(CC) -MM $(CFLAGS) *.c >depend
|
||||
|
||||
clean: |
||||
rm -f SUBSYS.o $(OBJS)
|
||||
|
||||
ifeq (depend,$(wildcard depend)) |
||||
include depend |
||||
endif |
@ -0,0 +1,153 @@ |
||||
Gin for PostgreSQL |
||||
================== |
||||
|
||||
Gin was sponsored by jfg://networks (http://www.jfg-networks.com/) |
||||
|
||||
Gin stands for Generalized Inverted Index and should be considered as a genie, |
||||
not a drink. |
||||
|
||||
Generalized means that the index does not know which operation it accelerates. |
||||
It instead works with custom strategies, defined for specific data types (read |
||||
"Index Method Strategies" in the PostgreSQL documentation). In that sense, Gin |
||||
is similar to GiST and differs from btree indices, which have predefined, |
||||
comparison-based operations. |
||||
|
||||
An inverted index is an index structure storing a set of (key, posting list) |
||||
pairs, where 'posting list' is a set of documents in which the key occurs. |
||||
(A text document would usually contain many keys.) The primary goal of |
||||
Gin indices is support for highly scalable, full-text search in PostgreSQL. |
||||
|
||||
Gin consists of a B-tree index constructed over entries (ET, entries tree), |
||||
where each entry is an element of the indexed value (element of array, lexeme |
||||
for tsvector) and where each tuple in a leaf page is either a pointer to a |
||||
B-tree over item pointers (PT, posting tree), or a list of item pointers |
||||
(PL, posting list) if the tuple is small enough. |
||||
|
||||
Note: There is no delete operation for ET. The reason for this is that from |
||||
our experience, a set of unique words over a large collection changes very |
||||
rarely. This greatly simplifies the code and concurrency algorithms. |
||||
|
||||
Gin comes with built-in support for one-dimensional arrays (eg. integer[], |
||||
text[]), but no support for NULL elements. The following operations are |
||||
available: |
||||
|
||||
* contains: value_array @ query_array |
||||
* overlap: value_array && query_array |
||||
* contained: value_array ~ query_array |
||||
|
||||
Synopsis |
||||
-------- |
||||
|
||||
=# create index txt_idx on aa using gin(a); |
||||
|
||||
Features |
||||
-------- |
||||
|
||||
* Concurrency |
||||
* Write-Ahead Logging (WAL). (Recoverability from crashes.) |
||||
* User-defined opclasses. (The scheme is similar to GiST.) |
||||
* Optimized index creation (Makes use of maintenance_work_mem to accumulate |
||||
postings in memory.) |
||||
* Tsearch2 support via an opclass |
||||
* Soft upper limit on the returned results set using a GUC variable: |
||||
gin_fuzzy_search_limit |
||||
|
||||
Gin Fuzzy Limit |
||||
--------------- |
||||
|
||||
There are often situations when a full-text search returns a very large set of |
||||
results. Since reading tuples from the disk and sorting them could take a |
||||
lot of time, this is unacceptable for production. (Note that the search |
||||
itself is very fast.) |
||||
|
||||
Such queries usually contain very frequent lexemes, so the results are not |
||||
very helpful. To facilitate execution of such queries Gin has a configurable |
||||
soft upper limit of the size of the returned set, determined by the |
||||
'gin_fuzzy_search_limit' GUC variable. This is set to 0 by default (no |
||||
limit). |
||||
|
||||
If a non-zero search limit is set, then the returned set is a subset of the |
||||
whole result set, chosen at random. |
||||
|
||||
"Soft" means that the actual number of returned results could slightly differ |
||||
from the specified limit, depending on the query and the quality of the |
||||
system's random number generator. |
||||
|
||||
From experience, a value of 'gin_fuzzy_search_limit' in the thousands |
||||
(eg. 5000-20000) works well. This means that 'gin_fuzzy_search_limit' will |
||||
have no effect for queries returning a result set with fewer tuples than this |
||||
number. |
||||
|
||||
Limitations |
||||
----------- |
||||
|
||||
* No support for multicolumn indices |
||||
* Gin doesn't use scan->kill_prior_tuple & scan->ignore_killed_tuples |
||||
* Gin searches entries only by equality matching. This may be improved in |
||||
future. |
||||
* Gin doesn't support full scans of indices. |
||||
* Gin doesn't index NULL values. |
||||
|
||||
Gin Interface |
||||
------------- |
||||
|
||||
Opclass interface pseudocode. An example for a Gin opclass can be found in |
||||
ginarrayproc.c. |
||||
|
||||
Datum* extractValue(Datum inputValue, uint32* nentries) |
||||
|
||||
Returns an array of Datum of entries of the value to be indexed. nentries |
||||
should contain the number of returned entries. |
||||
|
||||
int compareEntry(Datum a, Datum b) |
||||
|
||||
Compares two entries (not the indexing values) |
||||
|
||||
Datum* extractQuery(Datum query, uint32* nentries, StrategyNumber n) |
||||
|
||||
Returns an array of Datum of entries of the query to be executed. |
||||
n contains the strategy number of the operation. |
||||
|
||||
bool consistent(bool[] check, StrategyNumber n, Datum query) |
||||
|
||||
The size of the check array is the same as the size of the array returned by |
||||
extractQuery. Each element of the check array is true if the indexed value |
||||
has a corresponding entry in the query. i.e. if (check[i] == TRUE) then |
||||
the i-th entry of the query is present in the indexed value. The function |
||||
should return true if the indexed value matches by StrategyNumber and |
||||
the query. |
||||
|
||||
Open Items |
||||
---------- |
||||
|
||||
We appreciate any comments, help and suggestions. |
||||
|
||||
* Teach optimizer/executor that GIN is intrinsically clustered. i.e., it |
||||
always returns ItemPointer in ascending order. |
||||
* Tweak gincostestimate. |
||||
* GIN stores several ItemPointer to heap tuple, so VACUUM FULL produces |
||||
this warning message: |
||||
|
||||
WARNING: index "idx" contains 88395 row versions, but table contains |
||||
51812 row versions |
||||
HINT: Rebuild the index with REINDEX. |
||||
**** Workaround added |
||||
|
||||
TODO |
||||
---- |
||||
|
||||
Nearest future: |
||||
|
||||
* Opclasses for all types (no programming, just many catalog changes). |
||||
|
||||
Distant future: |
||||
|
||||
* Replace B-tree of entries with something like GiST |
||||
* Add multicolumn support |
||||
* Optimize insert operations (background index insertion) |
||||
|
||||
Authors |
||||
------- |
||||
|
||||
All work was done by Teodor Sigaev (teodor@sigaev.ru) and Oleg Bartunov |
||||
(oleg@sai.msu.su). |
@ -0,0 +1,261 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* ginarrayproc.c |
||||
* support function for GIN's indexing of any array |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
#include "access/genam.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/freespace.h" |
||||
#include "utils/array.h" |
||||
#include "utils/lsyscache.h" |
||||
#include "utils/syscache.h" |
||||
#include "utils/typcache.h" |
||||
#include "utils/builtins.h" |
||||
#include "access/gin.h" |
||||
|
||||
#define GinOverlapStrategy 1 |
||||
#define GinContainsStrategy 2 |
||||
#define GinContainedStrategy 3 |
||||
|
||||
/*
 * ARRAYCHECK
 *		Raise an ERROR if the array contains NULL elements or if its number
 *		of dimensions is anything other than 0 or 1.
 *
 * Note: the argument is evaluated more than once.
 */
#define ARRAYCHECK(x) do {										\
	if ( ARR_HASNULL(x) )										\
		ereport(ERROR,											\
			(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),			\
			 errmsg("array must not contain nulls")));			\
																\
	if ( !( ARR_NDIM(x) == 1 || ARR_NDIM(x) == 0 ) )			\
		ereport(ERROR,											\
			(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),			\
			 errmsg("array must be one-dimensional")));			\
} while(0)
|
||||
|
||||
/*
|
||||
* Function used as extractValue and extractQuery both |
||||
*/ |
||||
Datum |
||||
ginarrayextract(PG_FUNCTION_ARGS) { |
||||
ArrayType *array; |
||||
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
|
||||
Datum *entries = NULL; |
||||
int16 elmlen; |
||||
bool elmbyval; |
||||
char elmalign; |
||||
|
||||
/* we should guarantee that array will not be destroyed during all operation */ |
||||
array = PG_GETARG_ARRAYTYPE_P_COPY(0); |
||||
|
||||
ARRAYCHECK(array); |
||||
|
||||
get_typlenbyvalalign(ARR_ELEMTYPE(array), |
||||
&elmlen, &elmbyval, &elmalign); |
||||
|
||||
deconstruct_array(array, |
||||
ARR_ELEMTYPE(array), |
||||
elmlen, elmbyval, elmalign, |
||||
&entries, NULL, (int*)nentries); |
||||
|
||||
/* we should not free array, entries[i] points into it */ |
||||
PG_RETURN_POINTER(entries); |
||||
} |
||||
|
||||
Datum |
||||
ginarrayconsistent(PG_FUNCTION_ARGS) { |
||||
bool *check = (bool*)PG_GETARG_POINTER(0); |
||||
StrategyNumber strategy = PG_GETARG_UINT16(1); |
||||
ArrayType *query = PG_GETARG_ARRAYTYPE_P(2); |
||||
int res=FALSE, i, nentries=ArrayGetNItems(ARR_NDIM(query), ARR_DIMS(query)); |
||||
|
||||
/* we can do not check array carefully, it's done by previous ginarrayextract call */ |
||||
|
||||
switch( strategy ) { |
||||
case GinOverlapStrategy: |
||||
case GinContainedStrategy: |
||||
/* at least one element in check[] is true, so result = true */
|
||||
res = TRUE; |
||||
break; |
||||
case GinContainsStrategy: |
||||
res = TRUE; |
||||
for(i=0;i<nentries;i++) |
||||
if ( !check[i] ) { |
||||
res = FALSE; |
||||
break; |
||||
} |
||||
break; |
||||
default: |
||||
elog(ERROR, "ginarrayconsistent: unknown strategy number: %d", strategy); |
||||
} |
||||
|
||||
PG_RETURN_BOOL(res); |
||||
} |
||||
|
||||
static TypeCacheEntry* |
||||
fillTypeCacheEntry( TypeCacheEntry *typentry, Oid element_type ) { |
||||
if ( typentry && typentry->type_id == element_type ) |
||||
return typentry; |
||||
|
||||
typentry = lookup_type_cache(element_type, TYPECACHE_EQ_OPR_FINFO); |
||||
if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_UNDEFINED_FUNCTION), |
||||
errmsg("could not identify an equality operator for type %s", format_type_be(element_type)))); |
||||
|
||||
return typentry; |
||||
} |
||||
|
||||
static bool |
||||
typeEQ(FunctionCallInfoData *locfcinfo, Datum a, Datum b) { |
||||
locfcinfo->arg[0] = a; |
||||
locfcinfo->arg[1] = b; |
||||
locfcinfo->argnull[0] = false; |
||||
locfcinfo->argnull[1] = false; |
||||
locfcinfo->isnull = false; |
||||
|
||||
return DatumGetBool(FunctionCallInvoke(locfcinfo)); |
||||
} |
||||
|
||||
static bool |
||||
ginArrayOverlap(TypeCacheEntry *typentry, ArrayType *a, ArrayType *b) { |
||||
Datum *da, *db; |
||||
int na, nb, j, i; |
||||
FunctionCallInfoData locfcinfo; |
||||
|
||||
if ( ARR_ELEMTYPE(a) != ARR_ELEMTYPE(b) ) |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_DATATYPE_MISMATCH), |
||||
errmsg("cannot compare arrays of different element types"))); |
||||
|
||||
ARRAYCHECK(a); |
||||
ARRAYCHECK(b); |
||||
|
||||
deconstruct_array(a, |
||||
ARR_ELEMTYPE(a), |
||||
typentry->typlen, typentry->typbyval, typentry->typalign, |
||||
&da, NULL, &na); |
||||
deconstruct_array(b, |
||||
ARR_ELEMTYPE(b), |
||||
typentry->typlen, typentry->typbyval, typentry->typalign, |
||||
&db, NULL, &nb); |
||||
|
||||
InitFunctionCallInfoData(locfcinfo, &typentry->eq_opr_finfo, 2, |
||||
NULL, NULL); |
||||
|
||||
for(i=0;i<na;i++) { |
||||
for(j=0;j<nb;j++) { |
||||
if ( typeEQ(&locfcinfo, da[i], db[j]) ) { |
||||
pfree( da ); |
||||
pfree( db ); |
||||
return TRUE; |
||||
} |
||||
} |
||||
} |
||||
|
||||
pfree( da ); |
||||
pfree( db ); |
||||
|
||||
return FALSE; |
||||
} |
||||
|
||||
static bool |
||||
ginArrayContains(TypeCacheEntry *typentry, ArrayType *a, ArrayType *b) { |
||||
Datum *da, *db; |
||||
int na, nb, j, i, n = 0; |
||||
FunctionCallInfoData locfcinfo; |
||||
|
||||
if ( ARR_ELEMTYPE(a) != ARR_ELEMTYPE(b) ) |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_DATATYPE_MISMATCH), |
||||
errmsg("cannot compare arrays of different element types"))); |
||||
|
||||
ARRAYCHECK(a); |
||||
ARRAYCHECK(b); |
||||
|
||||
deconstruct_array(a, |
||||
ARR_ELEMTYPE(a), |
||||
typentry->typlen, typentry->typbyval, typentry->typalign, |
||||
&da, NULL, &na); |
||||
deconstruct_array(b, |
||||
ARR_ELEMTYPE(b), |
||||
typentry->typlen, typentry->typbyval, typentry->typalign, |
||||
&db, NULL, &nb); |
||||
|
||||
InitFunctionCallInfoData(locfcinfo, &typentry->eq_opr_finfo, 2, |
||||
NULL, NULL); |
||||
|
||||
for(i=0;i<nb;i++) { |
||||
for(j=0;j<na;j++) { |
||||
if ( typeEQ(&locfcinfo, db[i], da[j]) ) { |
||||
n++; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
pfree( da ); |
||||
pfree( db ); |
||||
|
||||
return ( n==nb ) ? TRUE : FALSE; |
||||
} |
||||
|
||||
Datum |
||||
arrayoverlap(PG_FUNCTION_ARGS) { |
||||
ArrayType *a = PG_GETARG_ARRAYTYPE_P(0); |
||||
ArrayType *b = PG_GETARG_ARRAYTYPE_P(1); |
||||
TypeCacheEntry *typentry = fillTypeCacheEntry( fcinfo->flinfo->fn_extra, ARR_ELEMTYPE(a) );
|
||||
bool res; |
||||
|
||||
fcinfo->flinfo->fn_extra = (void*)typentry; |
||||
|
||||
res = ginArrayOverlap( typentry, a, b );
|
||||
|
||||
PG_FREE_IF_COPY(a,0); |
||||
PG_FREE_IF_COPY(b,1); |
||||
|
||||
PG_RETURN_BOOL(res); |
||||
} |
||||
|
||||
Datum |
||||
arraycontains(PG_FUNCTION_ARGS) { |
||||
ArrayType *a = PG_GETARG_ARRAYTYPE_P(0); |
||||
ArrayType *b = PG_GETARG_ARRAYTYPE_P(1); |
||||
TypeCacheEntry *typentry = fillTypeCacheEntry( fcinfo->flinfo->fn_extra, ARR_ELEMTYPE(a) );
|
||||
bool res; |
||||
|
||||
fcinfo->flinfo->fn_extra = (void*)typentry; |
||||
|
||||
res = ginArrayContains( typentry, a, b );
|
||||
|
||||
PG_FREE_IF_COPY(a,0); |
||||
PG_FREE_IF_COPY(b,1); |
||||
|
||||
PG_RETURN_BOOL(res); |
||||
} |
||||
|
||||
Datum |
||||
arraycontained(PG_FUNCTION_ARGS) { |
||||
ArrayType *a = PG_GETARG_ARRAYTYPE_P(0); |
||||
ArrayType *b = PG_GETARG_ARRAYTYPE_P(1); |
||||
TypeCacheEntry *typentry = fillTypeCacheEntry( fcinfo->flinfo->fn_extra, ARR_ELEMTYPE(a) );
|
||||
bool res; |
||||
|
||||
fcinfo->flinfo->fn_extra = (void*)typentry; |
||||
|
||||
res = ginArrayContains( typentry, b, a );
|
||||
|
||||
PG_FREE_IF_COPY(a,0); |
||||
PG_FREE_IF_COPY(b,1); |
||||
|
||||
PG_RETURN_BOOL(res); |
||||
} |
||||
|
@ -0,0 +1,394 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* ginbtree.c |
||||
* page utilities routines for the postgres inverted index access method. |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
#include "access/genam.h" |
||||
#include "access/gin.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/freespace.h" |
||||
|
||||
/*
|
||||
* Locks buffer by needed method for search. |
||||
*/ |
||||
static int |
||||
ginTraverseLock(Buffer buffer, bool searchMode) { |
||||
Page page; |
||||
int access=GIN_SHARE; |
||||
|
||||
LockBuffer(buffer, GIN_SHARE); |
||||
page = BufferGetPage( buffer ); |
||||
if ( GinPageIsLeaf(page) ) { |
||||
if ( searchMode == FALSE ) { |
||||
/* we should relock our page */ |
||||
LockBuffer(buffer, GIN_UNLOCK); |
||||
LockBuffer(buffer, GIN_EXCLUSIVE); |
||||
|
||||
/* But root can become non-leaf during relock */ |
||||
if ( !GinPageIsLeaf(page) ) { |
||||
/* resore old lock type (very rare) */
|
||||
LockBuffer(buffer, GIN_UNLOCK); |
||||
LockBuffer(buffer, GIN_SHARE); |
||||
} else |
||||
access = GIN_EXCLUSIVE; |
||||
} |
||||
} |
||||
|
||||
return access; |
||||
} |
||||
|
||||
GinBtreeStack* |
||||
ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno) { |
||||
GinBtreeStack *stack = (GinBtreeStack*)palloc(sizeof(GinBtreeStack)); |
||||
|
||||
stack->blkno = blkno; |
||||
stack->buffer = ReadBuffer(btree->index, stack->blkno); |
||||
stack->parent = NULL; |
||||
stack->predictNumber = 1; |
||||
|
||||
ginTraverseLock(stack->buffer, btree->searchMode); |
||||
|
||||
return stack; |
||||
} |
||||
|
||||
/*
|
||||
* Locates leaf page contained tuple |
||||
*/ |
||||
GinBtreeStack*
|
||||
ginFindLeafPage(GinBtree btree, GinBtreeStack *stack) { |
||||
bool isfirst=TRUE; |
||||
BlockNumber rootBlkno; |
||||
|
||||
if ( !stack )
|
||||
stack = ginPrepareFindLeafPage(btree, GIN_ROOT_BLKNO); |
||||
rootBlkno = stack->blkno; |
||||
|
||||
for(;;) { |
||||
Page page; |
||||
BlockNumber child; |
||||
int access=GIN_SHARE; |
||||
|
||||
stack->off = InvalidOffsetNumber; |
||||
|
||||
page = BufferGetPage( stack->buffer ); |
||||
|
||||
if ( isfirst ) { |
||||
if ( GinPageIsLeaf(page) && !btree->searchMode ) |
||||
access = GIN_EXCLUSIVE; |
||||
isfirst = FALSE; |
||||
} else |
||||
access = ginTraverseLock(stack->buffer, btree->searchMode); |
||||
|
||||
/* ok, page is correctly locked, we should check to move right ..,
|
||||
root never has a right link, so small optimization */ |
||||
while( btree->fullScan==FALSE && stack->blkno != rootBlkno && btree->isMoveRight(btree, page) ) { |
||||
BlockNumber rightlink = GinPageGetOpaque(page)->rightlink; |
||||
|
||||
if ( rightlink==InvalidBlockNumber ) |
||||
/* rightmost page */ |
||||
break; |
||||
|
||||
stack->blkno = rightlink; |
||||
LockBuffer(stack->buffer, GIN_UNLOCK); |
||||
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno); |
||||
LockBuffer(stack->buffer, access);
|
||||
page = BufferGetPage( stack->buffer ); |
||||
} |
||||
|
||||
if ( GinPageIsLeaf(page) ) /* we found, return locked page */
|
||||
return stack; |
||||
|
||||
/* now we have correct buffer, try to find child */ |
||||
child = btree->findChildPage(btree, stack); |
||||
|
||||
LockBuffer(stack->buffer, GIN_UNLOCK); |
||||
Assert( child != InvalidBlockNumber );
|
||||
Assert( stack->blkno != child ); |
||||
|
||||
if ( btree->searchMode ) {
|
||||
/* in search mode we may forget path to leaf */ |
||||
stack->blkno = child; |
||||
stack->buffer = ReleaseAndReadBuffer( stack->buffer, btree->index, stack->blkno ); |
||||
} else { |
||||
GinBtreeStack *ptr = (GinBtreeStack*)palloc(sizeof(GinBtreeStack)); |
||||
|
||||
ptr->parent = stack; |
||||
stack = ptr; |
||||
stack->blkno = child; |
||||
stack->buffer = ReadBuffer(btree->index, stack->blkno); |
||||
stack->predictNumber = 1; |
||||
} |
||||
} |
||||
|
||||
/* keep compiler happy */ |
||||
return NULL; |
||||
} |
||||
|
||||
void |
||||
freeGinBtreeStack( GinBtreeStack *stack ) { |
||||
while(stack) { |
||||
GinBtreeStack *tmp = stack->parent; |
||||
if ( stack->buffer != InvalidBuffer ) |
||||
ReleaseBuffer(stack->buffer); |
||||
|
||||
pfree( stack ); |
||||
stack = tmp; |
||||
} |
||||
} |
||||
|
||||
/*
|
||||
* Try to find parent for current stack position, returns correct
|
||||
* parent and child's offset in stack->parent. |
||||
* Function should never release root page to prevent conflicts |
||||
* with vacuum process |
||||
*/ |
||||
void |
||||
findParents( GinBtree btree, GinBtreeStack *stack,
|
||||
BlockNumber rootBlkno) { |
||||
|
||||
Page page; |
||||
Buffer buffer; |
||||
BlockNumber blkno, leftmostBlkno; |
||||
OffsetNumber offset; |
||||
GinBtreeStack *root = stack->parent; |
||||
GinBtreeStack *ptr; |
||||
|
||||
if ( !root ) { |
||||
/* XLog mode... */ |
||||
root = (GinBtreeStack*)palloc(sizeof(GinBtreeStack)); |
||||
root->blkno = rootBlkno; |
||||
root->buffer = ReadBuffer(btree->index, rootBlkno); |
||||
LockBuffer(root->buffer, GIN_EXCLUSIVE); |
||||
root->parent = NULL; |
||||
} else {
|
||||
/* find root, we should not release root page until update is finished!! */ |
||||
while( root->parent ) { |
||||
ReleaseBuffer( root->buffer ); |
||||
root = root->parent; |
||||
} |
||||
|
||||
Assert( root->blkno == rootBlkno ); |
||||
Assert( BufferGetBlockNumber(root->buffer) == rootBlkno ); |
||||
LockBuffer(root->buffer, GIN_EXCLUSIVE); |
||||
} |
||||
root->off = InvalidOffsetNumber; |
||||
|
||||
page = BufferGetPage(root->buffer); |
||||
Assert( !GinPageIsLeaf(page) ); |
||||
|
||||
/* check trivial case */ |
||||
if ( (root->off != btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) != InvalidBuffer ) { |
||||
stack->parent = root; |
||||
return; |
||||
} |
||||
|
||||
leftmostBlkno = blkno = btree->getLeftMostPage(btree, page); |
||||
LockBuffer(root->buffer, GIN_UNLOCK ); |
||||
Assert( blkno!=InvalidBlockNumber ); |
||||
|
||||
|
||||
for(;;) { |
||||
buffer = ReadBuffer(btree->index, blkno); |
||||
LockBuffer(buffer, GIN_EXCLUSIVE); |
||||
page = BufferGetPage(root->buffer); |
||||
if ( GinPageIsLeaf(page) ) |
||||
elog(ERROR, "Lost path"); |
||||
|
||||
leftmostBlkno = btree->getLeftMostPage(btree, page); |
||||
|
||||
while( (offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber))==InvalidOffsetNumber ) { |
||||
blkno = GinPageGetOpaque(page)->rightlink; |
||||
LockBuffer(buffer,GIN_UNLOCK); |
||||
ReleaseBuffer(buffer); |
||||
if ( blkno == InvalidBlockNumber ) |
||||
break; |
||||
buffer = ReadBuffer(btree->index, blkno); |
||||
LockBuffer(buffer, GIN_EXCLUSIVE); |
||||
page = BufferGetPage(buffer); |
||||
} |
||||
|
||||
if ( blkno != InvalidBlockNumber ) { |
||||
ptr = (GinBtreeStack*)palloc(sizeof(GinBtreeStack)); |
||||
ptr->blkno = blkno; |
||||
ptr->buffer = buffer; |
||||
ptr->parent = root; /* it's may be wrong, but in next call we will correct */ |
||||
stack->parent = ptr; |
||||
return; |
||||
} |
||||
|
||||
blkno = leftmostBlkno; |
||||
} |
||||
} |
||||
|
||||
/*
|
||||
* Insert value (stored in GinBtree) to tree descibed by stack |
||||
*/ |
||||
void |
||||
ginInsertValue(GinBtree btree, GinBtreeStack *stack) { |
||||
GinBtreeStack *parent = stack; |
||||
BlockNumber rootBlkno = InvalidBuffer;
|
||||
Page page, rpage, lpage; |
||||
|
||||
/* remember root BlockNumber */ |
||||
while( parent ) { |
||||
rootBlkno = parent->blkno; |
||||
parent = parent->parent; |
||||
} |
||||
|
||||
while( stack ) { |
||||
XLogRecData *rdata; |
||||
BlockNumber savedRightLink; |
||||
|
||||
page = BufferGetPage( stack->buffer ); |
||||
savedRightLink = GinPageGetOpaque(page)->rightlink; |
||||
|
||||
if ( btree->isEnoughSpace( btree, stack->buffer, stack->off ) ) { |
||||
START_CRIT_SECTION(); |
||||
btree->placeToPage( btree, stack->buffer, stack->off, &rdata ); |
||||
|
||||
if (!btree->index->rd_istemp) { |
||||
XLogRecPtr recptr; |
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata); |
||||
PageSetLSN(page, recptr); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
}
|
||||
|
||||
MarkBufferDirty( stack->buffer ); |
||||
UnlockReleaseBuffer(stack->buffer); |
||||
END_CRIT_SECTION(); |
||||
|
||||
freeGinBtreeStack(stack->parent); |
||||
return; |
||||
} else { |
||||
Buffer rbuffer = GinNewBuffer(btree->index); |
||||
Page newlpage; |
||||
|
||||
/* newlpage is a pointer to memory page, it does'nt assosiates with buffer,
|
||||
stack->buffer shoud be untouched */ |
||||
newlpage = btree->splitPage( btree, stack->buffer, rbuffer, stack->off, &rdata ); |
||||
|
||||
|
||||
((ginxlogSplit*)(rdata->data))->rootBlkno = rootBlkno; |
||||
|
||||
parent = stack->parent; |
||||
|
||||
if ( parent == NULL ) { |
||||
/* split root, so we need to allocate new left page and
|
||||
place pointer on root to left and right page */ |
||||
Buffer lbuffer = GinNewBuffer(btree->index); |
||||
|
||||
((ginxlogSplit*)(rdata->data))->isRootSplit = TRUE; |
||||
((ginxlogSplit*)(rdata->data))->rrlink = InvalidBlockNumber; |
||||
|
||||
|
||||
page = BufferGetPage( stack->buffer ); |
||||
lpage = BufferGetPage( lbuffer ); |
||||
rpage = BufferGetPage( rbuffer ); |
||||
|
||||
GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber; |
||||
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); |
||||
((ginxlogSplit*)(rdata->data))->lblkno = BufferGetBlockNumber(lbuffer); |
||||
|
||||
START_CRIT_SECTION(); |
||||
|
||||
GinInitBuffer( stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF ); |
||||
PageRestoreTempPage( newlpage, lpage );
|
||||
btree->fillRoot( btree, stack->buffer, lbuffer, rbuffer ); |
||||
if (!btree->index->rd_istemp) { |
||||
XLogRecPtr recptr; |
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); |
||||
PageSetLSN(page, recptr); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
PageSetLSN(lpage, recptr); |
||||
PageSetTLI(lpage, ThisTimeLineID); |
||||
PageSetLSN(rpage, recptr); |
||||
PageSetTLI(rpage, ThisTimeLineID); |
||||
} |
||||
|
||||
MarkBufferDirty(rbuffer); |
||||
UnlockReleaseBuffer(rbuffer); |
||||
MarkBufferDirty(lbuffer); |
||||
UnlockReleaseBuffer(lbuffer); |
||||
MarkBufferDirty(stack->buffer); |
||||
UnlockReleaseBuffer(stack->buffer); |
||||
|
||||
END_CRIT_SECTION(); |
||||
|
||||
return; |
||||
} else { |
||||
/* split non-root page */ |
||||
((ginxlogSplit*)(rdata->data))->isRootSplit = FALSE; |
||||
((ginxlogSplit*)(rdata->data))->rrlink = savedRightLink; |
||||
|
||||
lpage = BufferGetPage( stack->buffer ); |
||||
rpage = BufferGetPage( rbuffer ); |
||||
|
||||
GinPageGetOpaque(rpage)->rightlink = savedRightLink; |
||||
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); |
||||
|
||||
START_CRIT_SECTION(); |
||||
PageRestoreTempPage( newlpage, lpage );
|
||||
if (!btree->index->rd_istemp) { |
||||
XLogRecPtr recptr; |
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); |
||||
PageSetLSN(lpage, recptr); |
||||
PageSetTLI(lpage, ThisTimeLineID); |
||||
PageSetLSN(rpage, recptr); |
||||
PageSetTLI(rpage, ThisTimeLineID); |
||||
} |
||||
MarkBufferDirty(rbuffer); |
||||
UnlockReleaseBuffer(rbuffer); |
||||
MarkBufferDirty( stack->buffer ); |
||||
END_CRIT_SECTION(); |
||||
} |
||||
} |
||||
|
||||
btree->isDelete = FALSE; |
||||
|
||||
/* search parent to lock */ |
||||
LockBuffer(parent->buffer, GIN_EXCLUSIVE); |
||||
|
||||
/* move right if it's needed */ |
||||
page = BufferGetPage( parent->buffer ); |
||||
while( (parent->off=btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber ) { |
||||
BlockNumber rightlink = GinPageGetOpaque(page)->rightlink; |
||||
|
||||
LockBuffer(parent->buffer, GIN_UNLOCK); |
||||
|
||||
if ( rightlink==InvalidBlockNumber ) {
|
||||
/* rightmost page, but we don't find parent, we should
|
||||
use plain search... */ |
||||
findParents(btree, stack, rootBlkno); |
||||
parent=stack->parent; |
||||
page = BufferGetPage( parent->buffer ); |
||||
break; |
||||
} |
||||
|
||||
parent->blkno = rightlink; |
||||
parent->buffer = ReleaseAndReadBuffer(parent->buffer, btree->index, parent->blkno); |
||||
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
|
||||
page = BufferGetPage( parent->buffer ); |
||||
} |
||||
|
||||
UnlockReleaseBuffer(stack->buffer); |
||||
pfree( stack ); |
||||
stack = parent; |
||||
} |
||||
} |
||||
|
||||
|
@ -0,0 +1,155 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* ginbulk.c |
||||
* routines for fast build of inverted index
|
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
#include "access/genam.h" |
||||
#include "access/gin.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/freespace.h" |
||||
#include "utils/memutils.h" |
||||
#include "access/tuptoaster.h" |
||||
|
||||
#define DEF_NENTRY 128 |
||||
#define DEF_NPTR 4 |
||||
|
||||
void |
||||
ginInitBA(BuildAccumulator *accum) { |
||||
|
||||
accum->number = 0; |
||||
accum->curget = 0; |
||||
accum->length = DEF_NENTRY; |
||||
accum->entries = (EntryAccumulator*)palloc0( sizeof(EntryAccumulator) * DEF_NENTRY ); |
||||
accum->allocatedMemory = sizeof(EntryAccumulator) * DEF_NENTRY; |
||||
} |
||||
|
||||
/*
|
||||
* Stores heap item pointer. For robust, it checks that |
||||
* item pointer are ordered |
||||
*/ |
||||
static void |
||||
ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heapptr) { |
||||
if ( entry->number >= entry->length ) { |
||||
accum->allocatedMemory += sizeof(ItemPointerData) * entry->length; |
||||
entry->length *= 2; |
||||
entry->list = (ItemPointerData*)repalloc(entry->list, |
||||
sizeof(ItemPointerData)*entry->length); |
||||
} |
||||
|
||||
if ( entry->shouldSort==FALSE ) {
|
||||
int res = compareItemPointers( entry->list + entry->number - 1, heapptr ); |
||||
|
||||
Assert( res != 0 ); |
||||
|
||||
if ( res > 0 ) |
||||
entry->shouldSort=TRUE; |
||||
} |
||||
|
||||
entry->list[ entry->number ] = *heapptr; |
||||
entry->number++; |
||||
} |
||||
|
||||
/*
|
||||
* Find/store one entry from indexed value. |
||||
* It supposes, that entry should be located between low and end of array of |
||||
* entries. Returns position of found/inserted entry |
||||
*/ |
||||
static uint32 |
||||
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry, uint32 low) { |
||||
uint32 high = accum->number, mid; |
||||
int res; |
||||
|
||||
while(high>low) {
|
||||
mid = low + ((high - low) / 2); |
||||
|
||||
res = compareEntries(accum->ginstate, entry, accum->entries[mid].value); |
||||
|
||||
if ( res == 0 ) { |
||||
ginInsertData( accum, accum->entries+mid, heapptr ); |
||||
return mid; |
||||
} else if ( res > 0 ) |
||||
low = mid + 1; |
||||
else |
||||
high = mid; |
||||
} |
||||
|
||||
/* did not find an entry, insert */ |
||||
if ( accum->number >= accum->length ) { |
||||
accum->allocatedMemory += sizeof(EntryAccumulator) * accum->length; |
||||
accum->length *= 2; |
||||
accum->entries = (EntryAccumulator*)repalloc( accum->entries,
|
||||
sizeof(EntryAccumulator) * accum->length ); |
||||
} |
||||
|
||||
if ( high != accum->number )
|
||||
memmove( accum->entries+high+1, accum->entries+high, sizeof(EntryAccumulator) * (accum->number-high) ); |
||||
|
||||
accum->entries[high].value = entry; |
||||
accum->entries[high].length = DEF_NPTR; |
||||
accum->entries[high].number = 1; |
||||
accum->entries[high].shouldSort = FALSE; |
||||
accum->entries[high].list = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR); |
||||
accum->entries[high].list[0] = *heapptr; |
||||
|
||||
accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR; |
||||
accum->number++; |
||||
|
||||
return high; |
||||
} |
||||
|
||||
|
||||
/*
|
||||
* Insert one heap pointer. Requires entries to be sorted! |
||||
*/ |
||||
void |
||||
ginInsertRecordBA( BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry ) { |
||||
uint32 start=0,i; |
||||
|
||||
for(i=0;i<nentry;i++) |
||||
start = ginInsertEntry( accum, heapptr, entries[i], start); |
||||
} |
||||
|
||||
static int
|
||||
qsortCompareItemPointers( const void *a, const void *b ) { |
||||
int res = compareItemPointers( (ItemPointer)a, (ItemPointer)b ); |
||||
Assert( res!=0 ); |
||||
return res; |
||||
} |
||||
|
||||
ItemPointerData* |
||||
ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n) { |
||||
EntryAccumulator *entry; |
||||
|
||||
ItemPointerData *list; |
||||
if ( accum->curget >= accum->number ) |
||||
return NULL; |
||||
else if ( accum->curget > 0 ) |
||||
pfree( accum->entries[ accum->curget-1 ].list ); |
||||
|
||||
entry = accum->entries + accum->curget; |
||||
*n = entry->number; |
||||
*value = entry->value; |
||||
list = entry->list; |
||||
accum->curget++; |
||||
|
||||
if ( entry->shouldSort && entry->number > 1 ) |
||||
qsort(list, *n, sizeof(ItemPointerData), qsortCompareItemPointers); |
||||
|
||||
|
||||
return list; |
||||
} |
||||
|
||||
|
||||
|
@ -0,0 +1,569 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* gindatapage.c |
||||
* page utilities routines for the postgres inverted index access method. |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
#include "access/genam.h" |
||||
#include "access/gin.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/freespace.h" |
||||
|
||||
int |
||||
compareItemPointers( ItemPointer a, ItemPointer b ) { |
||||
if ( GinItemPointerGetBlockNumber(a) == GinItemPointerGetBlockNumber(b) ) { |
||||
if ( GinItemPointerGetOffsetNumber(a) == GinItemPointerGetOffsetNumber(b) ) |
||||
return 0; |
||||
return ( GinItemPointerGetOffsetNumber(a) > GinItemPointerGetOffsetNumber(b) ) ? 1 : -1; |
||||
}
|
||||
|
||||
return ( GinItemPointerGetBlockNumber(a) > GinItemPointerGetBlockNumber(b) ) ? 1 : -1; |
||||
} |
||||
|
||||
/*
|
||||
* Merge two ordered array of itempointer |
||||
*/ |
||||
void
|
||||
MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb) { |
||||
ItemPointerData *dptr = dst; |
||||
ItemPointerData *aptr = a, *bptr = b; |
||||
|
||||
while( aptr - a < na && bptr - b < nb ) { |
||||
if ( compareItemPointers(aptr, bptr) > 0 )
|
||||
*dptr++ = *bptr++; |
||||
else |
||||
*dptr++ = *aptr++; |
||||
} |
||||
|
||||
while( aptr - a < na ) |
||||
*dptr++ = *aptr++; |
||||
|
||||
while( bptr - b < nb ) |
||||
*dptr++ = *bptr++; |
||||
} |
||||
|
||||
/*
|
||||
* Checks, should we move to right link...
|
||||
* Compares inserting itemp pointer with right bound of current page |
||||
*/ |
||||
static bool |
||||
dataIsMoveRight(GinBtree btree, Page page) { |
||||
ItemPointer iptr = GinDataPageGetRightBound(page); |
||||
|
||||
if ( GinPageRightMost(page) ) |
||||
return FALSE; |
||||
|
||||
return ( compareItemPointers( btree->items + btree->curitem, iptr ) > 0 ) ? TRUE : FALSE; |
||||
} |
||||
|
||||
/*
|
||||
* Find correct PostingItem in non-leaf page. It supposed that |
||||
* page correctly choosen and searching value SHOULD be on page |
||||
*/ |
||||
static BlockNumber |
||||
dataLocateItem(GinBtree btree, GinBtreeStack *stack) { |
||||
OffsetNumber low, high, maxoff; |
||||
PostingItem *pitem=NULL; |
||||
int result; |
||||
Page page = BufferGetPage( stack->buffer ); |
||||
|
||||
Assert( !GinPageIsLeaf(page) ); |
||||
Assert( GinPageIsData(page) ); |
||||
|
||||
if ( btree->fullScan ) { |
||||
stack->off = FirstOffsetNumber; |
||||
stack->predictNumber *= GinPageGetOpaque(page)->maxoff; |
||||
return btree->getLeftMostPage(btree, page); |
||||
} |
||||
|
||||
low = FirstOffsetNumber; |
||||
maxoff = high = GinPageGetOpaque(page)->maxoff;
|
||||
Assert( high >= low ); |
||||
|
||||
high++; |
||||
|
||||
while (high > low) { |
||||
OffsetNumber mid = low + ((high - low) / 2); |
||||
pitem = (PostingItem*)GinDataPageGetItem(page,mid);
|
||||
|
||||
if ( mid == maxoff ) |
||||
/* Right infinity, page already correctly choosen
|
||||
with a help of dataIsMoveRight */ |
||||
result = -1; |
||||
else { |
||||
pitem = (PostingItem*)GinDataPageGetItem(page,mid); |
||||
result = compareItemPointers( btree->items + btree->curitem, &( pitem->key ) ); |
||||
} |
||||
|
||||
if ( result == 0 ) { |
||||
stack->off = mid; |
||||
return PostingItemGetBlockNumber(pitem); |
||||
} else if ( result > 0 ) |
||||
low = mid + 1; |
||||
else |
||||
high = mid; |
||||
} |
||||
|
||||
Assert( high>=FirstOffsetNumber && high <= maxoff ); |
||||
|
||||
stack->off = high; |
||||
pitem = (PostingItem*)GinDataPageGetItem(page,high); |
||||
return PostingItemGetBlockNumber(pitem); |
||||
} |
||||
|
||||
/*
|
||||
* Searches correct position for value on leaf page. |
||||
* Page should be corrrectly choosen.
|
||||
* Returns true if value found on page. |
||||
*/ |
||||
static bool |
||||
dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack) { |
||||
Page page = BufferGetPage( stack->buffer ); |
||||
OffsetNumber low, high; |
||||
int result; |
||||
|
||||
Assert( GinPageIsLeaf(page) ); |
||||
Assert( GinPageIsData(page) ); |
||||
|
||||
if ( btree->fullScan ) { |
||||
stack->off = FirstOffsetNumber; |
||||
return TRUE; |
||||
} |
||||
|
||||
low=FirstOffsetNumber; |
||||
high = GinPageGetOpaque(page)->maxoff; |
||||
|
||||
if ( high < low ) { |
||||
stack->off = FirstOffsetNumber; |
||||
return false; |
||||
} |
||||
|
||||
high++; |
||||
|
||||
while (high > low) { |
||||
OffsetNumber mid = low + ((high - low) / 2); |
||||
|
||||
result = compareItemPointers( btree->items + btree->curitem, (ItemPointer)GinDataPageGetItem(page,mid) ); |
||||
|
||||
if ( result == 0 ) { |
||||
stack->off = mid; |
||||
return true; |
||||
} else if ( result > 0 ) |
||||
low = mid + 1; |
||||
else |
||||
high = mid; |
||||
} |
||||
|
||||
stack->off = high; |
||||
return false; |
||||
} |
||||
|
||||
/*
|
||||
* Finds links to blkno on non-leaf page, retuns |
||||
* offset of PostingItem |
||||
*/ |
||||
static OffsetNumber |
||||
dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) { |
||||
OffsetNumber i, maxoff = GinPageGetOpaque(page)->maxoff; |
||||
PostingItem *pitem; |
||||
|
||||
Assert( !GinPageIsLeaf(page) ); |
||||
Assert( GinPageIsData(page) ); |
||||
|
||||
/* if page isn't changed, we returns storedOff */ |
||||
if ( storedOff>= FirstOffsetNumber && storedOff<=maxoff) { |
||||
pitem = (PostingItem*)GinDataPageGetItem(page, storedOff); |
||||
if ( PostingItemGetBlockNumber(pitem) == blkno )
|
||||
return storedOff; |
||||
|
||||
/* we hope, that needed pointer goes to right. It's true
|
||||
if there wasn't a deletion */ |
||||
for( i=storedOff+1 ; i <= maxoff ; i++ ) { |
||||
pitem = (PostingItem*)GinDataPageGetItem(page, i); |
||||
if ( PostingItemGetBlockNumber(pitem) == blkno ) |
||||
return i; |
||||
} |
||||
|
||||
maxoff = storedOff-1; |
||||
} |
||||
|
||||
/* last chance */ |
||||
for( i=FirstOffsetNumber; i <= maxoff ; i++ ) { |
||||
pitem = (PostingItem*)GinDataPageGetItem(page, i); |
||||
if ( PostingItemGetBlockNumber(pitem) == blkno ) |
||||
return i; |
||||
} |
||||
|
||||
return InvalidOffsetNumber; |
||||
} |
||||
|
||||
/*
|
||||
* retunrs blkno of lefmost child |
||||
*/ |
||||
static BlockNumber |
||||
dataGetLeftMostPage(GinBtree btree, Page page) { |
||||
PostingItem *pitem; |
||||
|
||||
Assert( !GinPageIsLeaf(page) ); |
||||
Assert( GinPageIsData(page) ); |
||||
Assert( GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber ); |
||||
|
||||
pitem = (PostingItem*)GinDataPageGetItem(page, FirstOffsetNumber); |
||||
return PostingItemGetBlockNumber(pitem); |
||||
} |
||||
|
||||
/*
|
||||
* add ItemPointer or PostingItem to page. data should points to |
||||
* correct value! depending on leaf or non-leaf page |
||||
*/ |
||||
void |
||||
GinDataPageAddItem( Page page, void *data, OffsetNumber offset ) { |
||||
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; |
||||
char *ptr; |
||||
|
||||
if ( offset == InvalidOffsetNumber ) { |
||||
ptr = GinDataPageGetItem(page,maxoff+1); |
||||
} else { |
||||
ptr = GinDataPageGetItem(page,offset); |
||||
if ( maxoff+1-offset != 0 ) |
||||
memmove( ptr+GinSizeOfItem(page), ptr, (maxoff-offset+1) * GinSizeOfItem(page) ); |
||||
} |
||||
memcpy( ptr, data, GinSizeOfItem(page) ); |
||||
|
||||
GinPageGetOpaque(page)->maxoff++; |
||||
} |
||||
|
||||
/*
|
||||
* Deletes posting item from non-leaf page |
||||
*/ |
||||
void |
||||
PageDeletePostingItem(Page page, OffsetNumber offset) { |
||||
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; |
||||
|
||||
Assert( !GinPageIsLeaf(page) ); |
||||
Assert( offset>=FirstOffsetNumber && offset <= maxoff ); |
||||
|
||||
if ( offset != maxoff ) |
||||
memmove( GinDataPageGetItem(page,offset), GinDataPageGetItem(page,offset+1), |
||||
sizeof(PostingItem) * (maxoff-offset) ); |
||||
|
||||
GinPageGetOpaque(page)->maxoff--; |
||||
} |
||||
|
||||
/*
|
||||
* checks space to install new value, |
||||
* item pointer never deletes! |
||||
*/ |
||||
static bool |
||||
dataIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) { |
||||
Page page = BufferGetPage(buf); |
||||
|
||||
Assert( GinPageIsData(page) ); |
||||
Assert( !btree->isDelete ); |
||||
|
||||
if ( GinPageIsLeaf(page) ) { |
||||
if ( GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff ) { |
||||
if ( (btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page) ) |
||||
return true; |
||||
} else if ( sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page) ) |
||||
return true; |
||||
} else if ( sizeof(PostingItem) <= GinDataPageGetFreeSpace(page) ) |
||||
return true; |
||||
|
||||
return false; |
||||
} |
||||
|
||||
/*
|
||||
* In case of previous split update old child blkno to |
||||
* new right page |
||||
* item pointer never deletes! |
||||
*/ |
||||
static BlockNumber |
||||
dataPrepareData( GinBtree btree, Page page, OffsetNumber off) {
|
||||
BlockNumber ret = InvalidBlockNumber; |
||||
|
||||
Assert( GinPageIsData(page) ); |
||||
|
||||
if ( !GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber ) { |
||||
PostingItem *pitem = (PostingItem*)GinDataPageGetItem(page,off); |
||||
PostingItemSetBlockNumber( pitem, btree->rightblkno ); |
||||
ret = btree->rightblkno; |
||||
} |
||||
|
||||
btree->rightblkno = InvalidBlockNumber; |
||||
|
||||
return ret; |
||||
} |
||||
|
||||
/*
|
||||
* Places keys to page and fills WAL record. In case leaf page and |
||||
* build mode puts all ItemPointers to page. |
||||
*/ |
||||
static void |
||||
dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) { |
||||
Page page = BufferGetPage(buf); |
||||
static XLogRecData rdata[3]; |
||||
int sizeofitem = GinSizeOfItem(page); |
||||
static ginxlogInsert data; |
||||
|
||||
*prdata = rdata; |
||||
Assert( GinPageIsData(page) ); |
||||
|
||||
data.updateBlkno = dataPrepareData( btree, page, off ); |
||||
|
||||
data.node = btree->index->rd_node; |
||||
data.blkno = BufferGetBlockNumber( buf ); |
||||
data.offset = off; |
||||
data.nitem = 1; |
||||
data.isDelete = FALSE; |
||||
data.isData = TRUE; |
||||
data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE; |
||||
|
||||
rdata[0].buffer = buf; |
||||
rdata[0].buffer_std = FALSE; |
||||
rdata[0].data = NULL; |
||||
rdata[0].len = 0; |
||||
rdata[0].next = &rdata[1]; |
||||
|
||||
rdata[1].buffer = InvalidBuffer; |
||||
rdata[1].data = (char *) &data; |
||||
rdata[1].len = sizeof(ginxlogInsert); |
||||
rdata[1].next = &rdata[2]; |
||||
|
||||
rdata[2].buffer = InvalidBuffer; |
||||
rdata[2].data = (GinPageIsLeaf(page)) ? ((char*)(btree->items+btree->curitem)) : ((char*)&(btree->pitem)); |
||||
rdata[2].len = sizeofitem; |
||||
rdata[2].next = NULL; |
||||
|
||||
if ( GinPageIsLeaf(page) ) { |
||||
if ( GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff ) { |
||||
/* usually, create index... */ |
||||
uint32 savedPos = btree->curitem; |
||||
|
||||
while( btree->curitem < btree->nitem ) { |
||||
GinDataPageAddItem(page, btree->items+btree->curitem, off); |
||||
off++; |
||||
btree->curitem++; |
||||
} |
||||
data.nitem = btree->curitem-savedPos; |
||||
rdata[2].len = sizeofitem * data.nitem; |
||||
} else { |
||||
GinDataPageAddItem(page, btree->items+btree->curitem, off); |
||||
btree->curitem++; |
||||
} |
||||
} else |
||||
GinDataPageAddItem(page, &(btree->pitem), off); |
||||
} |
||||
|
||||
/*
|
||||
* split page and fills WAL record. original buffer(lbuf) leaves untouched, |
||||
* returns shadow page of lbuf filled new data. In leaf page and build mode puts all
|
||||
* ItemPointers to pages. Also, in build mode splits data by way to full fulled |
||||
* left page |
||||
*/ |
||||
static Page |
||||
dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) { |
||||
static ginxlogSplit data; |
||||
static XLogRecData rdata[4]; |
||||
static char vector[2*BLCKSZ]; |
||||
char *ptr; |
||||
OffsetNumber separator; |
||||
ItemPointer bound; |
||||
Page lpage = GinPageGetCopyPage( BufferGetPage( lbuf ) ); |
||||
ItemPointerData oldbound = *GinDataPageGetRightBound(lpage); |
||||
int sizeofitem = GinSizeOfItem(lpage); |
||||
OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff; |
||||
Page rpage = BufferGetPage( rbuf ); |
||||
Size pageSize = PageGetPageSize( lpage ); |
||||
Size freeSpace; |
||||
uint32 nCopied = 1; |
||||
|
||||
GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize ); |
||||
freeSpace = GinDataPageGetFreeSpace(rpage); |
||||
|
||||
*prdata = rdata; |
||||
data.leftChildBlkno = ( GinPageIsLeaf(lpage) ) ? |
||||
InvalidOffsetNumber : PostingItemGetBlockNumber( &(btree->pitem) ); |
||||
data.updateBlkno = dataPrepareData( btree, lpage, off ); |
||||
|
||||
memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber), |
||||
maxoff*sizeofitem); |
||||
|
||||
if ( GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff ) { |
||||
nCopied = 0; |
||||
while( btree->curitem < btree->nitem && maxoff*sizeof(ItemPointerData) < 2*(freeSpace - sizeof(ItemPointerData)) ) { |
||||
memcpy( vector + maxoff*sizeof(ItemPointerData), btree->items+btree->curitem, |
||||
sizeof(ItemPointerData) ); |
||||
maxoff++; |
||||
nCopied++; |
||||
btree->curitem++; |
||||
} |
||||
} else { |
||||
ptr = vector + (off-1)*sizeofitem; |
||||
if ( maxoff+1-off != 0 )
|
||||
memmove( ptr+sizeofitem, ptr, (maxoff-off+1) * sizeofitem ); |
||||
if ( GinPageIsLeaf(lpage) ) {
|
||||
memcpy(ptr, btree->items+btree->curitem, sizeofitem ); |
||||
btree->curitem++; |
||||
} else |
||||
memcpy(ptr, &(btree->pitem), sizeofitem ); |
||||
|
||||
maxoff++; |
||||
} |
||||
|
||||
/* we suppose that during index creation table scaned from
|
||||
begin to end, so ItemPointers are monotonically increased.. */
|
||||
if ( btree->isBuild && GinPageRightMost(lpage) ) |
||||
separator=freeSpace/sizeofitem; |
||||
else |
||||
separator=maxoff/2; |
||||
|
||||
GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize ); |
||||
GinInitPage( lpage, GinPageGetOpaque(rpage)->flags, pageSize ); |
||||
|
||||
memcpy( GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem ); |
||||
GinPageGetOpaque(lpage)->maxoff = separator; |
||||
memcpy( GinDataPageGetItem(rpage, FirstOffsetNumber),
|
||||
vector + separator * sizeofitem, (maxoff-separator) * sizeofitem ); |
||||
GinPageGetOpaque(rpage)->maxoff = maxoff-separator; |
||||
|
||||
PostingItemSetBlockNumber( &(btree->pitem), BufferGetBlockNumber(lbuf) ); |
||||
if ( GinPageIsLeaf(lpage) )
|
||||
btree->pitem.key = *(ItemPointerData*)GinDataPageGetItem(lpage,
|
||||
GinPageGetOpaque(lpage)->maxoff); |
||||
else
|
||||
btree->pitem.key = ((PostingItem*)GinDataPageGetItem(lpage,
|
||||
GinPageGetOpaque(lpage)->maxoff))->key; |
||||
btree->rightblkno = BufferGetBlockNumber(rbuf); |
||||
|
||||
/* set up right bound for left page */ |
||||
bound = GinDataPageGetRightBound(lpage); |
||||
*bound = btree->pitem.key; |
||||
|
||||
/* set up right bound for right page */ |
||||
bound = GinDataPageGetRightBound(rpage); |
||||
*bound = oldbound; |
||||
|
||||
data.node = btree->index->rd_node; |
||||
data.rootBlkno = InvalidBlockNumber; |
||||
data.lblkno = BufferGetBlockNumber( lbuf ); |
||||
data.rblkno = BufferGetBlockNumber( rbuf ); |
||||
data.separator = separator; |
||||
data.nitem = maxoff; |
||||
data.isData = TRUE; |
||||
data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE; |
||||
data.isRootSplit = FALSE; |
||||
data.rightbound = oldbound; |
||||
|
||||
rdata[0].buffer = InvalidBuffer; |
||||
rdata[0].data = (char *) &data; |
||||
rdata[0].len = sizeof(ginxlogSplit); |
||||
rdata[0].next = &rdata[1]; |
||||
|
||||
rdata[1].buffer = InvalidBuffer; |
||||
rdata[1].data = vector; |
||||
rdata[1].len = MAXALIGN( maxoff * sizeofitem );
|
||||
rdata[1].next = NULL; |
||||
|
||||
return lpage; |
||||
} |
||||
|
||||
/*
|
||||
* Fills new root by right bound values from child.
|
||||
* Also called from ginxlog, should not use btree |
||||
*/ |
||||
void |
||||
dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) { |
||||
Page page = BufferGetPage(root), |
||||
lpage = BufferGetPage(lbuf), |
||||
rpage = BufferGetPage(rbuf); |
||||
PostingItem li, ri; |
||||
|
||||
li.key = *GinDataPageGetRightBound(lpage); |
||||
PostingItemSetBlockNumber( &li, BufferGetBlockNumber(lbuf) ); |
||||
GinDataPageAddItem(page, &li, InvalidOffsetNumber ); |
||||
|
||||
ri.key = *GinDataPageGetRightBound(rpage); |
||||
PostingItemSetBlockNumber( &ri, BufferGetBlockNumber(rbuf) ); |
||||
GinDataPageAddItem(page, &ri, InvalidOffsetNumber ); |
||||
} |
||||
|
||||
void |
||||
prepareDataScan( GinBtree btree, Relation index) { |
||||
memset(btree, 0, sizeof(GinBtreeData)); |
||||
btree->index = index; |
||||
btree->isMoveRight = dataIsMoveRight; |
||||
btree->findChildPage = dataLocateItem; |
||||
btree->findItem = dataLocateLeafItem; |
||||
btree->findChildPtr = dataFindChildPtr; |
||||
btree->getLeftMostPage = dataGetLeftMostPage; |
||||
btree->isEnoughSpace = dataIsEnoughSpace; |
||||
btree->placeToPage = dataPlaceToPage; |
||||
btree->splitPage = dataSplitPage; |
||||
btree->fillRoot = dataFillRoot; |
||||
|
||||
btree->searchMode = FALSE; |
||||
btree->isDelete = FALSE; |
||||
btree->fullScan = FALSE; |
||||
btree->isBuild= FALSE; |
||||
} |
||||
|
||||
GinPostingTreeScan* |
||||
prepareScanPostingTree( Relation index, BlockNumber rootBlkno, bool searchMode) { |
||||
GinPostingTreeScan *gdi = (GinPostingTreeScan*)palloc0( sizeof(GinPostingTreeScan) ); |
||||
|
||||
prepareDataScan( &gdi->btree, index ); |
||||
|
||||
gdi->btree.searchMode = searchMode; |
||||
gdi->btree.fullScan = searchMode; |
||||
|
||||
gdi->stack = ginPrepareFindLeafPage( &gdi->btree, rootBlkno ); |
||||
|
||||
return gdi; |
||||
} |
||||
|
||||
/*
|
||||
* Inserts array of item pointers, may execute several tree scan (very rare) |
||||
*/ |
||||
void |
||||
insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem) { |
||||
BlockNumber rootBlkno = gdi->stack->blkno; |
||||
|
||||
gdi->btree.items = items; |
||||
gdi->btree.nitem = nitem; |
||||
gdi->btree.curitem = 0; |
||||
|
||||
while( gdi->btree.curitem < gdi->btree.nitem ) { |
||||
if (!gdi->stack) |
||||
gdi->stack = ginPrepareFindLeafPage( &gdi->btree, rootBlkno ); |
||||
|
||||
gdi->stack = ginFindLeafPage( &gdi->btree, gdi->stack );
|
||||
|
||||
if ( gdi->btree.findItem( &(gdi->btree), gdi->stack ) ) |
||||
elog(ERROR,"Item pointer(%d:%d) is already exists",
|
||||
ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
|
||||
ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem)); |
||||
|
||||
ginInsertValue(&(gdi->btree), gdi->stack); |
||||
|
||||
gdi->stack=NULL; |
||||
} |
||||
} |
||||
|
||||
Buffer |
||||
scanBeginPostingTree( GinPostingTreeScan *gdi ) { |
||||
gdi->stack = ginFindLeafPage( &gdi->btree, gdi->stack ); |
||||
return gdi->stack->buffer; |
||||
} |
||||
|
@ -0,0 +1,532 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* ginentrypage.c |
||||
* page utilities routines for the postgres inverted index access method. |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
#include "access/genam.h" |
||||
#include "access/gin.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/freespace.h" |
||||
#include "access/tuptoaster.h" |
||||
|
||||
/*
|
||||
* forms tuple for entry tree. On leaf page, Index tuple has |
||||
* non-traditional layout. Tuple may contain posting list or |
||||
* root blocknumber of posting tree. Macros GinIsPostingTre: (itup) / GinSetPostingTree(itup, blkno) |
||||
* 1) Posting list |
||||
* - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usial |
||||
* - ItemPointerGetBlockNumber(&itup->t_tid) contains original |
||||
* size of tuple (without posting list).
|
||||
* Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n) |
||||
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains number |
||||
* of elements in posting list (number of heap itempointer) |
||||
* Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n) |
||||
* - After usial part of tuple there is a posting list |
||||
* Macros: GinGetPosting(itup) |
||||
* 2) Posting tree |
||||
* - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usial |
||||
* - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
|
||||
* root of posting tree |
||||
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains magick number GIN_TREE_POSTING |
||||
*/ |
||||
IndexTuple |
||||
GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd) { |
||||
bool isnull=FALSE; |
||||
IndexTuple itup; |
||||
|
||||
itup = index_form_tuple(ginstate->tupdesc, &key, &isnull);
|
||||
|
||||
GinSetOrigSizePosting( itup, IndexTupleSize(itup) ); |
||||
|
||||
if ( nipd > 0 ) { |
||||
uint32 newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData)*nipd); |
||||
|
||||
if ( newsize >= INDEX_SIZE_MASK ) |
||||
return NULL; |
||||
|
||||
if ( newsize > TOAST_INDEX_TARGET && nipd > 1 ) |
||||
return NULL; |
||||
|
||||
itup = repalloc( itup, newsize ); |
||||
|
||||
/* set new size */ |
||||
itup->t_info &= ~INDEX_SIZE_MASK;
|
||||
itup->t_info |= newsize; |
||||
|
||||
if ( ipd ) |
||||
memcpy( GinGetPosting(itup), ipd, sizeof(ItemPointerData)*nipd );
|
||||
GinSetNPosting(itup, nipd);
|
||||
} else { |
||||
GinSetNPosting(itup, 0);
|
||||
} |
||||
return itup; |
||||
} |
||||
|
||||
/*
|
||||
* Entry tree is a "static", ie tuple never deletes from it, |
||||
* so we don't use right bound, we use rightest key instead. |
||||
*/ |
||||
static IndexTuple |
||||
getRightMostTuple(Page page) { |
||||
OffsetNumber maxoff = PageGetMaxOffsetNumber(page); |
||||
return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff)); |
||||
} |
||||
|
||||
Datum |
||||
ginGetHighKey(GinState *ginstate, Page page) { |
||||
IndexTuple itup; |
||||
bool isnull; |
||||
|
||||
itup = getRightMostTuple(page); |
||||
|
||||
return index_getattr(itup, FirstOffsetNumber, ginstate->tupdesc, &isnull);
|
||||
} |
||||
|
||||
static bool
|
||||
entryIsMoveRight(GinBtree btree, Page page) { |
||||
Datum highkey; |
||||
|
||||
if ( GinPageRightMost(page) ) |
||||
return FALSE; |
||||
|
||||
highkey = ginGetHighKey(btree->ginstate, page); |
||||
|
||||
if ( compareEntries(btree->ginstate, btree->entryValue, highkey) > 0 ) |
||||
return TRUE; |
||||
|
||||
return FALSE; |
||||
} |
||||
|
||||
/*
|
||||
* Find correct tuple in non-leaf page. It supposed that |
||||
* page correctly choosen and searching value SHOULD be on page |
||||
*/ |
||||
static BlockNumber |
||||
entryLocateEntry(GinBtree btree, GinBtreeStack *stack) { |
||||
OffsetNumber low, high, maxoff; |
||||
IndexTuple itup; |
||||
int result; |
||||
Page page = BufferGetPage( stack->buffer ); |
||||
|
||||
Assert( !GinPageIsLeaf(page) ); |
||||
Assert( !GinPageIsData(page) ); |
||||
|
||||
if ( btree->fullScan ) { |
||||
stack->off = FirstOffsetNumber; |
||||
stack->predictNumber *= PageGetMaxOffsetNumber(page); |
||||
return btree->getLeftMostPage(btree, page); |
||||
} |
||||
|
||||
low = FirstOffsetNumber; |
||||
maxoff = high = PageGetMaxOffsetNumber(page); |
||||
Assert( high >= low ); |
||||
|
||||
high++; |
||||
|
||||
while (high > low) { |
||||
OffsetNumber mid = low + ((high - low) / 2); |
||||
|
||||
if ( mid == maxoff && GinPageRightMost(page) ) |
||||
/* Right infinity */ |
||||
result = -1; |
||||
else { |
||||
bool isnull; |
||||
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); |
||||
result = compareEntries(btree->ginstate, btree->entryValue,
|
||||
index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull) ); |
||||
} |
||||
|
||||
if ( result == 0 ) { |
||||
stack->off = mid; |
||||
Assert( GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO ); |
||||
return GinItemPointerGetBlockNumber(&(itup)->t_tid); |
||||
} else if ( result > 0 ) |
||||
low = mid + 1; |
||||
else |
||||
high = mid; |
||||
} |
||||
|
||||
Assert( high>=FirstOffsetNumber && high <= maxoff ); |
||||
|
||||
stack->off = high; |
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high)); |
||||
Assert( GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO ); |
||||
return GinItemPointerGetBlockNumber(&(itup)->t_tid); |
||||
} |
||||
|
||||
/*
|
||||
* Searches correct position for value on leaf page. |
||||
* Page should be corrrectly choosen. |
||||
* Returns true if value found on page. |
||||
*/ |
||||
static bool |
||||
entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) { |
||||
Page page = BufferGetPage( stack->buffer ); |
||||
OffsetNumber low, high; |
||||
IndexTuple itup; |
||||
|
||||
Assert( GinPageIsLeaf(page) ); |
||||
Assert( !GinPageIsData(page) ); |
||||
|
||||
if ( btree->fullScan ) { |
||||
stack->off = FirstOffsetNumber; |
||||
return TRUE; |
||||
} |
||||
|
||||
low = FirstOffsetNumber; |
||||
high = PageGetMaxOffsetNumber(page); |
||||
|
||||
if ( high < low ) { |
||||
stack->off = FirstOffsetNumber; |
||||
return false; |
||||
} |
||||
|
||||
high++; |
||||
|
||||
while (high > low) { |
||||
OffsetNumber mid = low + ((high - low) / 2); |
||||
bool isnull; |
||||
int result; |
||||
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); |
||||
result = compareEntries(btree->ginstate, btree->entryValue, |
||||
index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull) ); |
||||
|
||||
if ( result == 0 ) { |
||||
stack->off = mid; |
||||
return true; |
||||
} else if ( result > 0 )
|
||||
low = mid + 1; |
||||
else |
||||
high = mid; |
||||
} |
||||
|
||||
stack->off = high; |
||||
return false; |
||||
} |
||||
|
||||
static OffsetNumber |
||||
entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) { |
||||
OffsetNumber i, maxoff = PageGetMaxOffsetNumber(page); |
||||
IndexTuple itup; |
||||
|
||||
Assert( !GinPageIsLeaf(page) ); |
||||
Assert( !GinPageIsData(page) ); |
||||
|
||||
/* if page isn't changed, we returns storedOff */ |
||||
if ( storedOff>= FirstOffsetNumber && storedOff<=maxoff) { |
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff)); |
||||
if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno ) |
||||
return storedOff; |
||||
|
||||
/* we hope, that needed pointer goes to right. It's true
|
||||
if there wasn't a deletion */ |
||||
for( i=storedOff+1 ; i <= maxoff ; i++ ) { |
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); |
||||
if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno ) |
||||
return i; |
||||
} |
||||
maxoff = storedOff-1; |
||||
} |
||||
|
||||
/* last chance */ |
||||
for( i=FirstOffsetNumber; i <= maxoff ; i++ ) {
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); |
||||
if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno ) |
||||
return i; |
||||
} |
||||
|
||||
return InvalidOffsetNumber; |
||||
} |
||||
|
||||
static BlockNumber |
||||
entryGetLeftMostPage(GinBtree btree, Page page) { |
||||
IndexTuple itup; |
||||
|
||||
Assert( !GinPageIsLeaf(page) ); |
||||
Assert( !GinPageIsData(page) ); |
||||
Assert( PageGetMaxOffsetNumber(page) >= FirstOffsetNumber ); |
||||
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); |
||||
return GinItemPointerGetBlockNumber(&(itup)->t_tid);
|
||||
} |
||||
|
||||
static bool |
||||
entryIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) { |
||||
Size itupsz = 0; |
||||
Page page = BufferGetPage(buf); |
||||
|
||||
Assert( btree->entry ); |
||||
Assert( !GinPageIsData(page) ); |
||||
|
||||
if ( btree->isDelete ) { |
||||
IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, off)); |
||||
itupsz = MAXALIGN( IndexTupleSize( itup ) ) + sizeof(ItemIdData); |
||||
} |
||||
|
||||
if ( PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData) ) |
||||
return true; |
||||
|
||||
return false; |
||||
} |
||||
|
||||
/*
|
||||
* Delete tuple on leaf page if tuples was existed and we |
||||
* should update it, update old child blkno to new right page |
||||
* if child split is occured |
||||
*/ |
||||
static BlockNumber |
||||
entryPreparePage( GinBtree btree, Page page, OffsetNumber off) { |
||||
BlockNumber ret = InvalidBlockNumber; |
||||
|
||||
Assert( btree->entry ); |
||||
Assert( !GinPageIsData(page) ); |
||||
|
||||
if ( btree->isDelete ) { |
||||
Assert( GinPageIsLeaf(page) ); |
||||
PageIndexTupleDelete(page, off); |
||||
} |
||||
|
||||
if ( !GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber ) { |
||||
IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, off)); |
||||
ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber); |
||||
ret = btree->rightblkno; |
||||
} |
||||
|
||||
btree->rightblkno = InvalidBlockNumber; |
||||
|
||||
return ret; |
||||
} |
||||
|
||||
/*
|
||||
* Place tuple on page and fills WAL record |
||||
*/ |
||||
static void |
||||
entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) { |
||||
Page page = BufferGetPage(buf); |
||||
static XLogRecData rdata[3]; |
||||
OffsetNumber placed; |
||||
static ginxlogInsert data; |
||||
|
||||
*prdata = rdata; |
||||
data.updateBlkno = entryPreparePage( btree, page, off ); |
||||
|
||||
placed = PageAddItem( page, (Item)btree->entry, IndexTupleSize(btree->entry), off, LP_USED); |
||||
if ( placed != off ) |
||||
elog(ERROR, "failed to add item to index page in \"%s\"", |
||||
RelationGetRelationName(btree->index)); |
||||
|
||||
data.node = btree->index->rd_node; |
||||
data.blkno = BufferGetBlockNumber( buf ); |
||||
data.offset = off; |
||||
data.nitem = 1; |
||||
data.isDelete = btree->isDelete; |
||||
data.isData = false; |
||||
data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE; |
||||
|
||||
rdata[0].buffer = buf; |
||||
rdata[0].buffer_std = TRUE; |
||||
rdata[0].data = NULL; |
||||
rdata[0].len = 0; |
||||
rdata[0].next = &rdata[1]; |
||||
|
||||
rdata[1].buffer = InvalidBuffer; |
||||
rdata[1].data = (char *) &data; |
||||
rdata[1].len = sizeof(ginxlogInsert); |
||||
rdata[1].next = &rdata[2]; |
||||
|
||||
rdata[2].buffer = InvalidBuffer; |
||||
rdata[2].data = (char *) btree->entry; |
||||
rdata[2].len = IndexTupleSize(btree->entry); |
||||
rdata[2].next = NULL; |
||||
|
||||
btree->entry = NULL; |
||||
} |
||||
|
||||
/*
|
||||
* Place tuple and split page, original buffer(lbuf) leaves untouched, |
||||
* returns shadow page of lbuf filled new data. |
||||
* Tuples are distributed between pages by equal size on its, not |
||||
* an equal number! |
||||
*/ |
||||
static Page |
||||
entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) { |
||||
static XLogRecData rdata[2]; |
||||
OffsetNumber i, maxoff, separator=InvalidOffsetNumber; |
||||
Size totalsize=0; |
||||
Size lsize = 0, size; |
||||
static char tupstore[ 2*BLCKSZ ]; |
||||
char *ptr; |
||||
IndexTuple itup, leftrightmost=NULL; |
||||
static ginxlogSplit data; |
||||
Datum value; |
||||
bool isnull; |
||||
Page page; |
||||
Page lpage = GinPageGetCopyPage( BufferGetPage( lbuf ) ); |
||||
Page rpage = BufferGetPage( rbuf ); |
||||
Size pageSize = PageGetPageSize( lpage );
|
||||
|
||||
*prdata = rdata; |
||||
data.leftChildBlkno = ( GinPageIsLeaf(lpage) ) ?
|
||||
InvalidOffsetNumber : GinItemPointerGetBlockNumber( &(btree->entry->t_tid) ); |
||||
data.updateBlkno = entryPreparePage( btree, lpage, off ); |
||||
|
||||
maxoff = PageGetMaxOffsetNumber(lpage); |
||||
ptr = tupstore;
|
||||
|
||||
for(i=FirstOffsetNumber; i<=maxoff; i++) { |
||||
if ( i==off ) { |
||||
size = MAXALIGN( IndexTupleSize(btree->entry) ); |
||||
memcpy(ptr, btree->entry, size); |
||||
ptr+=size; |
||||
totalsize += size + sizeof(ItemIdData); |
||||
} |
||||
|
||||
itup = (IndexTuple)PageGetItem(lpage, PageGetItemId(lpage, i)); |
||||
size = MAXALIGN( IndexTupleSize(itup) ); |
||||
memcpy(ptr, itup, size); |
||||
ptr+=size; |
||||
totalsize += size + sizeof(ItemIdData); |
||||
} |
||||
|
||||
if ( off==maxoff+1 ) { |
||||
size = MAXALIGN( IndexTupleSize(btree->entry) ); |
||||
memcpy(ptr, btree->entry, size); |
||||
ptr+=size; |
||||
totalsize += size + sizeof(ItemIdData); |
||||
} |
||||
|
||||
GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize );
|
||||
GinInitPage( lpage, GinPageGetOpaque(rpage)->flags, pageSize );
|
||||
|
||||
ptr = tupstore; |
||||
maxoff++;
|
||||
lsize = 0; |
||||
|
||||
page = lpage; |
||||
for(i=FirstOffsetNumber; i<=maxoff; i++) { |
||||
itup = (IndexTuple)ptr; |
||||
|
||||
if ( lsize > totalsize/2 ) { |
||||
if ( separator==InvalidOffsetNumber ) |
||||
separator = i-1; |
||||
page = rpage; |
||||
} else { |
||||
leftrightmost = itup; |
||||
lsize += MAXALIGN( IndexTupleSize(itup) ) + sizeof(ItemIdData); |
||||
} |
||||
|
||||
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber ) |
||||
elog(ERROR, "failed to add item to index page in \"%s\"", |
||||
RelationGetRelationName(btree->index)); |
||||
ptr += MAXALIGN( IndexTupleSize(itup) ); |
||||
} |
||||
|
||||
value = index_getattr(leftrightmost, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull); |
||||
btree->entry = GinFormTuple( btree->ginstate, value, NULL, 0); |
||||
ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber( lbuf ), InvalidOffsetNumber); |
||||
btree->rightblkno = BufferGetBlockNumber( rbuf ); |
||||
|
||||
data.node = btree->index->rd_node; |
||||
data.rootBlkno = InvalidBlockNumber; |
||||
data.lblkno = BufferGetBlockNumber( lbuf ); |
||||
data.rblkno = BufferGetBlockNumber( rbuf ); |
||||
data.separator = separator; |
||||
data.nitem = maxoff; |
||||
data.isData = FALSE; |
||||
data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE; |
||||
data.isRootSplit = FALSE; |
||||
|
||||
rdata[0].buffer = InvalidBuffer; |
||||
rdata[0].data = (char *) &data; |
||||
rdata[0].len = sizeof(ginxlogSplit); |
||||
rdata[0].next = &rdata[1]; |
||||
|
||||
rdata[1].buffer = InvalidBuffer; |
||||
rdata[1].data = tupstore; |
||||
rdata[1].len = MAXALIGN(totalsize); |
||||
rdata[1].next = NULL; |
||||
|
||||
return lpage; |
||||
} |
||||
|
||||
/*
|
||||
* return newly allocate rightmost tuple |
||||
*/ |
||||
IndexTuple |
||||
ginPageGetLinkItup(Buffer buf) { |
||||
IndexTuple itup, nitup; |
||||
Page page = BufferGetPage(buf); |
||||
|
||||
itup = getRightMostTuple( page ); |
||||
if ( GinPageIsLeaf(page) && !GinIsPostingTree(itup) ) { |
||||
nitup = (IndexTuple)palloc( MAXALIGN(GinGetOrigSizePosting(itup)) ); |
||||
memcpy( nitup, itup, GinGetOrigSizePosting(itup) ); |
||||
nitup->t_info &= ~INDEX_SIZE_MASK; |
||||
nitup->t_info |= GinGetOrigSizePosting(itup); |
||||
} else { |
||||
nitup = (IndexTuple)palloc( MAXALIGN(IndexTupleSize(itup)) ); |
||||
memcpy( nitup, itup, IndexTupleSize(itup) ); |
||||
} |
||||
|
||||
ItemPointerSet(&nitup->t_tid, BufferGetBlockNumber(buf), InvalidOffsetNumber); |
||||
return nitup; |
||||
} |
||||
|
||||
/*
|
||||
* Fills new root by rightest values from child. |
||||
* Also called from ginxlog, should not use btree |
||||
*/ |
||||
void |
||||
entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) { |
||||
Page page; |
||||
IndexTuple itup; |
||||
|
||||
page = BufferGetPage(root); |
||||
|
||||
itup = ginPageGetLinkItup( lbuf ); |
||||
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber ) |
||||
elog(ERROR, "failed to add item to index root page"); |
||||
|
||||
itup = ginPageGetLinkItup( rbuf ); |
||||
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber ) |
||||
elog(ERROR, "failed to add item to index root page"); |
||||
} |
||||
|
||||
void |
||||
prepareEntryScan( GinBtree btree, Relation index, Datum value, GinState *ginstate) { |
||||
memset(btree, 0, sizeof(GinBtreeData)); |
||||
|
||||
btree->isMoveRight = entryIsMoveRight; |
||||
btree->findChildPage = entryLocateEntry; |
||||
btree->findItem = entryLocateLeafEntry; |
||||
btree->findChildPtr = entryFindChildPtr; |
||||
btree->getLeftMostPage = entryGetLeftMostPage; |
||||
btree->isEnoughSpace = entryIsEnoughSpace; |
||||
btree->placeToPage = entryPlaceToPage; |
||||
btree->splitPage = entrySplitPage; |
||||
btree->fillRoot = entryFillRoot; |
||||
|
||||
btree->index = index; |
||||
btree->ginstate = ginstate; |
||||
btree->entryValue = value; |
||||
|
||||
btree->isDelete = FALSE; |
||||
btree->searchMode = FALSE; |
||||
btree->fullScan = FALSE; |
||||
btree->isBuild = FALSE; |
||||
} |
||||
|
@ -0,0 +1,412 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* ginget.c |
||||
* fetch tuples from a GIN scan. |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
#include "access/genam.h" |
||||
#include "access/gin.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/freespace.h" |
||||
#include "utils/memutils.h" |
||||
|
||||
static OffsetNumber |
||||
findItemInPage( Page page, ItemPointer item, OffsetNumber off ) { |
||||
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; |
||||
int res; |
||||
|
||||
for(; off<=maxoff; off++) { |
||||
res = compareItemPointers( item, (ItemPointer)GinDataPageGetItem(page, off) ); |
||||
Assert( res>= 0 ); |
||||
|
||||
if ( res == 0 ) |
||||
return off; |
||||
} |
||||
|
||||
return InvalidOffsetNumber; |
||||
} |
||||
|
||||
/*
|
||||
* Start* functions setup state of searches: find correct buffer and locks it, |
||||
* Stop* functions unlock buffer (but don't release!) |
||||
*/ |
||||
static void |
||||
startScanEntry( Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall ) { |
||||
if ( entry->master != NULL ) { |
||||
entry->isFinished = entry->master->isFinished; |
||||
return; |
||||
} |
||||
|
||||
if ( firstCall ) { |
||||
/* at first call we should find entry, and
|
||||
begin scan of posting tree or just store posting list in memory */ |
||||
GinBtreeData btreeEntry; |
||||
GinBtreeStack *stackEntry; |
||||
Page page; |
||||
bool needUnlock = TRUE; |
||||
|
||||
prepareEntryScan( &btreeEntry, index, entry->entry, ginstate ); |
||||
btreeEntry.searchMode = TRUE; |
||||
stackEntry = ginFindLeafPage(&btreeEntry, NULL); |
||||
page = BufferGetPage( stackEntry->buffer ); |
||||
|
||||
entry->isFinished = TRUE; |
||||
entry->buffer = InvalidBuffer; |
||||
entry->offset = InvalidOffsetNumber; |
||||
entry->list = NULL; |
||||
entry->nlist = 0; |
||||
entry->reduceResult = FALSE; |
||||
entry->predictNumberResult = 0; |
||||
|
||||
if ( btreeEntry.findItem( &btreeEntry, stackEntry ) ) { |
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); |
||||
|
||||
if ( GinIsPostingTree(itup) ) { |
||||
BlockNumber rootPostingTree = GinGetPostingTree(itup); |
||||
GinPostingTreeScan *gdi; |
||||
Page page; |
||||
|
||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK); |
||||
needUnlock = FALSE; |
||||
gdi = prepareScanPostingTree( index, rootPostingTree, TRUE ); |
||||
|
||||
entry->buffer = scanBeginPostingTree( gdi ); |
||||
IncrBufferRefCount( entry->buffer ); |
||||
|
||||
page = BufferGetPage( entry->buffer ); |
||||
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
|
||||
|
||||
freeGinBtreeStack( gdi->stack ); |
||||
pfree( gdi ); |
||||
entry->isFinished = FALSE; |
||||
} else if ( GinGetNPosting(itup) > 0 ) { |
||||
entry->nlist = GinGetNPosting(itup); |
||||
entry->list = (ItemPointerData*)palloc( sizeof(ItemPointerData) * entry->nlist ); |
||||
memcpy( entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist ); |
||||
entry->isFinished = FALSE; |
||||
} |
||||
} |
||||
|
||||
if ( needUnlock ) |
||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK); |
||||
freeGinBtreeStack( stackEntry ); |
||||
} else if ( entry->buffer != InvalidBuffer ) { |
||||
/* we should find place were we was stopped */ |
||||
BlockNumber blkno; |
||||
Page page; |
||||
|
||||
LockBuffer( entry->buffer, GIN_SHARE ); |
||||
|
||||
if ( !ItemPointerIsValid( &entry->curItem ) ) |
||||
/* start position */ |
||||
return; |
||||
Assert( entry->offset!=InvalidOffsetNumber ); |
||||
|
||||
page = BufferGetPage( entry->buffer ); |
||||
|
||||
/* try to find curItem in current buffer */ |
||||
if ( (entry->offset=findItemInPage(page , &entry->curItem, entry->offset))!=InvalidOffsetNumber ) |
||||
return; |
||||
|
||||
/* walk to right */ |
||||
while( (blkno = GinPageGetOpaque( page )->rightlink)!=InvalidBlockNumber ) { |
||||
LockBuffer( entry->buffer, GIN_UNLOCK ); |
||||
entry->buffer = ReleaseAndReadBuffer( entry->buffer, index, blkno ); |
||||
LockBuffer( entry->buffer, GIN_SHARE ); |
||||
page = BufferGetPage( entry->buffer ); |
||||
|
||||
if ( (entry->offset=findItemInPage(page , &entry->curItem, FirstOffsetNumber))!=InvalidOffsetNumber ) |
||||
return; |
||||
} |
||||
|
||||
elog(ERROR,"Logic error: lost previously founded ItemId"); |
||||
} |
||||
} |
||||
|
||||
static void |
||||
stopScanEntry( GinScanEntry entry ) { |
||||
if ( entry->buffer != InvalidBuffer ) |
||||
LockBuffer( entry->buffer, GIN_UNLOCK ); |
||||
} |
||||
|
||||
static void |
||||
startScanKey( Relation index, GinState *ginstate, GinScanKey key ) { |
||||
uint32 i; |
||||
|
||||
for(i=0;i<key->nentries;i++) |
||||
startScanEntry( index, ginstate, key->scanEntry+i, key->firstCall ); |
||||
|
||||
if ( key->firstCall ) {
|
||||
memset( key->entryRes, TRUE, sizeof(bool) * key->nentries ); |
||||
key->isFinished = FALSE; |
||||
key->firstCall = FALSE; |
||||
|
||||
if ( GinFuzzySearchLimit > 0 ) { |
||||
/*
|
||||
* If all of keys more than treshold we will try to reduce |
||||
* result, we hope (and only hope, for intersection operation of array |
||||
* our supposition isn't true), that total result will not more |
||||
* than minimal predictNumberResult. |
||||
*/ |
||||
|
||||
for(i=0;i<key->nentries;i++) |
||||
if ( key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit ) |
||||
return;
|
||||
|
||||
for(i=0;i<key->nentries;i++) |
||||
if ( key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit ) {
|
||||
key->scanEntry[i].predictNumberResult /= key->nentries; |
||||
key->scanEntry[i].reduceResult = TRUE; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void |
||||
stopScanKey( GinScanKey key ) { |
||||
uint32 i; |
||||
|
||||
for(i=0;i<key->nentries;i++) |
||||
stopScanEntry( key->scanEntry+i ); |
||||
} |
||||
|
||||
static void |
||||
startScan( IndexScanDesc scan ) { |
||||
uint32 i; |
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque; |
||||
|
||||
for(i=0; i<so->nkeys; i++)
|
||||
startScanKey( scan->indexRelation, &so->ginstate, so->keys + i );
|
||||
} |
||||
|
||||
static void |
||||
stopScan( IndexScanDesc scan ) { |
||||
uint32 i; |
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque; |
||||
|
||||
for(i=0; i<so->nkeys; i++)
|
||||
stopScanKey( so->keys + i );
|
||||
} |
||||
|
||||
|
||||
static void |
||||
entryGetNextItem( Relation index, GinScanEntry entry ) { |
||||
Page page = BufferGetPage( entry->buffer ); |
||||
|
||||
entry->offset++; |
||||
if ( entry->offset <= GinPageGetOpaque( page )->maxoff && GinPageGetOpaque( page )->maxoff >= FirstOffsetNumber ) {
|
||||
entry->curItem = *(ItemPointerData*)GinDataPageGetItem(page, entry->offset); |
||||
} else { |
||||
BlockNumber blkno = GinPageGetOpaque( page )->rightlink; |
||||
|
||||
LockBuffer( entry->buffer, GIN_UNLOCK ); |
||||
if ( blkno == InvalidBlockNumber ) { |
||||
ReleaseBuffer( entry->buffer ); |
||||
entry->buffer = InvalidBuffer; |
||||
entry->isFinished = TRUE; |
||||
} else { |
||||
entry->buffer = ReleaseAndReadBuffer( entry->buffer, index, blkno ); |
||||
LockBuffer( entry->buffer, GIN_SHARE ); |
||||
entry->offset = InvalidOffsetNumber; |
||||
entryGetNextItem(index, entry); |
||||
} |
||||
} |
||||
} |
||||
|
||||
/* random() scaled by MAX_RANDOM_VALUE, as a double */
#define gin_rand() \
    ((double) random() / (double) MAX_RANDOM_VALUE)

/*
 * True when a random draw exceeds GinFuzzySearchLimit divided by the
 * entry's predictNumberResult, i.e. when the item should be skipped.
 */
#define dropItem(e) \
    (gin_rand() > (double) GinFuzzySearchLimit / (double) ((e)->predictNumberResult))
||||
|
||||
/*
|
||||
* Sets entry->curItem to new found heap item pointer for one
|
||||
* entry of one scan key |
||||
*/ |
||||
static bool |
||||
entryGetItem( Relation index, GinScanEntry entry ) { |
||||
if ( entry->master ) { |
||||
entry->isFinished = entry->master->isFinished; |
||||
entry->curItem = entry->master->curItem; |
||||
} else if ( entry->list ) { |
||||
entry->offset++; |
||||
if ( entry->offset <= entry->nlist )
|
||||
entry->curItem = entry->list[ entry->offset - 1 ]; |
||||
else { |
||||
ItemPointerSet( &entry->curItem, InvalidBlockNumber, InvalidOffsetNumber ); |
||||
entry->isFinished = TRUE; |
||||
} |
||||
} else { |
||||
do { |
||||
entryGetNextItem(index, entry); |
||||
} while ( entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry) );
|
||||
} |
||||
|
||||
return entry->isFinished; |
||||
} |
||||
|
||||
/*
|
||||
* Sets key->curItem to new found heap item pointer for one scan key |
||||
* returns isFinished! |
||||
*/ |
||||
static bool |
||||
keyGetItem( Relation index, GinState *ginstate, MemoryContext tempCtx, GinScanKey key ) {
|
||||
uint32 i; |
||||
GinScanEntry entry; |
||||
bool res; |
||||
MemoryContext oldCtx; |
||||
|
||||
if ( key->isFinished ) |
||||
return TRUE; |
||||
|
||||
do { |
||||
/* move forward from previously value and set new curItem,
|
||||
which is minimal from entries->curItems */ |
||||
ItemPointerSetMax( &key->curItem ); |
||||
for(i=0;i<key->nentries;i++) { |
||||
entry = key->scanEntry+i; |
||||
|
||||
if ( key->entryRes[i] ) { |
||||
if ( entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE ) { |
||||
if (compareItemPointers( &entry->curItem, &key->curItem ) < 0) |
||||
key->curItem = entry->curItem; |
||||
} else |
||||
key->entryRes[i] = FALSE; |
||||
} else if ( entry->isFinished == FALSE ) {
|
||||
if (compareItemPointers( &entry->curItem, &key->curItem ) < 0) |
||||
key->curItem = entry->curItem; |
||||
}
|
||||
} |
||||
|
||||
if ( ItemPointerIsMax( &key->curItem ) ) { |
||||
/* all entries are finished */ |
||||
key->isFinished = TRUE; |
||||
return TRUE; |
||||
} |
||||
|
||||
if ( key->nentries == 1 ) { |
||||
/* we can do not call consistentFn !! */ |
||||
key->entryRes[0] = TRUE; |
||||
return FALSE; |
||||
} |
||||
|
||||
/* setting up array for consistentFn */ |
||||
for(i=0;i<key->nentries;i++) { |
||||
entry = key->scanEntry+i; |
||||
|
||||
if ( entry->isFinished == FALSE && compareItemPointers( &entry->curItem, &key->curItem )==0 ) |
||||
key->entryRes[i] = TRUE; |
||||
else |
||||
key->entryRes[i] = FALSE; |
||||
} |
||||
|
||||
oldCtx = MemoryContextSwitchTo(tempCtx); |
||||
res = DatumGetBool( FunctionCall3( |
||||
&ginstate->consistentFn, |
||||
PointerGetDatum( key->entryRes ), |
||||
UInt16GetDatum( key->strategy ), |
||||
key->query |
||||
)); |
||||
MemoryContextSwitchTo(oldCtx); |
||||
MemoryContextReset(tempCtx); |
||||
} while( !res ); |
||||
|
||||
return FALSE; |
||||
} |
||||
|
||||
/*
|
||||
* Get heap item pointer from scan
|
||||
* returns true if found
|
||||
*/ |
||||
static bool |
||||
scanGetItem( IndexScanDesc scan, ItemPointerData *item ) { |
||||
uint32 i; |
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque; |
||||
|
||||
ItemPointerSetMin( item );
|
||||
for(i=0;i<so->nkeys;i++) { |
||||
GinScanKey key = so->keys+i; |
||||
|
||||
if ( keyGetItem( scan->indexRelation, &so->ginstate, so->tempCtx, key )==FALSE ) { |
||||
if ( compareItemPointers( item, &key->curItem ) < 0 ) |
||||
*item = key->curItem; |
||||
} else |
||||
return FALSE; /* finshed one of keys */ |
||||
} |
||||
|
||||
for(i=1;i<=so->nkeys;i++) { |
||||
GinScanKey key = so->keys+i-1; |
||||
|
||||
for(;;) { |
||||
int cmp = compareItemPointers( item, &key->curItem ); |
||||
|
||||
if ( cmp == 0 ) |
||||
break; |
||||
else if ( cmp > 0 ) { |
||||
if ( keyGetItem( scan->indexRelation, &so->ginstate, so->tempCtx, key )==TRUE ) |
||||
return FALSE; /* finshed one of keys */ |
||||
} else { /* returns to begin */ |
||||
*item = key->curItem; |
||||
i=0; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
return TRUE;
|
||||
} |
||||
|
||||
/*
 * True when the scan descriptor s has no scan keys built yet
 * (its opaque state's keys pointer is NULL).
 * The macro argument is used, rather than a variable that happens to be
 * named "scan" at the call site.
 */
#define GinIsNewKey(s) ( ((GinScanOpaque) (s)->opaque)->keys == NULL )
||||
|
||||
Datum
|
||||
gingetmulti(PG_FUNCTION_ARGS) { |
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); |
||||
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); |
||||
int32 max_tids = PG_GETARG_INT32(2); |
||||
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3); |
||||
|
||||
if ( GinIsNewKey(scan) ) |
||||
newScanKey( scan ); |
||||
|
||||
startScan( scan ); |
||||
|
||||
*returned_tids = 0; |
||||
|
||||
do { |
||||
if ( scanGetItem( scan, tids + *returned_tids ) )
|
||||
(*returned_tids)++; |
||||
else |
||||
break; |
||||
} while ( *returned_tids < max_tids ); |
||||
|
||||
stopScan( scan ); |
||||
|
||||
PG_RETURN_BOOL(*returned_tids == max_tids); |
||||
} |
||||
|
||||
Datum |
||||
gingettuple(PG_FUNCTION_ARGS) { |
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); |
||||
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); |
||||
bool res; |
||||
|
||||
if ( dir != ForwardScanDirection ) |
||||
elog(ERROR, "Gin doesn't support other scan directions than forward"); |
||||
|
||||
if ( GinIsNewKey(scan) ) |
||||
newScanKey( scan ); |
||||
|
||||
startScan( scan ); |
||||
res = scanGetItem(scan, &scan->xs_ctup.t_self); |
||||
stopScan( scan ); |
||||
|
||||
PG_RETURN_BOOL(res); |
||||
} |
@ -0,0 +1,374 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* gininsert.c |
||||
* insert routines for the postgres inverted index access method. |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
#include "access/genam.h" |
||||
#include "access/gin.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/freespace.h" |
||||
#include "utils/memutils.h" |
||||
#include "access/tuptoaster.h" |
||||
|
||||
typedef struct { |
||||
GinState ginstate; |
||||
double indtuples; |
||||
MemoryContext tmpCtx; |
||||
BuildAccumulator accum; |
||||
} GinBuildState; |
||||
|
||||
/*
|
||||
* Creates posting tree with one page. Function |
||||
* suppose that items[] fits to page |
||||
*/ |
||||
static BlockNumber |
||||
createPostingTree( Relation index, ItemPointerData *items, uint32 nitems ) { |
||||
BlockNumber blkno; |
||||
Buffer buffer = GinNewBuffer(index); |
||||
Page page; |
||||
|
||||
START_CRIT_SECTION(); |
||||
|
||||
GinInitBuffer( buffer, GIN_DATA|GIN_LEAF ); |
||||
page = BufferGetPage(buffer); |
||||
blkno = BufferGetBlockNumber(buffer); |
||||
|
||||
memcpy( GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems ); |
||||
GinPageGetOpaque(page)->maxoff = nitems; |
||||
|
||||
if (!index->rd_istemp) { |
||||
XLogRecPtr recptr; |
||||
XLogRecData rdata[2]; |
||||
ginxlogCreatePostingTree data; |
||||
|
||||
data.node = index->rd_node; |
||||
data.blkno = blkno; |
||||
data.nitem = nitems; |
||||
|
||||
rdata[0].buffer = InvalidBuffer; |
||||
rdata[0].data = (char *) &data; |
||||
rdata[0].len = sizeof(ginxlogCreatePostingTree); |
||||
rdata[0].next = &rdata[1]; |
||||
|
||||
rdata[1].buffer = InvalidBuffer; |
||||
rdata[1].data = (char *) items; |
||||
rdata[1].len = sizeof(ItemPointerData) * nitems; |
||||
rdata[1].next = NULL; |
||||
|
||||
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata); |
||||
PageSetLSN(page, recptr); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
|
||||
}
|
||||
|
||||
MarkBufferDirty(buffer); |
||||
UnlockReleaseBuffer(buffer); |
||||
|
||||
END_CRIT_SECTION(); |
||||
|
||||
return blkno; |
||||
} |
||||
|
||||
|
||||
/*
|
||||
* Adds array of item pointers to tuple's posting list or |
||||
* creates posting tree and tuple pointed to tree in a case |
||||
* of not enough space. Max size of tuple is defined in |
||||
* GinFormTuple(). |
||||
*/ |
||||
static IndexTuple |
||||
addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
|
||||
IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild) { |
||||
bool isnull; |
||||
Datum key = index_getattr(old, FirstOffsetNumber, ginstate->tupdesc, &isnull); |
||||
IndexTuple res = GinFormTuple(ginstate, key, NULL, nitem + GinGetNPosting(old)); |
||||
|
||||
if ( res ) { |
||||
/* good, small enough */ |
||||
MergeItemPointers( GinGetPosting(res), |
||||
GinGetPosting(old), GinGetNPosting(old), |
||||
items, nitem |
||||
); |
||||
|
||||
GinSetNPosting(res, nitem + GinGetNPosting(old)); |
||||
} else { |
||||
BlockNumber postingRoot; |
||||
GinPostingTreeScan *gdi; |
||||
|
||||
/* posting list becomes big, so we need to make posting's tree */ |
||||
res = GinFormTuple(ginstate, key, NULL, 0); |
||||
postingRoot = createPostingTree(index, GinGetPosting(old), GinGetNPosting(old)); |
||||
GinSetPostingTree(res, postingRoot); |
||||
|
||||
gdi = prepareScanPostingTree(index, postingRoot, FALSE);
|
||||
gdi->btree.isBuild = isBuild; |
||||
|
||||
insertItemPointer(gdi, items, nitem); |
||||
|
||||
pfree(gdi); |
||||
} |
||||
|
||||
return res; |
||||
} |
||||
|
||||
/*
|
||||
* Inserts only one entry to the index, but it can adds more that 1
|
||||
* ItemPointer.
|
||||
*/ |
||||
static void |
||||
ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild) { |
||||
GinBtreeData btree; |
||||
GinBtreeStack *stack; |
||||
IndexTuple itup; |
||||
Page page; |
||||
|
||||
prepareEntryScan( &btree, index, value, ginstate ); |
||||
|
||||
stack = ginFindLeafPage(&btree, NULL); |
||||
page = BufferGetPage( stack->buffer ); |
||||
|
||||
if ( btree.findItem( &btree, stack ) ) { |
||||
/* found entry */ |
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); |
||||
|
||||
if ( GinIsPostingTree(itup) ) { |
||||
/* lock root of posting tree */ |
||||
GinPostingTreeScan *gdi; |
||||
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
||||
|
||||
/* release all stack */ |
||||
LockBuffer(stack->buffer, GIN_UNLOCK); |
||||
freeGinBtreeStack( stack ); |
||||
|
||||
/* insert into posting tree */ |
||||
gdi = prepareScanPostingTree( index, rootPostingTree, FALSE ); |
||||
gdi->btree.isBuild = isBuild; |
||||
insertItemPointer(gdi, items, nitem); |
||||
|
||||
return; |
||||
} |
||||
|
||||
itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild); |
||||
|
||||
btree.isDelete = TRUE; |
||||
} else { |
||||
/* We suppose, that tuple can store at list one itempointer */ |
||||
itup = GinFormTuple( ginstate, value, items, 1); |
||||
if ( itup==NULL || IndexTupleSize(itup) >= GinMaxItemSize ) |
||||
elog(ERROR, "huge tuple"); |
||||
|
||||
if ( nitem>1 ) { |
||||
IndexTuple previtup = itup; |
||||
|
||||
itup = addItemPointersToTuple(index, ginstate, stack, previtup, items+1, nitem-1, isBuild); |
||||
pfree(previtup); |
||||
} |
||||
} |
||||
|
||||
btree.entry = itup; |
||||
ginInsertValue(&btree, stack); |
||||
pfree( itup ); |
||||
} |
||||
|
||||
/*
|
||||
* Saves indexed value in memory accumulator during index creation |
||||
* Function isnt use during normal insert |
||||
*/ |
||||
static uint32 |
||||
ginHeapTupleBulkInsert(BuildAccumulator *accum, Datum value, ItemPointer heapptr) { |
||||
Datum *entries; |
||||
uint32 nentries; |
||||
|
||||
entries = extractEntriesSU( accum->ginstate, value, &nentries); |
||||
|
||||
if ( nentries==0 ) |
||||
/* nothing to insert */ |
||||
return 0; |
||||
|
||||
ginInsertRecordBA( accum, heapptr, entries, nentries); |
||||
|
||||
pfree( entries ); |
||||
|
||||
return nentries; |
||||
} |
||||
|
||||
static void
|
||||
ginBuildCallback(Relation index, HeapTuple htup, Datum *values, |
||||
bool *isnull, bool tupleIsAlive, void *state) { |
||||
|
||||
GinBuildState *buildstate = (GinBuildState*)state; |
||||
MemoryContext oldCtx; |
||||
|
||||
if ( *isnull ) |
||||
return; |
||||
|
||||
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); |
||||
|
||||
buildstate->indtuples += ginHeapTupleBulkInsert(&buildstate->accum, *values, &htup->t_self); |
||||
|
||||
/* we use only half maintenance_work_mem, because there is some leaks
|
||||
during insertion and extract values */
|
||||
if ( buildstate->accum.allocatedMemory >= maintenance_work_mem*1024L/2L ) {
|
||||
ItemPointerData *list; |
||||
Datum entry; |
||||
uint32 nlist; |
||||
|
||||
while( (list=ginGetEntry(&buildstate->accum, &entry, &nlist)) != NULL ) |
||||
ginEntryInsert(index, &buildstate->ginstate, entry, list, nlist, TRUE); |
||||
|
||||
MemoryContextReset(buildstate->tmpCtx); |
||||
ginInitBA(&buildstate->accum); |
||||
} |
||||
|
||||
MemoryContextSwitchTo(oldCtx); |
||||
} |
||||
|
||||
Datum |
||||
ginbuild(PG_FUNCTION_ARGS) { |
||||
Relation heap = (Relation) PG_GETARG_POINTER(0); |
||||
Relation index = (Relation) PG_GETARG_POINTER(1); |
||||
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); |
||||
double reltuples; |
||||
GinBuildState buildstate; |
||||
Buffer buffer; |
||||
ItemPointerData *list; |
||||
Datum entry; |
||||
uint32 nlist; |
||||
MemoryContext oldCtx; |
||||
|
||||
if (RelationGetNumberOfBlocks(index) != 0) |
||||
elog(ERROR, "index \"%s\" already contains data", |
||||
RelationGetRelationName(index)); |
||||
|
||||
initGinState(&buildstate.ginstate, index); |
||||
|
||||
/* initialize the root page */ |
||||
buffer = GinNewBuffer(index); |
||||
START_CRIT_SECTION(); |
||||
GinInitBuffer(buffer, GIN_LEAF); |
||||
if (!index->rd_istemp) { |
||||
XLogRecPtr recptr; |
||||
XLogRecData rdata; |
||||
Page page; |
||||
|
||||
rdata.buffer = InvalidBuffer; |
||||
rdata.data = (char *) &(index->rd_node); |
||||
rdata.len = sizeof(RelFileNode); |
||||
rdata.next = NULL; |
||||
|
||||
page = BufferGetPage(buffer); |
||||
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata); |
||||
PageSetLSN(page, recptr); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
|
||||
}
|
||||
|
||||
MarkBufferDirty(buffer); |
||||
UnlockReleaseBuffer(buffer); |
||||
END_CRIT_SECTION(); |
||||
|
||||
/* build the index */ |
||||
buildstate.indtuples = 0; |
||||
|
||||
/*
|
||||
* create a temporary memory context that is reset once for each tuple |
||||
* inserted into the index |
||||
*/ |
||||
buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, |
||||
"Gin build temporary context", |
||||
ALLOCSET_DEFAULT_MINSIZE, |
||||
ALLOCSET_DEFAULT_INITSIZE, |
||||
ALLOCSET_DEFAULT_MAXSIZE); |
||||
|
||||
buildstate.accum.ginstate = &buildstate.ginstate; |
||||
ginInitBA( &buildstate.accum ); |
||||
|
||||
/* do the heap scan */ |
||||
reltuples = IndexBuildHeapScan(heap, index, indexInfo, |
||||
ginBuildCallback, (void *) &buildstate); |
||||
|
||||
oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); |
||||
while( (list=ginGetEntry(&buildstate.accum, &entry, &nlist)) != NULL ) |
||||
ginEntryInsert(index, &buildstate.ginstate, entry, list, nlist, TRUE); |
||||
MemoryContextSwitchTo(oldCtx); |
||||
|
||||
MemoryContextDelete(buildstate.tmpCtx); |
||||
|
||||
/* since we just counted the # of tuples, may as well update stats */ |
||||
IndexCloseAndUpdateStats(heap, reltuples, index, buildstate.indtuples); |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
/*
|
||||
* Inserts value during normal insertion |
||||
*/ |
||||
static uint32 |
||||
ginHeapTupleInsert( Relation index, GinState *ginstate, Datum value, ItemPointer item) { |
||||
Datum *entries; |
||||
uint32 i,nentries; |
||||
|
||||
entries = extractEntriesSU( ginstate, value, &nentries); |
||||
|
||||
if ( nentries==0 ) |
||||
/* nothing to insert */ |
||||
return 0; |
||||
|
||||
for(i=0;i<nentries;i++)
|
||||
ginEntryInsert(index, ginstate, entries[i], item, 1, FALSE); |
||||
|
||||
return nentries; |
||||
} |
||||
|
||||
Datum |
||||
gininsert(PG_FUNCTION_ARGS) { |
||||
Relation index = (Relation) PG_GETARG_POINTER(0); |
||||
Datum *values = (Datum *) PG_GETARG_POINTER(1); |
||||
bool *isnull = (bool *) PG_GETARG_POINTER(2); |
||||
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); |
||||
#ifdef NOT_USED |
||||
Relation heapRel = (Relation) PG_GETARG_POINTER(4); |
||||
bool checkUnique = PG_GETARG_BOOL(5); |
||||
#endif |
||||
GinState ginstate; |
||||
MemoryContext oldCtx; |
||||
MemoryContext insertCtx; |
||||
uint32 res; |
||||
|
||||
if ( *isnull ) |
||||
PG_RETURN_BOOL(false); |
||||
|
||||
insertCtx = AllocSetContextCreate(CurrentMemoryContext, |
||||
"Gin insert temporary context", |
||||
ALLOCSET_DEFAULT_MINSIZE, |
||||
ALLOCSET_DEFAULT_INITSIZE, |
||||
ALLOCSET_DEFAULT_MAXSIZE); |
||||
|
||||
oldCtx = MemoryContextSwitchTo(insertCtx); |
||||
|
||||
initGinState(&ginstate, index); |
||||
|
||||
res = ginHeapTupleInsert(index, &ginstate, *values, ht_ctid);
|
||||
|
||||
MemoryContextSwitchTo(oldCtx); |
||||
MemoryContextDelete(insertCtx); |
||||
|
||||
PG_RETURN_BOOL(res>0); |
||||
} |
||||
|
@ -0,0 +1,256 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* ginscan.c |
||||
* routines to manage scans of inverted index relations |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
#include "access/genam.h" |
||||
#include "access/gin.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/freespace.h" |
||||
#include "utils/memutils.h" |
||||
|
||||
|
||||
Datum
|
||||
ginbeginscan(PG_FUNCTION_ARGS) { |
||||
Relation rel = (Relation) PG_GETARG_POINTER(0); |
||||
int keysz = PG_GETARG_INT32(1); |
||||
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2); |
||||
IndexScanDesc scan; |
||||
|
||||
scan = RelationGetIndexScan(rel, keysz, scankey); |
||||
|
||||
PG_RETURN_POINTER(scan); |
||||
} |
||||
|
||||
static void |
||||
fillScanKey( GinState *ginstate, GinScanKey key, Datum query,
|
||||
Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy ) { |
||||
uint32 i,j; |
||||
|
||||
key->nentries = nEntryValues; |
||||
key->entryRes = (bool*)palloc0( sizeof(bool) * nEntryValues );
|
||||
key->scanEntry = (GinScanEntry) palloc( sizeof(GinScanEntryData) * nEntryValues ); |
||||
key->strategy = strategy; |
||||
key->query = query; |
||||
key->firstCall= TRUE; |
||||
ItemPointerSet( &(key->curItem), InvalidBlockNumber, InvalidOffsetNumber ); |
||||
|
||||
for(i=0; i<nEntryValues; i++) { |
||||
key->scanEntry[i].pval = key->entryRes + i; |
||||
key->scanEntry[i].entry = entryValues[i]; |
||||
ItemPointerSet( &(key->scanEntry[i].curItem), InvalidBlockNumber, InvalidOffsetNumber ); |
||||
key->scanEntry[i].offset = InvalidOffsetNumber; |
||||
key->scanEntry[i].buffer = InvalidBuffer; |
||||
key->scanEntry[i].list = NULL; |
||||
key->scanEntry[i].nlist = 0; |
||||
|
||||
/* link to the equals entry in current scan key */ |
||||
key->scanEntry[i].master = NULL; |
||||
for( j=0; j<i; j++) |
||||
if ( compareEntries( ginstate, entryValues[i], entryValues[j] ) == 0 ) { |
||||
key->scanEntry[i].master = key->scanEntry + j; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void |
||||
resetScanKeys(GinScanKey keys, uint32 nkeys) { |
||||
uint32 i, j; |
||||
|
||||
if ( keys == NULL ) |
||||
return; |
||||
|
||||
for(i=0;i<nkeys;i++) { |
||||
GinScanKey key = keys + i; |
||||
|
||||
key->firstCall = TRUE; |
||||
ItemPointerSet( &(key->curItem), InvalidBlockNumber, InvalidOffsetNumber ); |
||||
|
||||
for(j=0;j<key->nentries;j++) { |
||||
if ( key->scanEntry[j].buffer != InvalidBuffer ) |
||||
ReleaseBuffer( key->scanEntry[i].buffer ); |
||||
|
||||
ItemPointerSet( &(key->scanEntry[j].curItem), InvalidBlockNumber, InvalidOffsetNumber ); |
||||
key->scanEntry[j].offset = InvalidOffsetNumber; |
||||
key->scanEntry[j].buffer = InvalidBuffer; |
||||
key->scanEntry[j].list = NULL; |
||||
key->scanEntry[j].nlist = 0; |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void |
||||
freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes) { |
||||
uint32 i, j; |
||||
|
||||
if ( keys == NULL ) |
||||
return; |
||||
|
||||
for(i=0;i<nkeys;i++) { |
||||
GinScanKey key = keys + i; |
||||
|
||||
for(j=0;j<key->nentries;j++) { |
||||
if ( key->scanEntry[j].buffer != InvalidBuffer ) |
||||
ReleaseBuffer( key->scanEntry[j].buffer ); |
||||
if ( removeRes && key->scanEntry[j].list )
|
||||
pfree(key->scanEntry[j].list); |
||||
} |
||||
|
||||
if ( removeRes ) |
||||
pfree(key->entryRes); |
||||
pfree(key->scanEntry); |
||||
} |
||||
|
||||
pfree(keys); |
||||
} |
||||
|
||||
void |
||||
newScanKey( IndexScanDesc scan ) { |
||||
ScanKey scankey = scan->keyData; |
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque; |
||||
int i; |
||||
uint32 nkeys = 0; |
||||
|
||||
so->keys = (GinScanKey) palloc( scan->numberOfKeys * sizeof(GinScanKeyData) ); |
||||
|
||||
for(i=0; i<scan->numberOfKeys; i++) { |
||||
Datum* entryValues; |
||||
uint32 nEntryValues; |
||||
|
||||
if ( scankey[i].sk_flags & SK_ISNULL ) |
||||
elog(ERROR, "Gin doesn't support NULL as scan key"); |
||||
Assert( scankey[i].sk_attno == 1 ); |
||||
|
||||
entryValues = (Datum*)DatumGetPointer( |
||||
FunctionCall3( |
||||
&so->ginstate.extractQueryFn, |
||||
scankey[i].sk_argument, |
||||
PointerGetDatum( &nEntryValues ), |
||||
UInt16GetDatum(scankey[i].sk_strategy) |
||||
) |
||||
); |
||||
if ( entryValues==NULL || nEntryValues == 0 ) |
||||
/* full scan... */ |
||||
continue; |
||||
|
||||
fillScanKey( &so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument, |
||||
entryValues, nEntryValues, scankey[i].sk_strategy ); |
||||
nkeys++; |
||||
} |
||||
|
||||
so->nkeys = nkeys; |
||||
|
||||
if ( so->nkeys == 0 ) |
||||
elog(ERROR, "Gin doesn't support full scan due to it's awful inefficiency"); |
||||
} |
||||
|
||||
Datum |
||||
ginrescan(PG_FUNCTION_ARGS) { |
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); |
||||
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1); |
||||
GinScanOpaque so; |
||||
|
||||
so = (GinScanOpaque) scan->opaque; |
||||
|
||||
if ( so == NULL ) { |
||||
/* if called from ginbeginscan */ |
||||
so = (GinScanOpaque)palloc( sizeof(GinScanOpaqueData) ); |
||||
so->tempCtx = AllocSetContextCreate(CurrentMemoryContext, |
||||
"Gin scan temporary context", |
||||
ALLOCSET_DEFAULT_MINSIZE, |
||||
ALLOCSET_DEFAULT_INITSIZE, |
||||
ALLOCSET_DEFAULT_MAXSIZE); |
||||
initGinState(&so->ginstate, scan->indexRelation); |
||||
scan->opaque = so; |
||||
} else { |
||||
freeScanKeys(so->keys, so->nkeys, TRUE); |
||||
freeScanKeys(so->markPos, so->nkeys, FALSE); |
||||
} |
||||
|
||||
so->markPos=so->keys=NULL; |
||||
|
||||
if ( scankey && scan->numberOfKeys > 0 ) { |
||||
memmove(scan->keyData, scankey, |
||||
scan->numberOfKeys * sizeof(ScanKeyData)); |
||||
} |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
|
||||
Datum |
||||
ginendscan(PG_FUNCTION_ARGS) { |
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); |
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque; |
||||
|
||||
if ( so != NULL ) { |
||||
freeScanKeys(so->keys, so->nkeys, TRUE);
|
||||
freeScanKeys(so->markPos, so->nkeys, FALSE);
|
||||
|
||||
MemoryContextDelete(so->tempCtx); |
||||
|
||||
pfree(so); |
||||
} |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
static GinScanKey |
||||
copyScanKeys( GinScanKey keys, uint32 nkeys ) { |
||||
GinScanKey newkeys; |
||||
uint32 i, j; |
||||
|
||||
newkeys = (GinScanKey)palloc( sizeof(GinScanKeyData) * nkeys ); |
||||
memcpy( newkeys, keys, sizeof(GinScanKeyData) * nkeys ); |
||||
|
||||
for(i=0;i<nkeys;i++) { |
||||
newkeys[i].scanEntry = (GinScanEntry)palloc(sizeof(GinScanEntryData) * keys[i].nentries ); |
||||
memcpy( newkeys[i].scanEntry, keys[i].scanEntry, sizeof(GinScanEntryData) * keys[i].nentries ); |
||||
|
||||
for(j=0;j<keys[i].nentries; j++) {
|
||||
if ( keys[i].scanEntry[j].buffer != InvalidBuffer ) |
||||
IncrBufferRefCount( keys[i].scanEntry[j].buffer ); |
||||
if ( keys[i].scanEntry[j].master ) { |
||||
int masterN = keys[i].scanEntry[j].master - keys[i].scanEntry; |
||||
newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN; |
||||
} |
||||
} |
||||
} |
||||
|
||||
return newkeys; |
||||
} |
||||
|
||||
Datum
|
||||
ginmarkpos(PG_FUNCTION_ARGS) { |
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); |
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque; |
||||
|
||||
freeScanKeys(so->markPos, so->nkeys, FALSE); |
||||
so->markPos = copyScanKeys( so->keys, so->nkeys ); |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
Datum
|
||||
ginrestrpos(PG_FUNCTION_ARGS) { |
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); |
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque; |
||||
|
||||
freeScanKeys(so->keys, so->nkeys, FALSE); |
||||
so->keys = copyScanKeys( so->markPos, so->nkeys ); |
||||
|
||||
PG_RETURN_VOID(); |
||||
} |
@ -0,0 +1,203 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* ginutil.c |
||||
* utility routines for the postgres inverted index access method. |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
#include "access/genam.h" |
||||
#include "access/gin.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/freespace.h" |
||||
|
||||
void
|
||||
initGinState( GinState *state, Relation index ) { |
||||
if ( index->rd_att->natts != 1 ) |
||||
elog(ERROR, "numberOfAttributes %d != 1", |
||||
index->rd_att->natts); |
||||
|
||||
state->tupdesc = index->rd_att; |
||||
|
||||
fmgr_info_copy(&(state->compareFn), |
||||
index_getprocinfo(index, 1, GIN_COMPARE_PROC), |
||||
CurrentMemoryContext); |
||||
fmgr_info_copy(&(state->extractValueFn), |
||||
index_getprocinfo(index, 1, GIN_EXTRACTVALUE_PROC), |
||||
CurrentMemoryContext); |
||||
fmgr_info_copy(&(state->extractQueryFn), |
||||
index_getprocinfo(index, 1, GIN_EXTRACTQUERY_PROC), |
||||
CurrentMemoryContext); |
||||
fmgr_info_copy(&(state->consistentFn), |
||||
index_getprocinfo(index, 1, GIN_CONSISTENT_PROC), |
||||
CurrentMemoryContext); |
||||
} |
||||
|
||||
/*
|
||||
* Allocate a new page (either by recycling, or by extending the index file) |
||||
* The returned buffer is already pinned and exclusive-locked |
||||
* Caller is responsible for initializing the page by calling GinInitBuffer |
||||
*/ |
||||
|
||||
Buffer |
||||
GinNewBuffer(Relation index) { |
||||
Buffer buffer; |
||||
bool needLock; |
||||
|
||||
/* First, try to get a page from FSM */ |
||||
for(;;) { |
||||
BlockNumber blkno = GetFreeIndexPage(&index->rd_node); |
||||
if (blkno == InvalidBlockNumber) |
||||
break; |
||||
|
||||
buffer = ReadBuffer(index, blkno); |
||||
|
||||
/*
|
||||
* We have to guard against the possibility that someone else already |
||||
* recycled this page; the buffer may be locked if so. |
||||
*/ |
||||
if (ConditionalLockBuffer(buffer)) { |
||||
Page page = BufferGetPage(buffer); |
||||
|
||||
if (PageIsNew(page)) |
||||
return buffer; /* OK to use, if never initialized */ |
||||
|
||||
if (GinPageIsDeleted(page)) |
||||
return buffer; /* OK to use */ |
||||
|
||||
LockBuffer(buffer, GIN_UNLOCK); |
||||
} |
||||
|
||||
/* Can't use it, so release buffer and try again */ |
||||
ReleaseBuffer(buffer); |
||||
} |
||||
|
||||
/* Must extend the file */ |
||||
needLock = !RELATION_IS_LOCAL(index); |
||||
if (needLock) |
||||
LockRelationForExtension(index, ExclusiveLock); |
||||
|
||||
buffer = ReadBuffer(index, P_NEW); |
||||
LockBuffer(buffer, GIN_EXCLUSIVE); |
||||
|
||||
if (needLock) |
||||
UnlockRelationForExtension(index, ExclusiveLock); |
||||
|
||||
return buffer; |
||||
} |
||||
|
||||
void |
||||
GinInitPage(Page page, uint32 f, Size pageSize) { |
||||
GinPageOpaque opaque; |
||||
|
||||
PageInit(page, pageSize, sizeof(GinPageOpaqueData)); |
||||
|
||||
opaque = GinPageGetOpaque(page); |
||||
memset( opaque, 0, sizeof(GinPageOpaqueData) ); |
||||
opaque->flags = f;
|
||||
opaque->rightlink = InvalidBlockNumber; |
||||
} |
||||
|
||||
void |
||||
GinInitBuffer(Buffer b, uint32 f) { |
||||
GinInitPage( BufferGetPage(b), f, BufferGetPageSize(b) ); |
||||
} |
||||
|
||||
int |
||||
compareEntries(GinState *ginstate, Datum a, Datum b) { |
||||
return DatumGetInt32( |
||||
FunctionCall2( |
||||
&ginstate->compareFn, |
||||
a, b |
||||
) |
||||
); |
||||
} |
||||
|
||||
static FmgrInfo* cmpDatumPtr=NULL; |
||||
static bool needUnique = FALSE; |
||||
|
||||
static int |
||||
cmpEntries(const void * a, const void * b) { |
||||
int res = DatumGetInt32( |
||||
FunctionCall2( |
||||
cmpDatumPtr, |
||||
*(Datum*)a, |
||||
*(Datum*)b |
||||
) |
||||
); |
||||
|
||||
if ( res == 0 )
|
||||
needUnique = TRUE; |
||||
|
||||
return res; |
||||
} |
||||
|
||||
Datum* |
||||
extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries) { |
||||
Datum *entries; |
||||
|
||||
entries = (Datum*)DatumGetPointer( |
||||
FunctionCall2( |
||||
&ginstate->extractValueFn, |
||||
value, |
||||
PointerGetDatum( nentries ) |
||||
) |
||||
); |
||||
|
||||
if ( entries == NULL ) |
||||
*nentries = 0; |
||||
|
||||
if ( *nentries > 1 ) { |
||||
cmpDatumPtr = &ginstate->compareFn; |
||||
needUnique = FALSE; |
||||
qsort(entries, *nentries, sizeof(Datum), cmpEntries);
|
||||
} |
||||
|
||||
return entries; |
||||
} |
||||
|
||||
|
||||
Datum* |
||||
extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries) { |
||||
Datum *entries = extractEntriesS(ginstate, value, nentries); |
||||
|
||||
if ( *nentries>1 && needUnique ) { |
||||
Datum *ptr, *res; |
||||
|
||||
ptr = res = entries; |
||||
|
||||
while( ptr - entries < *nentries ) { |
||||
if ( compareEntries(ginstate, *ptr, *res ) != 0 )
|
||||
*(++res) = *ptr++; |
||||
else |
||||
ptr++; |
||||
} |
||||
|
||||
*nentries = res + 1 - entries; |
||||
} |
||||
|
||||
return entries; |
||||
} |
||||
|
||||
/*
|
||||
* It's analog of PageGetTempPage(), but copies whole page |
||||
*/ |
||||
Page |
||||
GinPageGetCopyPage( Page page ) { |
||||
Size pageSize = PageGetPageSize( page ); |
||||
Page tmppage; |
||||
|
||||
tmppage=(Page)palloc( pageSize ); |
||||
memcpy( tmppage, page, pageSize ); |
||||
|
||||
return tmppage; |
||||
} |
@ -0,0 +1,647 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* ginvacuum.c |
||||
* delete & vacuum routines for the postgres GIN |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
#include "access/genam.h" |
||||
#include "access/gin.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/freespace.h" |
||||
#include "utils/memutils.h" |
||||
#include "storage/freespace.h" |
||||
#include "storage/smgr.h" |
||||
#include "commands/vacuum.h" |
||||
|
||||
typedef struct { |
||||
Relation index; |
||||
IndexBulkDeleteResult *result; |
||||
IndexBulkDeleteCallback callback; |
||||
void *callback_state; |
||||
GinState ginstate; |
||||
} GinVacuumState; |
||||
|
||||
|
||||
/*
|
||||
* Cleans array of ItemPointer (removes dead pointers) |
||||
* Results are always stored in *cleaned, which will be allocated |
||||
* if its needed. In case of *cleaned!=NULL caller is resposible to
|
||||
* enough space. *cleaned and items may point to the same |
||||
* memory addres. |
||||
*/ |
||||
|
||||
static uint32 |
||||
ginVacuumPostingList( GinVacuumState *gvs, ItemPointerData *items, uint32 nitem, ItemPointerData **cleaned ) { |
||||
uint32 i,j=0; |
||||
|
||||
/*
|
||||
* just scan over ItemPointer array |
||||
*/ |
||||
|
||||
for(i=0;i<nitem;i++) { |
||||
if ( gvs->callback(items+i, gvs->callback_state) ) {
|
||||
gvs->result->tuples_removed += 1; |
||||
if ( !*cleaned ) { |
||||
*cleaned = (ItemPointerData*)palloc(sizeof(ItemPointerData)*nitem); |
||||
if ( i!=0 )
|
||||
memcpy( *cleaned, items, sizeof(ItemPointerData)*i); |
||||
} |
||||
} else { |
||||
gvs->result->num_index_tuples += 1; |
||||
if (i!=j) |
||||
(*cleaned)[j] = items[i]; |
||||
j++; |
||||
} |
||||
} |
||||
|
||||
return j; |
||||
} |
||||
|
||||
/*
|
||||
* fills WAL record for vacuum leaf page |
||||
*/ |
||||
static void |
||||
xlogVacuumPage(Relation index, Buffer buffer) { |
||||
Page page = BufferGetPage( buffer ); |
||||
XLogRecPtr recptr; |
||||
XLogRecData rdata[3]; |
||||
ginxlogVacuumPage data; |
||||
char *backup; |
||||
char itups[BLCKSZ]; |
||||
uint32 len=0; |
||||
|
||||
Assert( GinPageIsLeaf( page ) ); |
||||
|
||||
if (index->rd_istemp) |
||||
return;
|
||||
|
||||
data.node = index->rd_node; |
||||
data.blkno = BufferGetBlockNumber(buffer); |
||||
|
||||
if ( GinPageIsData( page ) ) { |
||||
backup = GinDataPageGetData( page ); |
||||
data.nitem = GinPageGetOpaque( page )->maxoff; |
||||
if ( data.nitem ) |
||||
len = MAXALIGN( sizeof(ItemPointerData)*data.nitem ); |
||||
} else { |
||||
char *ptr; |
||||
OffsetNumber i; |
||||
|
||||
ptr = backup = itups; |
||||
for(i=FirstOffsetNumber;i<=PageGetMaxOffsetNumber(page);i++) { |
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); |
||||
memcpy( ptr, itup, IndexTupleSize( itup ) ); |
||||
ptr += MAXALIGN( IndexTupleSize( itup ) ); |
||||
} |
||||
|
||||
data.nitem = PageGetMaxOffsetNumber(page); |
||||
len = ptr-backup; |
||||
} |
||||
|
||||
rdata[0].buffer = buffer; |
||||
rdata[0].buffer_std = ( GinPageIsData( page ) ) ? FALSE : TRUE; |
||||
rdata[0].len = 0; |
||||
rdata[0].data = NULL; |
||||
rdata[0].next = rdata + 1; |
||||
|
||||
rdata[1].buffer = InvalidBuffer; |
||||
rdata[1].len = sizeof(ginxlogVacuumPage); |
||||
rdata[1].data = (char*)&data; |
||||
|
||||
if ( len == 0 ) { |
||||
rdata[1].next = NULL; |
||||
} else { |
||||
rdata[1].next = rdata + 2; |
||||
|
||||
rdata[2].buffer = InvalidBuffer; |
||||
rdata[2].len = len; |
||||
rdata[2].data = backup; |
||||
rdata[2].next = NULL; |
||||
} |
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata); |
||||
PageSetLSN(page, recptr); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
} |
||||
|
||||
static bool |
||||
ginVacuumPostingTreeLeaves( GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer ) {
|
||||
Buffer buffer = ReadBuffer( gvs->index, blkno ); |
||||
Page page = BufferGetPage( buffer ); |
||||
bool hasVoidPage = FALSE;
|
||||
|
||||
/*
|
||||
* We should be sure that we don't concurrent with inserts, insert process |
||||
* never release root page until end (but it can unlock it and lock again). |
||||
* If we lock root with with LockBufferForCleanup, new scan process can't begin, |
||||
* but previous may run.
|
||||
* ginmarkpos/start* keeps buffer pinned, so we will wait for it. |
||||
* We lock only one posting tree in whole index, so, it's concurrent enough.. |
||||
* Side effect: after this is full complete, tree is unused by any other process
|
||||
*/ |
||||
|
||||
LockBufferForCleanup( buffer ); |
||||
|
||||
Assert( GinPageIsData(page) ); |
||||
|
||||
if ( GinPageIsLeaf(page) ) { |
||||
OffsetNumber newMaxOff, oldMaxOff = GinPageGetOpaque(page)->maxoff; |
||||
ItemPointerData *cleaned = NULL; |
||||
|
||||
newMaxOff = ginVacuumPostingList( gvs,
|
||||
(ItemPointer)GinDataPageGetData(page), oldMaxOff, &cleaned ); |
||||
|
||||
/* saves changes about deleted tuple ... */ |
||||
if ( oldMaxOff != newMaxOff ) { |
||||
|
||||
START_CRIT_SECTION(); |
||||
|
||||
if ( newMaxOff > 0 )
|
||||
memcpy( GinDataPageGetData(page), cleaned, sizeof(ItemPointerData) * newMaxOff ); |
||||
pfree( cleaned ); |
||||
GinPageGetOpaque(page)->maxoff = newMaxOff; |
||||
|
||||
xlogVacuumPage(gvs->index, buffer); |
||||
|
||||
MarkBufferDirty( buffer ); |
||||
END_CRIT_SECTION(); |
||||
|
||||
/* if root is a leaf page, we don't desire futher processing */
|
||||
if ( !isRoot && GinPageGetOpaque(page)->maxoff < FirstOffsetNumber ) |
||||
hasVoidPage = TRUE; |
||||
} |
||||
} else { |
||||
OffsetNumber i; |
||||
bool isChildHasVoid = FALSE; |
||||
|
||||
for( i=FirstOffsetNumber ; i <= GinPageGetOpaque(page)->maxoff ; i++ ) { |
||||
PostingItem *pitem = (PostingItem*)GinDataPageGetItem(page, i); |
||||
if ( ginVacuumPostingTreeLeaves( gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL ) ) |
||||
isChildHasVoid = TRUE; |
||||
} |
||||
|
||||
if ( isChildHasVoid ) |
||||
hasVoidPage = TRUE; |
||||
} |
||||
|
||||
/* if we have root and theres void pages in tree, then we don't release lock
|
||||
to go further processing and guarantee that tree is unused */ |
||||
if ( !(isRoot && hasVoidPage) ) { |
||||
UnlockReleaseBuffer( buffer ); |
||||
} else { |
||||
Assert( rootBuffer ); |
||||
*rootBuffer = buffer; |
||||
} |
||||
|
||||
return hasVoidPage; |
||||
} |
||||
|
||||
static void |
||||
ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno,
|
||||
BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot ) { |
||||
Buffer dBuffer = ReadBuffer( gvs->index, deleteBlkno ); |
||||
Buffer lBuffer = (leftBlkno==InvalidBlockNumber) ? InvalidBuffer : ReadBuffer( gvs->index, leftBlkno ); |
||||
Buffer pBuffer = ReadBuffer( gvs->index, parentBlkno ); |
||||
Page page, parentPage; |
||||
|
||||
LockBuffer( dBuffer, GIN_EXCLUSIVE ); |
||||
if ( !isParentRoot ) /* parent is already locked by LockBufferForCleanup() */ |
||||
LockBuffer( pBuffer, GIN_EXCLUSIVE ); |
||||
|
||||
START_CRIT_SECTION(); |
||||
|
||||
if ( leftBlkno!= InvalidBlockNumber ) { |
||||
BlockNumber rightlink; |
||||
|
||||
LockBuffer( lBuffer, GIN_EXCLUSIVE ); |
||||
|
||||
page = BufferGetPage( dBuffer ); |
||||
rightlink = GinPageGetOpaque(page)->rightlink; |
||||
|
||||
page = BufferGetPage( lBuffer ); |
||||
GinPageGetOpaque(page)->rightlink = rightlink; |
||||
} |
||||
|
||||
parentPage = BufferGetPage( pBuffer ); |
||||
PageDeletePostingItem(parentPage, myoff); |
||||
|
||||
page = BufferGetPage( dBuffer ); |
||||
GinPageGetOpaque(page)->flags = GIN_DELETED; |
||||
|
||||
if (!gvs->index->rd_istemp) { |
||||
XLogRecPtr recptr; |
||||
XLogRecData rdata[4]; |
||||
ginxlogDeletePage data; |
||||
int n; |
||||
|
||||
data.node = gvs->index->rd_node; |
||||
data.blkno = deleteBlkno; |
||||
data.parentBlkno = parentBlkno; |
||||
data.parentOffset = myoff; |
||||
data.leftBlkno = leftBlkno;
|
||||
data.rightLink = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
rdata[0].buffer = dBuffer; |
||||
rdata[0].buffer_std = FALSE; |
||||
rdata[0].data = NULL; |
||||
rdata[0].len = 0; |
||||
rdata[0].next = rdata + 1; |
||||
|
||||
rdata[1].buffer = pBuffer; |
||||
rdata[1].buffer_std = FALSE; |
||||
rdata[1].data = NULL; |
||||
rdata[1].len = 0; |
||||
rdata[1].next = rdata + 2; |
||||
|
||||
if ( leftBlkno!= InvalidBlockNumber ) {
|
||||
rdata[2].buffer = lBuffer; |
||||
rdata[2].buffer_std = FALSE; |
||||
rdata[2].data = NULL; |
||||
rdata[2].len = 0; |
||||
rdata[2].next = rdata + 3; |
||||
n = 3; |
||||
} else |
||||
n = 2; |
||||
|
||||
rdata[n].buffer = InvalidBuffer; |
||||
rdata[n].buffer_std = FALSE; |
||||
rdata[n].len = sizeof(ginxlogDeletePage); |
||||
rdata[n].data = (char*)&data; |
||||
rdata[n].next = NULL; |
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata); |
||||
PageSetLSN(page, recptr); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
PageSetLSN(parentPage, recptr); |
||||
PageSetTLI(parentPage, ThisTimeLineID); |
||||
if ( leftBlkno!= InvalidBlockNumber ) { |
||||
page = BufferGetPage( lBuffer ); |
||||
PageSetLSN(page, recptr); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
} |
||||
} |
||||
|
||||
MarkBufferDirty( pBuffer ); |
||||
if ( !isParentRoot ) |
||||
LockBuffer( pBuffer, GIN_UNLOCK ); |
||||
ReleaseBuffer( pBuffer ); |
||||
|
||||
if ( leftBlkno!= InvalidBlockNumber ) { |
||||
MarkBufferDirty( lBuffer ); |
||||
UnlockReleaseBuffer( lBuffer ); |
||||
} |
||||
|
||||
MarkBufferDirty( dBuffer ); |
||||
UnlockReleaseBuffer( dBuffer ); |
||||
|
||||
END_CRIT_SECTION(); |
||||
|
||||
gvs->result->pages_deleted++; |
||||
} |
||||
|
||||
typedef struct DataPageDeleteStack { |
||||
struct DataPageDeleteStack *child;
|
||||
struct DataPageDeleteStack *parent; |
||||
|
||||
BlockNumber blkno; |
||||
bool isRoot; |
||||
} DataPageDeleteStack; |
||||
|
||||
/*
|
||||
* scans posting tree and deletes empty pages |
||||
*/ |
||||
static bool |
||||
ginScanToDelete( GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff ) { |
||||
DataPageDeleteStack *me; |
||||
Buffer buffer; |
||||
Page page; |
||||
bool meDelete = FALSE; |
||||
|
||||
if ( isRoot ) { |
||||
me = parent; |
||||
} else { |
||||
if ( ! parent->child ) { |
||||
me = (DataPageDeleteStack*)palloc0(sizeof(DataPageDeleteStack)); |
||||
me->parent=parent; |
||||
parent->child = me; |
||||
me->blkno = InvalidBlockNumber; |
||||
} else |
||||
me = parent->child; |
||||
} |
||||
|
||||
buffer = ReadBuffer( gvs->index, blkno );
|
||||
page = BufferGetPage( buffer ); |
||||
|
||||
Assert( GinPageIsData(page) ); |
||||
|
||||
if ( !GinPageIsLeaf(page) ) { |
||||
OffsetNumber i; |
||||
|
||||
for(i=FirstOffsetNumber;i<=GinPageGetOpaque(page)->maxoff;i++) { |
||||
PostingItem *pitem = (PostingItem*)GinDataPageGetItem(page, i); |
||||
|
||||
if ( ginScanToDelete( gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i ) ) |
||||
i--; |
||||
} |
||||
} |
||||
|
||||
if ( GinPageGetOpaque(page)->maxoff < FirstOffsetNumber ) { |
||||
if ( !( me->blkno == InvalidBlockNumber && GinPageRightMost(page) ) ) { |
||||
/* we never delete right most branch */ |
||||
Assert( !isRoot ); |
||||
if ( GinPageGetOpaque(page)->maxoff < FirstOffsetNumber ) { |
||||
ginDeletePage( gvs, blkno, me->blkno, me->parent->blkno, myoff, me->parent->isRoot ); |
||||
meDelete = TRUE; |
||||
} |
||||
} |
||||
} |
||||
|
||||
ReleaseBuffer( buffer ); |
||||
|
||||
if ( !meDelete ) |
||||
me->blkno = blkno; |
||||
|
||||
return meDelete; |
||||
} |
||||
|
||||
static void |
||||
ginVacuumPostingTree( GinVacuumState *gvs, BlockNumber rootBlkno ) { |
||||
Buffer rootBuffer = InvalidBuffer; |
||||
DataPageDeleteStack root, *ptr, *tmp; |
||||
|
||||
if ( ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE, &rootBuffer)==FALSE ) { |
||||
Assert( rootBuffer == InvalidBuffer ); |
||||
return; |
||||
} |
||||
|
||||
memset(&root,0,sizeof(DataPageDeleteStack)); |
||||
root.blkno = rootBlkno; |
||||
root.isRoot = TRUE; |
||||
|
||||
vacuum_delay_point(); |
||||
|
||||
ginScanToDelete( gvs, rootBlkno, TRUE, &root, InvalidOffsetNumber );
|
||||
|
||||
ptr = root.child; |
||||
while( ptr ) { |
||||
tmp = ptr->child; |
||||
pfree( ptr ); |
||||
ptr = tmp; |
||||
} |
||||
|
||||
UnlockReleaseBuffer( rootBuffer ); |
||||
} |
||||
|
||||
/*
|
||||
* returns modified page or NULL if page isn't modified. |
||||
* The function works on the original page until the first change occurs, |
||||
* then the page is copied into a temporary one. |
||||
*/ |
||||
static Page |
||||
ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot) { |
||||
Page origpage = BufferGetPage( buffer ), tmppage; |
||||
OffsetNumber i, maxoff = PageGetMaxOffsetNumber( origpage ); |
||||
|
||||
tmppage = origpage; |
||||
|
||||
*nroot=0; |
||||
|
||||
for(i=FirstOffsetNumber; i<= maxoff; i++) { |
||||
IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); |
||||
|
||||
if ( GinIsPostingTree(itup) ) { |
||||
/* store posting tree's roots for further processing,
|
||||
we can't vacuum it just now due to risk of deadlocks with scans/inserts */ |
||||
roots[ *nroot ] = GinItemPointerGetBlockNumber(&itup->t_tid); |
||||
(*nroot)++; |
||||
} else if ( GinGetNPosting(itup) > 0 ) { |
||||
/* if we already create temrorary page, we will make changes in place */ |
||||
ItemPointerData *cleaned = (tmppage==origpage) ? NULL : GinGetPosting(itup ); |
||||
uint32 newN = ginVacuumPostingList( gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned ); |
||||
|
||||
if ( GinGetNPosting(itup) != newN ) { |
||||
bool isnull; |
||||
Datum value; |
||||
|
||||
/*
|
||||
* Some ItemPointers was deleted, so we should remake our tuple
|
||||
*/ |
||||
|
||||
if ( tmppage==origpage ) { |
||||
/*
|
||||
* On first difference we create temprorary page in memory |
||||
* and copies content in to it. |
||||
*/ |
||||
tmppage=GinPageGetCopyPage ( origpage ); |
||||
|
||||
if ( newN > 0 ) {
|
||||
Size pos = ((char*)GinGetPosting(itup)) - ((char*)origpage); |
||||
memcpy( tmppage+pos, cleaned, sizeof(ItemPointerData)*newN ); |
||||
} |
||||
|
||||
pfree( cleaned ); |
||||
|
||||
/* set itup pointer to new page */ |
||||
itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); |
||||
} |
||||
|
||||
value = index_getattr(itup, FirstOffsetNumber, gvs->ginstate.tupdesc, &isnull); |
||||
itup = GinFormTuple(&gvs->ginstate, value, GinGetPosting(itup), newN); |
||||
PageIndexTupleDelete(tmppage, i); |
||||
|
||||
if ( PageAddItem( tmppage, (Item)itup, IndexTupleSize(itup), i, LP_USED ) != i ) |
||||
elog(ERROR, "failed to add item to index page in \"%s\"", |
||||
RelationGetRelationName(gvs->index)); |
||||
|
||||
pfree( itup ); |
||||
} |
||||
} |
||||
} |
||||
|
||||
return ( tmppage==origpage ) ? NULL : tmppage; |
||||
} |
||||
|
||||
Datum |
||||
ginbulkdelete(PG_FUNCTION_ARGS) { |
||||
Relation index = (Relation) PG_GETARG_POINTER(0); |
||||
IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1); |
||||
void *callback_state = (void *) PG_GETARG_POINTER(2); |
||||
BlockNumber blkno = GIN_ROOT_BLKNO; |
||||
GinVacuumState gvs; |
||||
Buffer buffer; |
||||
BlockNumber rootOfPostingTree[ BLCKSZ/ (sizeof(IndexTupleData)+sizeof(ItemId)) ]; |
||||
uint32 nRoot; |
||||
|
||||
gvs.index = index; |
||||
gvs.result = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); |
||||
gvs.callback = callback; |
||||
gvs.callback_state = callback_state; |
||||
initGinState(&gvs.ginstate, index); |
||||
|
||||
buffer = ReadBuffer( index, blkno ); |
||||
|
||||
/* find leaf page */ |
||||
for(;;) { |
||||
Page page = BufferGetPage( buffer ); |
||||
IndexTuple itup; |
||||
|
||||
LockBuffer(buffer,GIN_SHARE); |
||||
|
||||
Assert( !GinPageIsData(page) ); |
||||
|
||||
if ( GinPageIsLeaf(page) ) { |
||||
LockBuffer(buffer,GIN_UNLOCK); |
||||
LockBuffer(buffer,GIN_EXCLUSIVE); |
||||
|
||||
if ( blkno==GIN_ROOT_BLKNO && !GinPageIsLeaf(page) ) { |
||||
LockBuffer(buffer,GIN_UNLOCK); |
||||
continue; /* check it one more */ |
||||
} |
||||
break;
|
||||
} |
||||
|
||||
Assert( PageGetMaxOffsetNumber(page) >= FirstOffsetNumber ); |
||||
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); |
||||
blkno = GinItemPointerGetBlockNumber(&(itup)->t_tid); |
||||
Assert( blkno!= InvalidBlockNumber ); |
||||
|
||||
LockBuffer(buffer,GIN_UNLOCK); |
||||
buffer = ReleaseAndReadBuffer( buffer, index, blkno ); |
||||
} |
||||
|
||||
/* right now we found leftmost page in entry's BTree */ |
||||
|
||||
for(;;) { |
||||
Page page = BufferGetPage( buffer ); |
||||
Page resPage; |
||||
uint32 i; |
||||
|
||||
Assert( !GinPageIsData(page) ); |
||||
|
||||
resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot); |
||||
|
||||
blkno = GinPageGetOpaque( page )->rightlink; |
||||
|
||||
if ( resPage ) { |
||||
START_CRIT_SECTION(); |
||||
PageRestoreTempPage( resPage, page ); |
||||
xlogVacuumPage(gvs.index, buffer); |
||||
MarkBufferDirty( buffer ); |
||||
UnlockReleaseBuffer(buffer); |
||||
END_CRIT_SECTION(); |
||||
} else { |
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
|
||||
vacuum_delay_point(); |
||||
|
||||
for(i=0; i<nRoot; i++) {
|
||||
ginVacuumPostingTree( &gvs, rootOfPostingTree[i] ); |
||||
vacuum_delay_point(); |
||||
} |
||||
|
||||
if ( blkno==InvalidBlockNumber ) /*rightmost page*/ |
||||
break; |
||||
|
||||
buffer = ReadBuffer( index, blkno ); |
||||
LockBuffer(buffer,GIN_EXCLUSIVE); |
||||
} |
||||
|
||||
PG_RETURN_POINTER(gvs.result); |
||||
} |
||||
|
||||
Datum
|
||||
ginvacuumcleanup(PG_FUNCTION_ARGS) { |
||||
Relation index = (Relation) PG_GETARG_POINTER(0); |
||||
IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1); |
||||
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2); |
||||
bool needLock = !RELATION_IS_LOCAL(index); |
||||
BlockNumber npages, |
||||
blkno; |
||||
BlockNumber nFreePages, |
||||
*freePages, |
||||
maxFreePages; |
||||
BlockNumber lastBlock = GIN_ROOT_BLKNO, |
||||
lastFilledBlock = GIN_ROOT_BLKNO; |
||||
|
||||
|
||||
if (info->vacuum_full) { |
||||
LockRelation(index, AccessExclusiveLock); |
||||
needLock = false; |
||||
} |
||||
|
||||
if (needLock) |
||||
LockRelationForExtension(index, ExclusiveLock); |
||||
npages = RelationGetNumberOfBlocks(index); |
||||
if (needLock) |
||||
UnlockRelationForExtension(index, ExclusiveLock); |
||||
|
||||
maxFreePages = npages; |
||||
if (maxFreePages > MaxFSMPages) |
||||
maxFreePages = MaxFSMPages; |
||||
|
||||
nFreePages = 0; |
||||
freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages); |
||||
|
||||
for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++) { |
||||
Buffer buffer; |
||||
Page page; |
||||
|
||||
vacuum_delay_point(); |
||||
|
||||
buffer = ReadBuffer(index, blkno); |
||||
LockBuffer(buffer, GIN_SHARE); |
||||
page = (Page) BufferGetPage(buffer); |
||||
|
||||
if ( GinPageIsDeleted(page) ) { |
||||
if (nFreePages < maxFreePages) |
||||
freePages[nFreePages++] = blkno; |
||||
} else |
||||
lastFilledBlock = blkno; |
||||
|
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
lastBlock = npages - 1; |
||||
|
||||
if (info->vacuum_full && nFreePages > 0) { |
||||
/* try to truncate index */ |
||||
int i; |
||||
for (i = 0; i < nFreePages; i++)
|
||||
if (freePages[i] >= lastFilledBlock) { |
||||
nFreePages = i; |
||||
break; |
||||
} |
||||
|
||||
if (lastBlock > lastFilledBlock) |
||||
RelationTruncate(index, lastFilledBlock + 1); |
||||
|
||||
stats->pages_removed = lastBlock - lastFilledBlock; |
||||
} |
||||
|
||||
RecordIndexFreeSpace(&index->rd_node, nFreePages, freePages); |
||||
stats->pages_free = nFreePages; |
||||
|
||||
if (needLock) |
||||
LockRelationForExtension(index, ExclusiveLock); |
||||
stats->num_pages = RelationGetNumberOfBlocks(index); |
||||
if (needLock) |
||||
UnlockRelationForExtension(index, ExclusiveLock); |
||||
|
||||
if (info->vacuum_full) |
||||
UnlockRelation(index, AccessExclusiveLock); |
||||
|
||||
PG_RETURN_POINTER(stats); |
||||
} |
||||
|
@ -0,0 +1,544 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* ginxlog.c |
||||
* WAL replay logic for inverted index. |
||||
* |
||||
* |
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#include "postgres.h" |
||||
|
||||
#include "access/genam.h" |
||||
#include "access/gin.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/index.h" |
||||
#include "commands/vacuum.h" |
||||
#include "miscadmin.h" |
||||
#include "utils/memutils.h" |
||||
|
||||
static MemoryContext opCtx; /* working memory for operations */ |
||||
static MemoryContext topCtx; |
||||
|
||||
typedef struct ginIncompleteSplit { |
||||
RelFileNode node; |
||||
BlockNumber leftBlkno; |
||||
BlockNumber rightBlkno; |
||||
BlockNumber rootBlkno; |
||||
} ginIncompleteSplit; |
||||
|
||||
static List *incomplete_splits; |
||||
|
||||
static void |
||||
pushIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber rightBlkno, BlockNumber rootBlkno) { |
||||
ginIncompleteSplit *split;
|
||||
|
||||
MemoryContextSwitchTo( topCtx );
|
||||
|
||||
split = palloc(sizeof(ginIncompleteSplit)); |
||||
|
||||
split->node = node; |
||||
split->leftBlkno = leftBlkno; |
||||
split->rightBlkno = rightBlkno; |
||||
split->rootBlkno = rootBlkno; |
||||
|
||||
incomplete_splits = lappend(incomplete_splits, split); |
||||
|
||||
MemoryContextSwitchTo( opCtx );
|
||||
} |
||||
|
||||
static void |
||||
forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updateBlkno) { |
||||
ListCell *l; |
||||
|
||||
foreach(l, incomplete_splits) { |
||||
ginIncompleteSplit *split = (ginIncompleteSplit *) lfirst(l); |
||||
|
||||
if ( RelFileNodeEquals(node, split->node) && leftBlkno == split->leftBlkno && updateBlkno == split->rightBlkno ) { |
||||
incomplete_splits = list_delete_ptr(incomplete_splits, split); |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void |
||||
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) { |
||||
RelFileNode *node = (RelFileNode *) XLogRecGetData(record); |
||||
Relation reln; |
||||
Buffer buffer; |
||||
Page page; |
||||
|
||||
reln = XLogOpenRelation(*node); |
||||
buffer = XLogReadBuffer(reln, GIN_ROOT_BLKNO, true); |
||||
Assert(BufferIsValid(buffer)); |
||||
page = (Page) BufferGetPage(buffer); |
||||
|
||||
GinInitBuffer(buffer, GIN_LEAF); |
||||
|
||||
PageSetLSN(page, lsn); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
|
||||
MarkBufferDirty(buffer); |
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
|
||||
static void |
||||
ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record) { |
||||
ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree*)XLogRecGetData(record); |
||||
ItemPointerData *items = (ItemPointerData*)(XLogRecGetData(record) + sizeof(ginxlogCreatePostingTree)); |
||||
Relation reln; |
||||
Buffer buffer; |
||||
Page page; |
||||
|
||||
reln = XLogOpenRelation(data->node); |
||||
buffer = XLogReadBuffer(reln, data->blkno, true); |
||||
Assert(BufferIsValid(buffer)); |
||||
page = (Page) BufferGetPage(buffer); |
||||
|
||||
GinInitBuffer(buffer, GIN_DATA|GIN_LEAF); |
||||
memcpy( GinDataPageGetData(page), items, sizeof(ItemPointerData) * data->nitem ); |
||||
GinPageGetOpaque(page)->maxoff = data->nitem; |
||||
|
||||
PageSetLSN(page, lsn); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
|
||||
MarkBufferDirty(buffer); |
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
|
||||
static void |
||||
ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) { |
||||
ginxlogInsert *data = (ginxlogInsert*)XLogRecGetData(record); |
||||
Relation reln; |
||||
Buffer buffer; |
||||
Page page; |
||||
|
||||
/* nothing else to do if page was backed up (and no info to do it with) */ |
||||
if (record->xl_info & XLR_BKP_BLOCK_1) |
||||
return; |
||||
|
||||
reln = XLogOpenRelation(data->node); |
||||
buffer = XLogReadBuffer(reln, data->blkno, false); |
||||
Assert(BufferIsValid(buffer)); |
||||
page = (Page) BufferGetPage(buffer); |
||||
|
||||
if ( data->isData ) { |
||||
Assert( data->isDelete == FALSE ); |
||||
Assert( GinPageIsData( page ) ); |
||||
|
||||
if ( data->isLeaf ) { |
||||
OffsetNumber i; |
||||
ItemPointerData *items = (ItemPointerData*)( XLogRecGetData(record) + sizeof(ginxlogInsert) ); |
||||
|
||||
Assert( GinPageIsLeaf( page ) ); |
||||
Assert( data->updateBlkno == InvalidBlockNumber ); |
||||
|
||||
for(i=0;i<data->nitem;i++) |
||||
GinDataPageAddItem( page, items+i, data->offset + i );
|
||||
} else { |
||||
PostingItem *pitem; |
||||
|
||||
Assert( !GinPageIsLeaf( page ) ); |
||||
|
||||
if ( data->updateBlkno != InvalidBlockNumber ) { |
||||
/* update link to right page after split */
|
||||
pitem = (PostingItem*)GinDataPageGetItem(page, data->offset); |
||||
PostingItemSetBlockNumber( pitem, data->updateBlkno ); |
||||
} |
||||
|
||||
pitem = (PostingItem*)( XLogRecGetData(record) + sizeof(ginxlogInsert) ); |
||||
|
||||
GinDataPageAddItem( page, pitem, data->offset ); |
||||
|
||||
if ( data->updateBlkno != InvalidBlockNumber )
|
||||
forgetIncompleteSplit(data->node, PostingItemGetBlockNumber( pitem ), data->updateBlkno); |
||||
} |
||||
} else { |
||||
IndexTuple itup; |
||||
|
||||
Assert( !GinPageIsData( page ) ); |
||||
|
||||
if ( data->updateBlkno != InvalidBlockNumber ) { |
||||
/* update link to right page after split */
|
||||
Assert( !GinPageIsLeaf( page ) ); |
||||
Assert( data->offset>=FirstOffsetNumber && data->offset<=PageGetMaxOffsetNumber(page) ); |
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, data->offset)); |
||||
ItemPointerSet(&itup->t_tid, data->updateBlkno, InvalidOffsetNumber); |
||||
} |
||||
|
||||
if ( data->isDelete ) { |
||||
Assert( GinPageIsLeaf( page ) ); |
||||
Assert( data->offset>=FirstOffsetNumber && data->offset<=PageGetMaxOffsetNumber(page) ); |
||||
PageIndexTupleDelete(page, data->offset); |
||||
} |
||||
|
||||
itup = (IndexTuple)( XLogRecGetData(record) + sizeof(ginxlogInsert) ); |
||||
|
||||
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), data->offset, LP_USED) == InvalidOffsetNumber ) |
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode ); |
||||
|
||||
if ( !data->isLeaf && data->updateBlkno != InvalidBlockNumber ) |
||||
forgetIncompleteSplit(data->node, GinItemPointerGetBlockNumber( &itup->t_tid ), data->updateBlkno); |
||||
} |
||||
|
||||
PageSetLSN(page, lsn); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
|
||||
MarkBufferDirty(buffer); |
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
|
||||
static void |
||||
ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) { |
||||
ginxlogSplit *data = (ginxlogSplit*)XLogRecGetData(record); |
||||
Relation reln; |
||||
Buffer lbuffer, rbuffer; |
||||
Page lpage, rpage; |
||||
uint32 flags = 0; |
||||
|
||||
reln = XLogOpenRelation(data->node); |
||||
|
||||
if ( data->isLeaf ) |
||||
flags |= GIN_LEAF; |
||||
if ( data->isData ) |
||||
flags |= GIN_DATA; |
||||
|
||||
lbuffer = XLogReadBuffer(reln, data->lblkno, data->isRootSplit); |
||||
Assert(BufferIsValid(lbuffer)); |
||||
lpage = (Page) BufferGetPage(lbuffer); |
||||
GinInitBuffer(lbuffer, flags); |
||||
|
||||
rbuffer = XLogReadBuffer(reln, data->rblkno, true); |
||||
Assert(BufferIsValid(rbuffer)); |
||||
rpage = (Page) BufferGetPage(rbuffer); |
||||
GinInitBuffer(rbuffer, flags); |
||||
|
||||
GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber( rbuffer ); |
||||
GinPageGetOpaque(rpage)->rightlink = data->rrlink; |
||||
|
||||
if ( data->isData ) { |
||||
char *ptr = XLogRecGetData(record) + sizeof(ginxlogSplit); |
||||
Size sizeofitem = GinSizeOfItem(lpage); |
||||
OffsetNumber i; |
||||
ItemPointer bound; |
||||
|
||||
for(i=0;i<data->separator;i++) { |
||||
GinDataPageAddItem( lpage, ptr, InvalidOffsetNumber ); |
||||
ptr += sizeofitem; |
||||
} |
||||
|
||||
for(i=data->separator;i<data->nitem;i++) { |
||||
GinDataPageAddItem( rpage, ptr, InvalidOffsetNumber ); |
||||
ptr += sizeofitem; |
||||
} |
||||
|
||||
/* set up right key */ |
||||
bound = GinDataPageGetRightBound(lpage); |
||||
if ( data->isLeaf )
|
||||
*bound = *(ItemPointerData*)GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff); |
||||
else |
||||
*bound = ((PostingItem*)GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff))->key; |
||||
|
||||
bound = GinDataPageGetRightBound(rpage); |
||||
*bound = data->rightbound; |
||||
} else { |
||||
IndexTuple itup = (IndexTuple)( XLogRecGetData(record) + sizeof(ginxlogSplit) ); |
||||
OffsetNumber i; |
||||
|
||||
for(i=0;i<data->separator;i++) { |
||||
if ( PageAddItem( lpage, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber ) |
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode ); |
||||
itup = (IndexTuple)( ((char*)itup) + MAXALIGN( IndexTupleSize(itup) ) ); |
||||
} |
||||
|
||||
for(i=data->separator;i<data->nitem;i++) { |
||||
if ( PageAddItem( rpage, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber ) |
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode ); |
||||
itup = (IndexTuple)( ((char*)itup) + MAXALIGN( IndexTupleSize(itup) ) ); |
||||
} |
||||
} |
||||
|
||||
PageSetLSN(rpage, lsn); |
||||
PageSetTLI(lpage, ThisTimeLineID); |
||||
MarkBufferDirty(rbuffer); |
||||
|
||||
PageSetLSN(lpage, lsn); |
||||
PageSetTLI(lpage, ThisTimeLineID); |
||||
MarkBufferDirty(lbuffer); |
||||
|
||||
if ( !data->isLeaf && data->updateBlkno != InvalidBlockNumber )
|
||||
forgetIncompleteSplit(data->node, data->leftChildBlkno, data->updateBlkno); |
||||
|
||||
if ( data->isRootSplit ) { |
||||
Buffer rootBuf = XLogReadBuffer(reln, data->rootBlkno, false); |
||||
Page rootPage = BufferGetPage( rootBuf ); |
||||
|
||||
GinInitBuffer( rootBuf, flags & ~GIN_LEAF ); |
||||
|
||||
if ( data->isData ) { |
||||
Assert( data->rootBlkno != GIN_ROOT_BLKNO ); |
||||
dataFillRoot(NULL, rootBuf, lbuffer, rbuffer); |
||||
} else { |
||||
Assert( data->rootBlkno == GIN_ROOT_BLKNO ); |
||||
entryFillRoot(NULL, rootBuf, lbuffer, rbuffer); |
||||
} |
||||
|
||||
PageSetLSN(rootPage, lsn); |
||||
PageSetTLI(rootPage, ThisTimeLineID); |
||||
|
||||
MarkBufferDirty(rootBuf); |
||||
UnlockReleaseBuffer(rootBuf); |
||||
} else
|
||||
pushIncompleteSplit(data->node, data->lblkno, data->rblkno, data->rootBlkno); |
||||
|
||||
UnlockReleaseBuffer(rbuffer); |
||||
UnlockReleaseBuffer(lbuffer); |
||||
} |
||||
|
||||
static void |
||||
ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record) { |
||||
ginxlogVacuumPage *data = (ginxlogVacuumPage*)XLogRecGetData(record); |
||||
Relation reln; |
||||
Buffer buffer; |
||||
Page page; |
||||
|
||||
/* nothing else to do if page was backed up (and no info to do it with) */ |
||||
if (record->xl_info & XLR_BKP_BLOCK_1) |
||||
return; |
||||
|
||||
reln = XLogOpenRelation(data->node); |
||||
buffer = XLogReadBuffer(reln, data->blkno, false); |
||||
Assert(BufferIsValid(buffer)); |
||||
page = (Page) BufferGetPage(buffer); |
||||
|
||||
if ( GinPageIsData( page ) ) { |
||||
memcpy( GinDataPageGetData(page), XLogRecGetData(record) + sizeof(ginxlogVacuumPage), |
||||
GinSizeOfItem(page) * data->nitem ); |
||||
GinPageGetOpaque(page)->maxoff = data->nitem; |
||||
} else { |
||||
OffsetNumber i, *tod; |
||||
IndexTuple itup = (IndexTuple)( XLogRecGetData(record) + sizeof(ginxlogVacuumPage) ); |
||||
|
||||
tod = (OffsetNumber*)palloc( sizeof(OffsetNumber) * PageGetMaxOffsetNumber(page) ); |
||||
for(i=FirstOffsetNumber;i<=PageGetMaxOffsetNumber(page);i++) |
||||
tod[i-1] = i; |
||||
|
||||
PageIndexMultiDelete(page, tod, PageGetMaxOffsetNumber(page));
|
||||
|
||||
for(i=0;i<data->nitem;i++) { |
||||
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber ) |
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode ); |
||||
itup = (IndexTuple)( ((char*)itup) + MAXALIGN( IndexTupleSize(itup) ) ); |
||||
} |
||||
} |
||||
|
||||
PageSetLSN(page, lsn); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
|
||||
MarkBufferDirty(buffer); |
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
|
||||
static void |
||||
ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) { |
||||
ginxlogDeletePage *data = (ginxlogDeletePage*)XLogRecGetData(record); |
||||
Relation reln; |
||||
Buffer buffer; |
||||
Page page; |
||||
|
||||
reln = XLogOpenRelation(data->node); |
||||
|
||||
if ( !( record->xl_info & XLR_BKP_BLOCK_1) ) { |
||||
buffer = XLogReadBuffer(reln, data->blkno, false); |
||||
page = BufferGetPage( buffer ); |
||||
Assert(GinPageIsData(page)); |
||||
GinPageGetOpaque(page)->flags = GIN_DELETED; |
||||
PageSetLSN(page, lsn); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
MarkBufferDirty(buffer); |
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
|
||||
if ( !( record->xl_info & XLR_BKP_BLOCK_2) ) {
|
||||
buffer = XLogReadBuffer(reln, data->parentBlkno, false); |
||||
page = BufferGetPage( buffer ); |
||||
Assert(GinPageIsData(page)); |
||||
Assert(!GinPageIsLeaf(page)); |
||||
PageDeletePostingItem(page, data->parentOffset); |
||||
PageSetLSN(page, lsn); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
MarkBufferDirty(buffer); |
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
|
||||
if ( !( record->xl_info & XLR_BKP_BLOCK_2) && data->leftBlkno != InvalidBlockNumber ) {
|
||||
buffer = XLogReadBuffer(reln, data->leftBlkno, false); |
||||
page = BufferGetPage( buffer ); |
||||
Assert(GinPageIsData(page)); |
||||
GinPageGetOpaque(page)->rightlink = data->rightLink; |
||||
PageSetLSN(page, lsn); |
||||
PageSetTLI(page, ThisTimeLineID); |
||||
MarkBufferDirty(buffer); |
||||
UnlockReleaseBuffer(buffer); |
||||
} |
||||
} |
||||
|
||||
void
|
||||
gin_redo(XLogRecPtr lsn, XLogRecord *record) { |
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK; |
||||
|
||||
topCtx = MemoryContextSwitchTo(opCtx); |
||||
switch (info) { |
||||
case XLOG_GIN_CREATE_INDEX: |
||||
ginRedoCreateIndex(lsn, record); |
||||
break; |
||||
case XLOG_GIN_CREATE_PTREE: |
||||
ginRedoCreatePTree(lsn, record); |
||||
break; |
||||
case XLOG_GIN_INSERT: |
||||
ginRedoInsert(lsn, record); |
||||
break; |
||||
case XLOG_GIN_SPLIT: |
||||
ginRedoSplit(lsn, record); |
||||
break; |
||||
case XLOG_GIN_VACUUM_PAGE: |
||||
ginRedoVacuumPage(lsn, record); |
||||
break; |
||||
case XLOG_GIN_DELETE_PAGE: |
||||
ginRedoDeletePage(lsn, record); |
||||
break; |
||||
default: |
||||
elog(PANIC, "gin_redo: unknown op code %u", info); |
||||
} |
||||
MemoryContextSwitchTo(topCtx); |
||||
MemoryContextReset(opCtx); |
||||
} |
||||
|
||||
static void |
||||
desc_node( StringInfo buf, RelFileNode node, BlockNumber blkno ) { |
||||
appendStringInfo(buf,"node: %u/%u/%u blkno: %u", |
||||
node.spcNode, node.dbNode, node.relNode, blkno); |
||||
} |
||||
|
||||
void
|
||||
gin_desc(StringInfo buf, uint8 xl_info, char *rec) { |
||||
uint8 info = xl_info & ~XLR_INFO_MASK; |
||||
|
||||
switch (info) { |
||||
case XLOG_GIN_CREATE_INDEX: |
||||
appendStringInfo(buf,"Create index, "); |
||||
desc_node(buf, *(RelFileNode*)rec, GIN_ROOT_BLKNO ); |
||||
break; |
||||
case XLOG_GIN_CREATE_PTREE: |
||||
appendStringInfo(buf,"Create posting tree, "); |
||||
desc_node(buf, ((ginxlogCreatePostingTree*)rec)->node, ((ginxlogCreatePostingTree*)rec)->blkno ); |
||||
break; |
||||
case XLOG_GIN_INSERT: |
||||
appendStringInfo(buf,"Insert item, "); |
||||
desc_node(buf, ((ginxlogInsert*)rec)->node, ((ginxlogInsert*)rec)->blkno ); |
||||
appendStringInfo(buf," offset: %u nitem: %u isdata: %c isleaf %c isdelete %c updateBlkno:%u", |
||||
((ginxlogInsert*)rec)->offset, |
||||
((ginxlogInsert*)rec)->nitem, |
||||
( ((ginxlogInsert*)rec)->isData ) ? 'T' : 'F', |
||||
( ((ginxlogInsert*)rec)->isLeaf ) ? 'T' : 'F', |
||||
( ((ginxlogInsert*)rec)->isDelete ) ? 'T' : 'F', |
||||
((ginxlogInsert*)rec)->updateBlkno |
||||
); |
||||
|
||||
break; |
||||
case XLOG_GIN_SPLIT: |
||||
appendStringInfo(buf,"Page split, "); |
||||
desc_node(buf, ((ginxlogSplit*)rec)->node, ((ginxlogSplit*)rec)->lblkno ); |
||||
appendStringInfo(buf," isrootsplit: %c", ( ((ginxlogSplit*)rec)->isRootSplit ) ? 'T' : 'F'); |
||||
break; |
||||
case XLOG_GIN_VACUUM_PAGE: |
||||
appendStringInfo(buf,"Vacuum page, "); |
||||
desc_node(buf, ((ginxlogVacuumPage*)rec)->node, ((ginxlogVacuumPage*)rec)->blkno ); |
||||
break; |
||||
case XLOG_GIN_DELETE_PAGE: |
||||
appendStringInfo(buf,"Delete page, "); |
||||
desc_node(buf, ((ginxlogDeletePage*)rec)->node, ((ginxlogDeletePage*)rec)->blkno ); |
||||
break; |
||||
default: |
||||
elog(PANIC, "gin_desc: unknown op code %u", info); |
||||
} |
||||
} |
||||
|
||||
void
|
||||
gin_xlog_startup(void) { |
||||
incomplete_splits = NIL; |
||||
|
||||
opCtx = AllocSetContextCreate(CurrentMemoryContext, |
||||
"GIN recovery temporary context", |
||||
ALLOCSET_DEFAULT_MINSIZE, |
||||
ALLOCSET_DEFAULT_INITSIZE, |
||||
ALLOCSET_DEFAULT_MAXSIZE); |
||||
} |
||||
|
||||
static void |
||||
ginContinueSplit( ginIncompleteSplit *split ) { |
||||
GinBtreeData btree; |
||||
Relation reln; |
||||
Buffer buffer; |
||||
GinBtreeStack stack; |
||||
|
||||
/* elog(NOTICE,"ginContinueSplit root:%u l:%u r:%u", split->rootBlkno, split->leftBlkno, split->rightBlkno); */ |
||||
reln = XLogOpenRelation(split->node); |
||||
|
||||
buffer = XLogReadBuffer(reln, split->leftBlkno, false); |
||||
|
||||
if ( split->rootBlkno == GIN_ROOT_BLKNO ) { |
||||
prepareEntryScan( &btree, reln, (Datum)0, NULL ); |
||||
btree.entry = ginPageGetLinkItup( buffer ); |
||||
} else { |
||||
Page page = BufferGetPage( buffer ); |
||||
|
||||
prepareDataScan( &btree, reln ); |
||||
|
||||
PostingItemSetBlockNumber( &(btree.pitem), split->leftBlkno ); |
||||
if ( GinPageIsLeaf(page) ) |
||||
btree.pitem.key = *(ItemPointerData*)GinDataPageGetItem(page, |
||||
GinPageGetOpaque(page)->maxoff); |
||||
else |
||||
btree.pitem.key = ((PostingItem*)GinDataPageGetItem(page, |
||||
GinPageGetOpaque(page)->maxoff))->key; |
||||
} |
||||
|
||||
btree.rightblkno = split->rightBlkno;
|
||||
|
||||
stack.blkno = split->leftBlkno; |
||||
stack.buffer = buffer; |
||||
stack.off = InvalidOffsetNumber; |
||||
stack.parent = NULL; |
||||
|
||||
findParents( &btree, &stack, split->rootBlkno); |
||||
ginInsertValue( &btree, stack.parent ); |
||||
|
||||
UnlockReleaseBuffer( buffer ); |
||||
} |
||||
|
||||
void
|
||||
gin_xlog_cleanup(void) { |
||||
ListCell *l; |
||||
MemoryContext topCtx; |
||||
|
||||
topCtx = MemoryContextSwitchTo(opCtx); |
||||
|
||||
foreach(l, incomplete_splits) { |
||||
ginIncompleteSplit *split = (ginIncompleteSplit *) lfirst(l); |
||||
ginContinueSplit( split ); |
||||
MemoryContextReset( opCtx ); |
||||
} |
||||
|
||||
MemoryContextSwitchTo(topCtx); |
||||
MemoryContextDelete(opCtx); |
||||
} |
||||
|
@ -0,0 +1,436 @@ |
||||
/*--------------------------------------------------------------------------
|
||||
* gin.h |
||||
* header file for postgres inverted index access method implementation. |
||||
* |
||||
* Copyright (c) 2006, PostgreSQL Global Development Group |
||||
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.1 2006/05/02 11:28:55 teodor Exp $ |
||||
*-------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
|
||||
#ifndef GIN_H |
||||
#define GIN_H |
||||
|
||||
#include "access/xlog.h" |
||||
#include "access/xlogdefs.h" |
||||
#include "storage/bufpage.h" |
||||
#include "storage/off.h" |
||||
#include "utils/rel.h" |
||||
#include "access/itup.h" |
||||
#include "fmgr.h" |
||||
|
||||
|
||||
/*
|
||||
* amproc indexes for inverted indexes. |
||||
*/ |
||||
#define GIN_COMPARE_PROC 1 |
||||
#define GIN_EXTRACTVALUE_PROC 2 |
||||
#define GIN_EXTRACTQUERY_PROC 3 |
||||
#define GIN_CONSISTENT_PROC 4 |
||||
#define GINNProcs 4 |
||||
|
||||
typedef XLogRecPtr GinNSN; |
||||
|
||||
/*
|
||||
* Page opaque data in a inverted index page. |
||||
*/ |
||||
typedef struct GinPageOpaqueData { |
||||
uint16 flags; |
||||
OffsetNumber maxoff; /* number entries on GIN_DATA page:
|
||||
number of heap ItemPointer on GIN_DATA|GIN_LEAF page |
||||
and number of records on GIN_DATA & ~GIN_LEAF page |
||||
*/ |
||||
BlockNumber rightlink;
|
||||
} GinPageOpaqueData; |
||||
|
||||
typedef GinPageOpaqueData *GinPageOpaque; |
||||
|
||||
#define GIN_ROOT_BLKNO (0) |
||||
|
||||
typedef struct { |
||||
BlockIdData child_blkno; /* use it instead of BlockNumber to
|
||||
save space on page */ |
||||
ItemPointerData key; |
||||
} PostingItem; |
||||
|
||||
#define PostingItemGetBlockNumber(pointer) \ |
||||
BlockIdGetBlockNumber(&(pointer)->child_blkno) |
||||
|
||||
#define PostingItemSetBlockNumber(pointer, blockNumber) \ |
||||
BlockIdSet(&((pointer)->child_blkno), (blockNumber)) |
||||
|
||||
/*
 * Flag bits stored in GinPageOpaqueData.flags
 */
||||
#define GIN_DATA (1 << 0) |
||||
#define GIN_LEAF (1 << 1) |
||||
#define GIN_DELETED (1 << 2) |
||||
|
||||
/*
|
||||
* Works on page
|
||||
*/ |
||||
/* the GIN opaque area lives in the page's special space */
#define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) )

/* GIN_LEAF: test / set / clear */
#define GinPageIsLeaf(page)			( GinPageGetOpaque(page)->flags & GIN_LEAF )
#define GinPageSetLeaf(page)		( GinPageGetOpaque(page)->flags |= GIN_LEAF )
#define GinPageSetNonLeaf(page)		( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )

/* GIN_DATA: test / set */
#define GinPageIsData(page)			( GinPageGetOpaque(page)->flags & GIN_DATA )
#define GinPageSetData(page)		( GinPageGetOpaque(page)->flags |= GIN_DATA )

/* GIN_DELETED: test / set / clear */
#define GinPageIsDeleted(page)		( GinPageGetOpaque(page)->flags & GIN_DELETED)
#define GinPageSetDeleted(page)		( GinPageGetOpaque(page)->flags |= GIN_DELETED)
#define GinPageSetNonDeleted(page)	( GinPageGetOpaque(page)->flags &= ~GIN_DELETED)

/* a page without a right sibling is the rightmost one on its level */
#define GinPageRightMost(page)		( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
||||
|
||||
/*
|
||||
* Define our ItemPointerGet(BlockNumber|GetOffsetNumber) |
||||
* to prevent asserts |
||||
*/ |
||||
|
||||
/* raw access to the block-number part of an ItemPointer */
#define GinItemPointerGetBlockNumber(pointer) \
	BlockIdGetBlockNumber(&(pointer)->ip_blkid)

/* raw access to the offset part of an ItemPointer */
#define GinItemPointerGetOffsetNumber(pointer) \
	((pointer)->ip_posid)
||||
|
||||
/*
 * Support work on IndexTuple on leaf pages
 */
||||
/*
 * The offset part of t_tid holds the number of in-line posting items; the
 * special value GIN_TREE_POSTING marks a tuple that points to a posting
 * tree instead.
 */
#define GinGetNPosting(itup)			GinItemPointerGetOffsetNumber(&(itup)->t_tid)
#define GinSetNPosting(itup,n)			ItemPointerSetOffsetNumber(&(itup)->t_tid,(n))
#define GIN_TREE_POSTING				((OffsetNumber)0xffff)
#define GinIsPostingTree(itup)			( GinGetNPosting(itup)==GIN_TREE_POSTING )

/* for a posting-tree tuple the block part of t_tid is the tree's root block */
#define GinSetPostingTree(itup, blkno)	( GinSetNPosting((itup),GIN_TREE_POSTING ), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) )
#define GinGetPostingTree(itup)			GinItemPointerGetBlockNumber(&(itup)->t_tid)

/*
 * For a tuple with an in-line posting list the block part of t_tid holds
 * the original tuple size; the posting list starts at that size rounded up
 * with SHORTALIGN.
 */
#define GinGetOrigSizePosting(itup)		GinItemPointerGetBlockNumber(&(itup)->t_tid)
#define GinSetOrigSizePosting(itup,n)	ItemPointerSetBlockNumber(&(itup)->t_tid,(n))
#define GinGetPosting(itup)				( (ItemPointer)(( ((char*)(itup)) + SHORTALIGN(GinGetOrigSizePosting(itup)) )) )
||||
|
||||
/*
 * Upper bound on item size: one third of the page space left after the page
 * header and the GIN opaque area, less one line pointer.
 */
#define GinMaxItemSize \
	((BLCKSZ - SizeOfPageHeaderData - \
	  MAXALIGN(sizeof(GinPageOpaqueData))) / 3 - sizeof(ItemIdData))
||||
|
||||
|
||||
/*
|
||||
* Data (posting tree) pages |
||||
*/ |
||||
/*
 * Layout of a data page's contents: a MAXALIGN'ed ItemPointerData (the
 * right bound) followed by an array of fixed-size items.
 */
#define GinDataPageGetData(page) \
	(PageGetContents(page)+MAXALIGN(sizeof(ItemPointerData)))

#define GinDataPageGetRightBound(page)	((ItemPointer)PageGetContents(page))

/* leaf pages hold ItemPointerData, internal pages hold PostingItem */
#define GinSizeOfItem(page) \
	( (GinPageIsLeaf(page)) ? sizeof(ItemPointerData) : sizeof(PostingItem) )

/* items are numbered from 1 */
#define GinDataPageGetItem(page,i) \
	( GinDataPageGetData(page) + ((i)-1) * GinSizeOfItem(page) )

/* bytes not yet taken by the headers, the right bound and maxoff items */
#define GinDataPageGetFreeSpace(page) \
	( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) - \
	  GinPageGetOpaque(page)->maxoff * GinSizeOfItem(page) - \
	  MAXALIGN(sizeof(ItemPointerData)))
||||
|
||||
|
||||
|
||||
/* GIN-local aliases for the BUFFER_LOCK_* lock mode constants. */
#define GIN_UNLOCK BUFFER_LOCK_UNLOCK |
||||
#define GIN_SHARE BUFFER_LOCK_SHARE |
||||
#define GIN_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE |
||||
|
||||
/*
 * Per-index state: one FmgrInfo for each of four support functions
 * (compare, extractValue, extractQuery, consistent) plus a tuple descriptor.
 * initGinState(), declared in this header, takes a GinState and a Relation.
 * NOTE(review): presumably initGinState fills every field — confirm in ginutil.c.
 */
typedef struct GinState { |
||||
FmgrInfo compareFn; |
||||
FmgrInfo extractValueFn; |
||||
FmgrInfo extractQueryFn; |
||||
FmgrInfo consistentFn; |
||||
|
||||
TupleDesc tupdesc; |
||||
} GinState; |
||||
|
||||
/* XLog stuff */ |
/*
 * XLOG_GIN_* values are spaced 0x10 apart.
 * NOTE(review): presumably they are the record-type codes carried in the
 * high bits of the xlog info byte — confirm in ginxlog.c.
 */
||||
|
||||
#define XLOG_GIN_CREATE_INDEX 0x00 |
||||
|
||||
#define XLOG_GIN_CREATE_PTREE 0x10 |
||||
|
||||
/*
 * NOTE(review): presumably the fixed header of an XLOG_GIN_CREATE_PTREE
 * record; per the comment inside, a list of heap ItemPointers follows it
 * (nitem presumably gives their count — confirm).
 */
typedef struct ginxlogCreatePostingTree { |
||||
RelFileNode node; |
||||
BlockNumber blkno; |
||||
uint32 nitem; |
||||
/* follows list of heap's ItemPointer */ |
||||
} ginxlogCreatePostingTree; |
||||
|
||||
#define XLOG_GIN_INSERT 0x20 |
||||
|
||||
/*
 * NOTE(review): presumably the fixed header of an XLOG_GIN_INSERT record.
 * Variable-length data follows it (see the comment inside); isData/isLeaf
 * presumably tell the reader which kind of payload that is — confirm in
 * ginxlog.c.  Field order is part of the record layout; do not reorder.
 */
typedef struct ginxlogInsert { |
||||
RelFileNode node; |
||||
BlockNumber blkno; |
||||
BlockNumber updateBlkno; |
||||
OffsetNumber offset; |
||||
bool isDelete; |
||||
bool isData; |
||||
bool isLeaf; |
||||
OffsetNumber nitem;
|
||||
|
||||
/* follows: tuples or ItemPointerData or PostingItem or list of ItemPointerData */ |
||||
} ginxlogInsert; |
||||
|
||||
#define XLOG_GIN_SPLIT 0x30 |
||||
|
||||
/*
 * NOTE(review): presumably the fixed header of an XLOG_GIN_SPLIT record.
 * lblkno/rblkno presumably name the left and right halves and rrlink the
 * right page's right link — confirm in ginxlog.c.  The item list follows
 * the header.  Field order is part of the record layout; do not reorder.
 */
typedef struct ginxlogSplit { |
||||
RelFileNode node; |
||||
BlockNumber lblkno; |
||||
BlockNumber rootBlkno; |
||||
BlockNumber rblkno; |
||||
BlockNumber rrlink; |
||||
OffsetNumber separator; |
||||
OffsetNumber nitem; |
||||
|
||||
bool isData; |
||||
bool isLeaf; |
||||
bool isRootSplit; |
||||
|
||||
BlockNumber leftChildBlkno; |
||||
BlockNumber updateBlkno; |
||||
|
||||
ItemPointerData rightbound; /* used only in posting tree */ |
||||
/* follows: list of tuple or ItemPointerData or PostingItem */ |
||||
} ginxlogSplit; |
||||
|
||||
#define XLOG_GIN_VACUUM_PAGE 0x40 |
||||
|
||||
/*
 * NOTE(review): presumably the fixed header of an XLOG_GIN_VACUUM_PAGE
 * record; per the comment inside, the page content follows it.
 */
typedef struct ginxlogVacuumPage { |
||||
RelFileNode node; |
||||
BlockNumber blkno; |
||||
OffsetNumber nitem; |
||||
/* follows content of page */ |
||||
} ginxlogVacuumPage; |
||||
|
||||
#define XLOG_GIN_DELETE_PAGE 0x50 |
||||
|
||||
/*
 * NOTE(review): presumably the payload of an XLOG_GIN_DELETE_PAGE record:
 * the page itself, the parent page and offset, the left sibling, and a
 * right link — confirm the exact meaning of each field in ginxlog.c.
 * Nothing in the struct indicates trailing data.
 */
typedef struct ginxlogDeletePage { |
||||
RelFileNode node; |
||||
BlockNumber blkno; |
||||
BlockNumber parentBlkno; |
||||
OffsetNumber parentOffset; |
||||
BlockNumber leftBlkno; |
||||
BlockNumber rightLink; |
||||
} ginxlogDeletePage; |
||||
|
||||
/* ginutil.c */ |
||||
extern void initGinState( GinState *state, Relation index ); |
||||
extern Buffer GinNewBuffer(Relation index); |
||||
extern void GinInitBuffer(Buffer b, uint32 f); |
||||
extern void GinInitPage(Page page, uint32 f, Size pageSize); |
||||
extern int compareEntries(GinState *ginstate, Datum a, Datum b);
|
||||
extern Datum* extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries); |
||||
extern Datum* extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries); |
||||
extern Page GinPageGetCopyPage( Page page ); |
||||
|
||||
/* gininsert.c */ |
||||
extern Datum ginbuild(PG_FUNCTION_ARGS); |
||||
extern Datum gininsert(PG_FUNCTION_ARGS); |
||||
|
||||
/* ginxlog.c */ |
||||
extern void gin_redo(XLogRecPtr lsn, XLogRecord *record); |
||||
extern void gin_desc(StringInfo buf, uint8 xl_info, char *rec); |
||||
extern void gin_xlog_startup(void); |
||||
extern void gin_xlog_cleanup(void); |
||||
|
||||
/* ginbtree.c */ |
||||
|
||||
/*
 * One element of a linked list of tree positions: a block number, a buffer
 * and an offset, chained through `parent`.
 * NOTE(review): presumably one element per tree level visited on the way
 * down, with `parent` pointing one level up — confirm in ginbtree.c.
 */
typedef struct GinBtreeStack { |
||||
BlockNumber blkno; |
||||
Buffer buffer; |
||||
OffsetNumber off; |
||||
/* predictNumber holds the predicted number of pages on the current level */
|
||||
uint32 predictNumber; |
||||
struct GinBtreeStack *parent; |
||||
} GinBtreeStack; |
||||
|
||||
/*
 * GinBtree is a pointer to GinBtreeData.  GinBtreeData bundles a table of
 * function pointers (each takes the GinBtree itself as its first argument)
 * with the state of one tree operation.  The struct carries two groups of
 * operation-specific fields, labelled "Entry options" and "Data (posting
 * tree) option".
 * NOTE(review): presumably prepareEntryScan() and prepareDataScan(),
 * declared in this header, install the function pointers for the two tree
 * kinds — confirm in ginentrypage.c / gindatapage.c.
 */
typedef struct GinBtreeData *GinBtree; |
||||
|
||||
typedef struct GinBtreeData { |
||||
/* search methods */ |
||||
BlockNumber (*findChildPage)(GinBtree, GinBtreeStack *); |
||||
bool (*isMoveRight)(GinBtree, Page); |
||||
bool (*findItem)(GinBtree, GinBtreeStack *); |
||||
|
||||
/* insert methods */ |
||||
OffsetNumber (*findChildPtr)(GinBtree, Page, BlockNumber, OffsetNumber); |
||||
BlockNumber (*getLeftMostPage)(GinBtree, Page); |
||||
bool (*isEnoughSpace)(GinBtree, Buffer, OffsetNumber); |
||||
void (*placeToPage)(GinBtree, Buffer, OffsetNumber, XLogRecData**); |
||||
Page (*splitPage)(GinBtree, Buffer, Buffer, OffsetNumber, XLogRecData**); |
||||
/* entryFillRoot and dataFillRoot, declared in this header, have this signature */
void (*fillRoot)(GinBtree, Buffer, Buffer, Buffer); |
||||
|
||||
bool searchMode; |
||||
|
||||
Relation index; |
||||
GinState *ginstate; |
||||
bool fullScan; |
||||
bool isBuild; |
||||
|
||||
BlockNumber rightblkno; |
||||
|
||||
/* Entry options */ |
||||
Datum entryValue; |
||||
IndexTuple entry; |
||||
bool isDelete; |
||||
|
||||
/* Data (posting tree) option */ |
||||
ItemPointerData *items; |
||||
uint32 nitem; |
||||
uint32 curitem; |
||||
|
||||
PostingItem pitem; |
||||
} GinBtreeData; |
||||
|
||||
extern GinBtreeStack* ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno); |
||||
extern GinBtreeStack* ginFindLeafPage(GinBtree btree, GinBtreeStack *stack ); |
||||
extern void freeGinBtreeStack( GinBtreeStack *stack ); |
||||
extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack); |
||||
extern void findParents( GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno); |
||||
|
||||
/* ginentrypage.c */ |
||||
extern IndexTuple GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd); |
||||
extern Datum ginGetHighKey(GinState *ginstate, Page page); |
||||
extern void prepareEntryScan( GinBtree btree, Relation index, Datum value, GinState *ginstate); |
||||
extern void entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); |
||||
extern IndexTuple ginPageGetLinkItup(Buffer buf); |
||||
|
||||
/* gindatapage.c */ |
||||
extern int compareItemPointers( ItemPointer a, ItemPointer b ); |
||||
extern void MergeItemPointers( |
||||
ItemPointerData *dst,
|
||||
ItemPointerData *a, uint32 na,
|
||||
ItemPointerData *b, uint32 nb |
||||
); |
||||
|
||||
extern void GinDataPageAddItem( Page page, void *data, OffsetNumber offset ); |
||||
extern void PageDeletePostingItem(Page page, OffsetNumber offset); |
||||
|
||||
/*
 * Pairs an embedded GinBtreeData with a GinBtreeStack pointer.  Returned by
 * prepareScanPostingTree() and consumed by insertItemPointer() and
 * scanBeginPostingTree(), all declared in this header.
 */
typedef struct { |
||||
GinBtreeData btree; |
||||
GinBtreeStack *stack; |
||||
} GinPostingTreeScan; |
||||
|
||||
extern GinPostingTreeScan* prepareScanPostingTree( Relation index,
|
||||
BlockNumber rootBlkno, bool searchMode); |
||||
extern void insertItemPointer(GinPostingTreeScan *gdi,
|
||||
ItemPointerData *items, uint32 nitem); |
||||
extern Buffer scanBeginPostingTree( GinPostingTreeScan *gdi ); |
||||
extern void dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); |
||||
extern void prepareDataScan( GinBtree btree, Relation index); |
||||
/* ginscan.c */ |
||||
|
||||
/*
 * GinScanEntry is a pointer to GinScanEntryData, the scan state for a
 * single entry obtained from extractQueryFn.
 */
typedef struct GinScanEntryData *GinScanEntry; |
||||
|
||||
typedef struct GinScanEntryData { |
||||
/* link to the equal entry in current scan key */ |
||||
GinScanEntry master;
|
||||
|
||||
/* link to values reported to consistentFn,
|
||||
points to GinScanKey->entryRes[i]*/
|
||||
bool *pval;
|
||||
|
||||
/* entry, got from extractQueryFn */
|
||||
Datum entry; |
||||
|
||||
/* current ItemPointer to heap, its offset in buffer and buffer */ |
||||
ItemPointerData curItem; |
||||
OffsetNumber offset; |
||||
Buffer buffer; |
||||
|
||||
/* in case of Posting list */ |
||||
ItemPointerData *list; |
||||
uint32 nlist; |
||||
|
||||
bool isFinished; |
||||
bool reduceResult; |
||||
uint32 predictNumberResult; |
||||
} GinScanEntryData; |
||||
|
||||
/*
 * State for one scan key: the entries extracted from the query, a parallel
 * array of boolean results handed to consistentFn, and the strategy/query
 * pair passed along with them.  GinScanKey is a pointer to this struct.
 */
typedef struct GinScanKeyData { |
||||
/* Number of entries in query (got by extractQueryFn) */
|
||||
uint32 nentries; |
||||
|
||||
/* array of per-entry boolean results, reported to consistentFn */ |
||||
bool *entryRes; |
||||
|
||||
/* array of scans per entry */
|
||||
GinScanEntry scanEntry; |
||||
|
||||
/* for calling consistentFn(GinScanKey->entryRes, strategy, query) */ |
||||
StrategyNumber strategy; |
||||
Datum query; |
||||
|
||||
ItemPointerData curItem; |
||||
bool firstCall; |
||||
bool isFinished; |
||||
} GinScanKeyData; |
||||
|
||||
typedef GinScanKeyData *GinScanKey;
|
||||
|
||||
/*
 * Scan-wide state: a memory context, an embedded GinState, the scan keys
 * with their count, and a second GinScanKey named markPos.
 * NOTE(review): presumably markPos is what ginmarkpos()/ginrestrpos() save
 * and restore — confirm in ginscan.c.
 * GinScanOpaque is a pointer to this struct.
 */
typedef struct GinScanOpaqueData { |
||||
MemoryContext tempCtx; |
||||
GinState ginstate; |
||||
|
||||
GinScanKey keys; |
||||
uint32 nkeys; |
||||
|
||||
GinScanKey markPos; |
||||
} GinScanOpaqueData; |
||||
|
||||
typedef GinScanOpaqueData *GinScanOpaque; |
||||
|
||||
extern Datum ginbeginscan(PG_FUNCTION_ARGS); |
||||
extern Datum ginendscan(PG_FUNCTION_ARGS); |
||||
extern Datum ginrescan(PG_FUNCTION_ARGS); |
||||
extern Datum ginmarkpos(PG_FUNCTION_ARGS); |
||||
extern Datum ginrestrpos(PG_FUNCTION_ARGS); |
||||
extern void newScanKey( IndexScanDesc scan ); |
||||
|
||||
/* ginget.c */ |
||||
extern DLLIMPORT int GinFuzzySearchLimit; |
||||
|
||||
/*
 * Sentinel item pointers.  "Max" is block 0xffffffff with offset 0xffff;
 * "Min" is block 0 with offset 0.  The Set forms store the sentinel via
 * ItemPointerSet and the Is forms test for it.  The Is forms evaluate
 * their argument twice.
 */
#define ItemPointerSetMax(p) ItemPointerSet( (p), (BlockNumber)0xffffffff, (OffsetNumber)0xffff ) |
||||
#define ItemPointerIsMax(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0xffffffff && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff ) |
||||
#define ItemPointerSetMin(p) ItemPointerSet( (p), (BlockNumber)0, (OffsetNumber)0) |
||||
#define ItemPointerIsMin(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0 && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0 ) |
||||
|
||||
extern Datum gingetmulti(PG_FUNCTION_ARGS); |
||||
extern Datum gingettuple(PG_FUNCTION_ARGS); |
||||
|
||||
/* ginvacuum.c */ |
||||
extern Datum ginbulkdelete(PG_FUNCTION_ARGS); |
||||
extern Datum ginvacuumcleanup(PG_FUNCTION_ARGS); |
||||
|
||||
/* ginarrayproc.c */ |
||||
extern Datum ginarrayextract(PG_FUNCTION_ARGS); |
||||
extern Datum ginarrayconsistent(PG_FUNCTION_ARGS); |
||||
|
||||
/* I'm not sure this is the best place for these declarations */ |
||||
extern Datum arrayoverlap(PG_FUNCTION_ARGS); |
||||
extern Datum arraycontains(PG_FUNCTION_ARGS); |
||||
extern Datum arraycontained(PG_FUNCTION_ARGS); |
||||
|
||||
/* ginbulk.c */ |
||||
/*
 * One accumulated entry: a Datum value plus a list of ItemPointerData.
 * NOTE(review): presumably `length` is the allocated capacity of `list`,
 * `number` the count in use, and `shouldSort` marks a list that is not yet
 * in order — confirm in ginbulk.c.
 */
typedef struct { |
||||
Datum value; |
||||
uint32 length; |
||||
uint32 number; |
||||
ItemPointerData *list; |
||||
bool shouldSort; |
||||
} EntryAccumulator; |
||||
|
||||
/*
 * Accumulator used by ginInitBA(), ginInsertRecordBA() and ginGetEntry(),
 * all declared in this header: an array of EntryAccumulator plus counters.
 * NOTE(review): presumably `length`/`number` are the capacity and used
 * count of `entries`, `curget` is the read cursor for ginGetEntry, and
 * `allocatedMemory` is a running byte total — confirm in ginbulk.c.
 */
typedef struct { |
||||
GinState *ginstate; |
||||
EntryAccumulator *entries; |
||||
uint32 length; |
||||
uint32 number; |
||||
uint32 curget; |
||||
uint32 allocatedMemory; |
||||
} BuildAccumulator; |
||||
|
||||
extern void ginInitBA(BuildAccumulator *accum); |
||||
extern void ginInsertRecordBA( BuildAccumulator *accum,
|
||||
ItemPointer heapptr, Datum *entries, uint32 nentry ); |
||||
extern ItemPointerData* ginGetEntry(BuildAccumulator *accum, Datum *entry, uint32 *n); |
||||
|
||||
#endif |
@ -0,0 +1,100 @@ |
||||
1 {92,75,71,52,64,83} {AAAAAAAA44066,AAAAAA1059,AAAAAAAAAAA176,AAAAAAA48038} |
||||
2 {3,6} {AAAAAA98232,AAAAAAAA79710,AAAAAAAAAAAAAAAAA69675,AAAAAAAAAAAAAAAA55798,AAAAAAAAA12793} |
||||
3 {37,64,95,43,3,41,13,30,11,43} {AAAAAAAAAA48845,AAAAA75968,AAAAA95309,AAA54451,AAAAAAAAAA22292,AAAAAAA99836,A96617,AA17009,AAAAAAAAAAAAAA95246} |
||||
4 {71,39,99,55,33,75,45} {AAAAAAAAA53663,AAAAAAAAAAAAAAA67062,AAAAAAAAAA64777,AAA99043,AAAAAAAAAAAAAAAAAAA91804,39557} |
||||
5 {50,42,77,50,4} {AAAAAAAAAAAAAAAAA26540,AAAAAAA79710,AAAAAAAAAAAAAAAAAAA1205,AAAAAAAAAAA176,AAAAA95309,AAAAAAAAAAA46154,AAAAAA66777,AAAAAAAAA27249,AAAAAAAAAA64777,AAAAAAAAAAAAAAAAAAA70104} |
||||
6 {39,35,5,94,17,92,60,32} {AAAAAAAAAAAAAAA35875,AAAAAAAAAAAAAAAA23657} |
||||
7 {12,51,88,64,8} {AAAAAAAAAAAAAAAAAA12591,AAAAAAAAAAAAAAAAA50407,AAAAAAAAAAAA67946} |
||||
8 {60,84} {AAAAAAA81898,AAAAAA1059,AAAAAAAAAAAA81511,AAAAA961,AAAAAAAAAAAAAAAA31334,AAAAA64741,AA6416,AAAAAAAAAAAAAAAAAA32918,AAAAAAAAAAAAAAAAA50407} |
||||
9 {56,52,35,27,80,44,81,22} {AAAAAAAAAAAAAAA73034,AAAAAAAAAAAAA7929,AAAAAAA66161,AA88409,39557,A27153,AAAAAAAA9523,AAAAAAAAAAA99000} |
||||
10 {71,5,45} {AAAAAAAAAAA21658,AAAAAAAAAAAA21089,AAA54451,AAAAAAAAAAAAAAAAAA54141,AAAAAAAAAAAAAA28620,AAAAAAAAAAA21658,AAAAAAAAAAA74076,AAAAAAAAA27249} |
||||
11 {41,86,74,48,22,74,47,50} {AAAAAAAA9523,AAAAAAAAAAAA37562,AAAAAAAAAAAAAAAA14047,AAAAAAAAAAA46154,AAAA41702,AAAAAAAAAAAAAAAAA764,AAAAA62737,39557} |
||||
12 {17,99,18,52,91,72,0,43,96,23} {AAAAA33250,AAAAAAAAAAAAAAAAAAA85420,AAAAAAAAAAA33576} |
||||
13 {3,52,34,23} {AAAAAA98232,AAAA49534,AAAAAAAAAAA21658} |
||||
14 {78,57,19} {AAAA8857,AAAAAAAAAAAAAAA73034,AAAAAAAA81587,AAAAAAAAAAAAAAA68526,AAAAA75968,AAAAAAAAAAAAAA65909,AAAAAAAAA10012,AAAAAAAAAAAAAA65909} |
||||
15 {17,14,16,63,67} {AA6416,AAAAAAAAAA646,AAAAA95309} |
||||
16 {14,63,85,11} {AAAAAA66777} |
||||
17 {7,10,81,85} {AAAAAA43678,AAAAAAA12144,AAAAAAAAAAA50956,AAAAAAAAAAAAAAAAAAA15356} |
||||
18 {1} {AAAAAAAAAAA33576,AAAAA95309,64261,AAA59323,AAAAAAAAAAAAAA95246,55847,AAAAAAAAAAAA67946,AAAAAAAAAAAAAAAAAA64374} |
||||
19 {52,82,17,74,23,46,69,51,75} {AAAAAAAAAAAAA73084,AAAAA75968,AAAAAAAAAAAAAAAA14047,AAAAAAA80240,AAAAAAAAAAAAAAAAAAA1205,A68938} |
||||
20 {72,89,70,51,54,37,8,49,79} {AAAAAA58494} |
||||
21 {2,8,65,10,5,79,43} {AAAAAAAAAAAAAAAAA88852,AAAAAAAAAAAAAAAAAAA91804,AAAAA64669,AAAAAAAAAAAAAAAA1443,AAAAAAAAAAAAAAAA23657,AAAAA12179,AAAAAAAAAAAAAAAAA88852,AAAAAAAAAAAAAAAA31334,AAAAAAAAAAAAAAAA41303,AAAAAAAAAAAAAAAAAAA85420} |
||||
22 {11,6,56,62,53,30} {AAAAAAAA72908} |
||||
23 {40,90,5,38,72,40,30,10,43,55} {A6053,AAAAAAAAAAA6119,AA44673,AAAAAAAAAAAAAAAAA764,AA17009,AAAAA17383,AAAAA70514,AAAAA33250,AAAAA95309,AAAAAAAAAAAA37562} |
||||
24 {94,61,99,35,48} {AAAAAAAAAAA50956,AAAAAAAAAAA15165,AAAA85070,AAAAAAAAAAAAAAA36627,AAAAA961,AAAAAAAAAA55219} |
||||
25 {31,1,10,11,27,79,38} {AAAAAAAAAAAAAAAAAA59334,45449} |
||||
26 {71,10,9,69,75} {47735,AAAAAAA21462,AAAAAAAAAAAAAAAAA6897,AAAAAAAAAAAAAAAAAAA91804,AAAAAAAAA72121,AAAAAAAAAAAAAAAAAAA1205,AAAAA41597,AAAA8857,AAAAAAAAAAAAAAAAAAA15356,AA17009} |
||||
27 {94} {AA6416,A6053,AAAAAAA21462,AAAAAAA57334,AAAAAAAAAAAAAAAAAA12591,AA88409,AAAAAAAAAAAAA70254} |
||||
28 {14,33,6,34,14} {AAAAAAAAAAAAAAA13198,AAAAAAAA69452,AAAAAAAAAAA82945,AAAAAAA12144,AAAAAAAAA72121,AAAAAAAAAA18601} |
||||
29 {39,21} {AAAAAAAAAAAAAAAAA6897,AAAAAAAAAAAAAAAAAAA38885,AAAA85070,AAAAAAAAAAAAAAAAAAA70104,AAAAA66674,AAAAAAAAAAAAA62007,AAAAAAAA69452,AAAAAAA1242,AAAAAAAAAAAAAAAA1729,AAAA35194} |
||||
30 {26,81,47,91,34} {AAAAAAAAAAAAAAAAAAA70104,AAAAAAA80240} |
||||
31 {80,24,18,21,54} {AAAAAAAAAAAAAAA13198,AAAAAAAAAAAAAAAAAAA70415,A27153,AAAAAAAAA53663,AAAAAAAAAAAAAAAAA50407,A68938} |
||||
32 {58,79,82,80,67,75,98,10,41} {AAAAAAAAAAAAAAAAAA61286,AAA54451,AAAAAAAAAAAAAAAAAAA87527,A96617,51533} |
||||
33 {74,73} {A85417,AAAAAAA56483,AAAAA17383,AAAAAAAAAAAAA62159,AAAAAAAAAAAA52814,AAAAAAAAAAAAA85723,AAAAAAAAAAAAAAAAAA55796} |
||||
34 {70,45} {AAAAAAAAAAAAAAAAAA71621,AAAAAAAAAAAAAA28620,AAAAAAAAAA55219,AAAAAAAA23648,AAAAAAAAAA22292,AAAAAAA1242} |
||||
35 {23,40} {AAAAAAAAAAAA52814,AAAA48949,AAAAAAAAA34727,AAAA8857,AAAAAAAAAAAAAAAAAAA62179,AAAAAAAAAAAAAAA68526,AAAAAAA99836,AAAAAAAA50094,AAAA91194,AAAAAAAAAAAAA73084} |
||||
36 {79,82,14,52,30,5,79} {AAAAAAAAA53663,AAAAAAAAAAAAAAAA55798,AAAAAAAAAAAAAAAAAAA89194,AA88409,AAAAAAAAAAAAAAA81326,AAAAAAAAAAAAAAAAA63050,AAAAAAAAAAAAAAAA33598} |
||||
37 {53,11,81,39,3,78,58,64,74} {AAAAAAAAAAAAAAAAAAA17075,AAAAAAA66161,AAAAAAAA23648,AAAAAAAAAAAAAA10611} |
||||
38 {59,5,4,95,28} {AAAAAAAAAAA82945,A96617,47735,AAAAA12179,AAAAA64669,AAAAAA99807,AA74433,AAAAAAAAAAAAAAAAA59387} |
||||
39 {82,43,99,16,74} {AAAAAAAAAAAAAAA67062,AAAAAAA57334,AAAAAAAAAAAAAA65909,A27153,AAAAAAAAAAAAAAAAAAA17075,AAAAAAAAAAAAAAAAA43052,AAAAAAAAAA64777,AAAAAAAAAAAA81511,AAAAAAAAAAAAAA65909,AAAAAAAAAAAAAA28620} |
||||
40 {34} {AAAAAAAAAAAAAA10611,AAAAAAAAAAAAAAAAAAA1205,AAAAAAAAAAA50956,AAAAAAAAAAAAAAAA31334,AAAAA70466,AAAAAAAA81587,AAAAAAA74623} |
||||
41 {19,26,63,12,93,73,27,94} {AAAAAAA79710,AAAAAAAAAA55219,AAAA41702,AAAAAAAAAAAAAAAAAAA17075,AAAAAAAAAAAAAAAAAA71621,AAAAAAAAAAAAAAAAA63050,AAAAAAA99836,AAAAAAAAAAAAAA8666} |
||||
42 {15,76,82,75,8,91} {AAAAAAAAAAA176,AAAAAA38063,45449,AAAAAA54032,AAAAAAA81898,AA6416,AAAAAAAAAAAAAAAAAAA62179,45449,AAAAA60038,AAAAAAAA81587} |
||||
43 {39,87,91,97,79,28} {AAAAAAAAAAA74076,A96617,AAAAAAAAAAAAAAAAAAA89194,AAAAAAAAAAAAAAAAAA55796,AAAAAAAAAAAAAAAA23657,AAAAAAAAAAAA67946} |
||||
44 {40,58,68,29,54} {AAAAAAA81898,AAAAAA66777,AAAAAA98232} |
||||
45 {99,45} {AAAAAAAA72908,AAAAAAAAAAAAAAAAAAA17075,AA88409,AAAAAAAAAAAAAAAAAA36842,AAAAAAA48038,AAAAAAAAAAAAAA10611} |
||||
46 {53,24} {AAAAAAAAAAA53908,AAAAAA54032,AAAAA17383,AAAA48949,AAAAAAAAAA18601,AAAAA64669,45449,AAAAAAAAAAA98051,AAAAAAAAAAAAAAAAAA71621} |
||||
47 {98,23,64,12,75,61} {AAA59323,AAAAA95309,AAAAAAAAAAAAAAAA31334,AAAAAAAAA27249,AAAAA17383,AAAAAAAAAAAA37562,AAAAAA1059,A84822,55847,AAAAA70466} |
||||
48 {76,14} {AAAAAAAAAAAAA59671,AAAAAAAAAAAAAAAAAAA91804,AAAAAA66777,AAAAAAAAAAAAAAAAAAA89194,AAAAAAAAAAAAAAA36627,AAAAAAAAAAAAAAAAAAA17075,AAAAAAAAAAAAA73084,AAAAAAA79710,AAAAAAAAAAAAAAA40402,AAAAAAAAAAAAAAAAAAA65037} |
||||
49 {56,5,54,37,49} {AA21643,AAAAAAAAAAA92631,AAAAAAAA81587} |
||||
50 {20,12,37,64,93} {AAAAAAAAAA5483,AAAAAAAAAAAAAAAAAAA1205,AA6416,AAAAAAAAAAAAAAAAA63050,AAAAAAAAAAAAAAAAAA47955} |
||||
51 {47} {AAAAAAAAAAAAAA96505,AAAAAAAAAAAAAAAAAA36842,AAAAA95309,AAAAAAAA81587,AA6416,AAAA91194,AAAAAA58494,AAAAAA1059,AAAAAAAA69452} |
||||
52 {89,0} {AAAAAAAAAAAAAAAAAA47955,AAAAAAA48038,AAAAAAAAAAAAAAAAA43052,AAAAAAAAAAAAA73084,AAAAA70466,AAAAAAAAAAAAAAAAA764,AAAAAAAAAAA46154,AA66862} |
||||
53 {38,17} {AAAAAAAAAAA21658} |
||||
54 {70,47} {AAAAAAAAAAAAAAAAAA54141,AAAAA40681,AAAAAAA48038,AAAAAAAAAAAAAAAA29150,AAAAA41597,AAAAAAAAAAAAAAAAAA59334,AA15322} |
||||
55 {47,79,47,64,72,25,71,24,93} {AAAAAAAAAAAAAAAAAA55796,AAAAA62737} |
||||
56 {33,7,60,54,93,90,77,85,39} {AAAAAAAAAAAAAAAAAA32918,AA42406} |
||||
57 {23,45,10,42,36,21,9,96} {AAAAAAAAAAAAAAAAAAA70415} |
||||
58 {92} {AAAAAAAAAAAAAAAA98414,AAAAAAAA23648,AAAAAAAAAAAAAAAAAA55796,AA25381,AAAAAAAAAAA6119} |
||||
59 {9,69,46,77} {39557,AAAAAAA89932,AAAAAAAAAAAAAAAAA43052,AAAAAAAAAAAAAAAAA26540,AAA20874,AA6416,AAAAAAAAAAAAAAAAAA47955} |
||||
60 {62,2,59,38,89} {AAAAAAA89932,AAAAAAAAAAAAAAAAAAA15356,AA99927,AA17009,AAAAAAAAAAAAAAA35875} |
||||
61 {72,2,44,95,54,54,13} {AAAAAAAAAAAAAAAAAAA91804} |
||||
62 {83,72,29,73} {AAAAAAAAAAAAA15097,AAAA8857,AAAAAAAAAAAA35809,AAAAAAAAAAAA52814,AAAAAAAAAAAAAAAAAAA38885,AAAAAAAAAAAAAAAAAA24183,AAAAAA43678,A96617} |
||||
63 {11,4,61,87} {AAAAAAAAA27249,AAAAAAAAAAAAAAAAAA32918,AAAAAAAAAAAAAAA13198,AAA20874,39557,51533,AAAAAAAAAAA53908,AAAAAAAAAAAAAA96505,AAAAAAAA78938} |
||||
64 {26,19,34,24,81,78} {A96617,AAAAAAAAAAAAAAAAAAA70104,A68938,AAAAAAAAAAA53908,AAAAAAAAAAAAAAA453,AA17009,AAAAAAA80240} |
||||
65 {61,5,76,59,17} {AAAAAA99807,AAAAA64741,AAAAAAAAAAA53908,AA21643,AAAAAAAAA10012} |
||||
66 {31,23,70,52,4,33,48,25} {AAAAAAAAAAAAAAAAA69675,AAAAAAAA50094,AAAAAAAAAAA92631,AAAA35194,39557,AAAAAAA99836} |
||||
67 {31,94,7,10} {AAAAAA38063,A96617,AAAA35194,AAAAAAAAAAAA67946} |
||||
68 {90,43,38} {AA75092,AAAAAAAAAAAAAAAAA69675,AAAAAAAAAAA92631,AAAAAAAAA10012,AAAAAAAAAAAAA7929,AA21643} |
||||
69 {67,35,99,85,72,86,44} {AAAAAAAAAAAAAAAAAAA1205,AAAAAAAA50094,AAAAAAAAAAAAAAAA1729,AAAAAAAAAAAAAAAAAA47955} |
||||
70 {56,70,83} {AAAA41702,AAAAAAAAAAA82945,AA21643,AAAAAAAAAAA99000,A27153,AA25381,AAAAAAAAAAAAAA96505,AAAAAAA1242} |
||||
71 {74,26} {AAAAAAAAAAA50956,AA74433,AAAAAAA21462,AAAAAAAAAAAAAAAAAAA17075,AAAAAAAAAAAAAAA36627,AAAAAAAAAAAAA70254,AAAAAAAAAA43419,39557} |
||||
72 {22,1,16,78,20,91,83} {47735,AAAAAAA56483,AAAAAAAAAAAAA93788,AA42406,AAAAAAAAAAAAA73084,AAAAAAAA72908,AAAAAAAAAAAAAAAAAA61286,AAAAA66674,AAAAAAAAAAAAAAAAA50407} |
||||
73 {88,25,96,78,65,15,29,19} {AAA54451,AAAAAAAAA27249,AAAAAAA9228,AAAAAAAAAAAAAAA67062,AAAAAAAAAAAAAAAAAAA70415,AAAAA17383,AAAAAAAAAAAAAAAA33598} |
||||
74 {32} {AAAAAAAAAAAAAAAA1729,AAAAAAAAAAAAA22860,AAAAAA99807,AAAAA17383,AAAAAAAAAAAAAAA67062,AAAAAAAAAAA15165,AAAAAAAAAAA50956} |
||||
75 {12,96,83,24,71,89,55} {AAAA48949,AAAAAAAA29716,AAAAAAAAAAAAAAAAAAA1205,AAAAAAAAAAAA67946,AAAAAAAAAAAAAAAA29150,AAA28075,AAAAAAAAAAAAAAAAA43052} |
||||
76 {92,55,10,7} {AAAAAAAAAAAAAAA67062} |
||||
77 {97,15,32,17,55,59,18,37,50,39} {AAAAAAAAAAAA67946,AAAAAA54032,AAAAAAAA81587,55847,AAAAAAAAAAAAAA28620,AAAAAAAAAAAAAAAAA43052,AAAAAA75463,AAAA49534,AAAAAAAA44066} |
||||
78 {55,89,44,84,34} {AAAAAAAAAAA6119,AAAAAAAAAAAAAA8666,AA99927,AA42406,AAAAAAA81898,AAAAAAA9228,AAAAAAAAAAA92631,AA21643,AAAAAAAAAAAAAA28620} |
||||
79 {45} {AAAAAAAAAA646,AAAAAAAAAAAAAAAAAAA70415,AAAAAA43678,AAAAAAAA72908} |
||||
80 {74,89,44,80,0} {AAAA35194,AAAAAAAA79710,AAA20874,AAAAAAAAAAAAAAAAAAA70104,AAAAAAAAAAAAA73084,AAAAAAA57334,AAAAAAA9228,AAAAAAAAAAAAA62007} |
||||
81 {63,77,54,48,61,53,97} {AAAAAAAAAAAAAAA81326,AAAAAAAAAA22292,AA25381,AAAAAAAAAAA74076,AAAAAAA81898,AAAAAAAAA72121} |
||||
82 {34,60,4,79,78,16,86,89,42,50} {AAAAA40681,AAAAAAAAAAAAAAAAAA12591,AAAAAAA80240,AAAAAAAAAAAAAAAA55798,AAAAAAAAAAAAAAAAAAA70104} |
||||
83 {14,10} {AAAAAAAAAA22292,AAAAAAAAAAAAA70254,AAAAAAAAAAA6119} |
||||
84 {11,83,35,13,96,94} {AAAAA95309,AAAAAAAAAAAAAAAAAA32918,AAAAAAAAAAAAAAAAAA24183} |
||||
85 {39,60} {AAAAAAAAAAAAAAAA55798,AAAAAAAAAA22292,AAAAAAA66161,AAAAAAA21462,AAAAAAAAAAAAAAAAAA12591,55847,AAAAAA98232,AAAAAAAAAAA46154} |
||||
86 {33,81,72,74,45,36,82} {AAAAAAAA81587,AAAAAAAAAAAAAA96505,45449,AAAA80176} |
||||
87 {57,27,50,12,97,68} {AAAAAAAAAAAAAAAAA26540,AAAAAAAAA10012,AAAAAAAAAAAA35809,AAAAAAAAAAAAAAAA29150,AAAAAAAAAAA82945,AAAAAA66777,31228,AAAAAAAAAAAAAAAA23657,AAAAAAAAAAAAAA28620,AAAAAAAAAAAAAA96505} |
||||
88 {41,90,77,24,6,24} {AAAA35194,AAAA35194,AAAAAAA80240,AAAAAAAAAAA46154,AAAAAA58494,AAAAAAAAAAAAAAAAAAA17075,AAAAAAAAAAAAAAAAAA59334,AAAAAAAAAAAAAAAAAAA91804,AA74433} |
||||
89 {40,32,17,6,30,88} {AA44673,AAAAAAAAAAA6119,AAAAAAAAAAAAAAAA23657,AAAAAAAAAAAAAAAAAA47955,AAAAAAAAAAAAAAAA33598,AAAAAAAAAAA33576,AA44673} |
||||
90 {88,75} {AAAAA60038,AAAAAAAA23648,AAAAAAAAAAA99000,AAAA41702,AAAAAAAAAAAAA22860,AAAAAAAAAAAAAAA68526} |
||||
91 {78} {AAAAAAAAAAAAA62007,AAA99043} |
||||
92 {85,63,49,45} {AAAAAAA89932,AAAAAAAAAAAAA22860,AAAAAAAAAAAAAAAAAAA1205,AAAAAAAAAAAA21089} |
||||
93 {11} {AAAAAAAAAAA176,AAAAAAAAAAAAAA8666,AAAAAAAAAAAAAAA453,AAAAAAAAAAAAA85723,A68938,AAAAAAAAAAAAA9821,AAAAAAA48038,AAAAAAAAAAAAAAAAA59387,AA99927,AAAAA17383} |
||||
94 {98,9,85,62,88,91,60,61,38,86} {AAAAAAAA81587,AAAAA17383,AAAAAAAA81587} |
||||
95 {47,77} {AAAAAAAAAAAAAAAAA764,AAAAAAAAAAA74076,AAAAAAAAAA18107,AAAAA40681,AAAAAAAAAAAAAAA35875,AAAAA60038,AAAAAAA56483} |
||||
96 {23,97,43} {AAAAAAAAAA646,A87088} |
||||
97 {54,2,86,65} {47735,AAAAAAA99836,AAAAAAAAAAAAAAAAA6897,AAAAAAAAAAAAAAAA29150,AAAAAAA80240,AAAAAAAAAAAAAAAA98414,AAAAAAA56483,AAAAAAAAAAAAAAAA29150,AAAAAAA39692,AA21643} |
||||
98 {38,34,32,89} {AAAAAAAAAAAAAAAAAA71621,AAAA8857,AAAAAAAAAAAAAAAAAAA65037,AAAAAAAAAAAAAAAA31334,AAAAAAAAAA48845} |
||||
99 {37,86} {AAAAAAAAAAAAAAAAAA32918,AAAAA70514,AAAAAAAAA10012,AAAAAAAAAAAAAAAAA59387,AAAAAAAAAA64777,AAAAAAAAAAAAAAAAAAA15356} |
||||
100 {85,32,57,39,49,84,32,3,30} {AAAAAAA80240,AAAAAAAAAAAAAAAA1729,AAAAA60038,AAAAAAAAAAA92631,AAAAAAAA9523} |
Loading…
Reference in new issue