mirror of https://github.com/postgres/postgres
parent
0ad7db4be4
commit
1db943b3ca
@ -0,0 +1,69 @@ |
||||
subdir = contrib/intarray
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global |
||||
|
||||
# override libdir to install shlib in contrib not main directory
|
||||
libdir := $(libdir)/contrib
|
||||
|
||||
# shared library parameters
|
||||
NAME= _int
|
||||
SO_MAJOR_VERSION= 1
|
||||
SO_MINOR_VERSION= 0
|
||||
|
||||
override CPPFLAGS += -I$(srcdir) -DPGSQL71
|
||||
|
||||
OBJS= _int.o
|
||||
|
||||
all: all-lib $(NAME).sql |
||||
|
||||
# Shared library stuff
|
||||
include $(top_srcdir)/src/Makefile.shlib |
||||
|
||||
|
||||
$(NAME).sql: $(NAME).sql.in |
||||
sed -e 's:MODULE_PATHNAME:$(libdir)/$(shlib):g' < $< > $@
|
||||
|
||||
.PHONY: submake |
||||
submake: |
||||
$(MAKE) -C $(top_builddir)/src/test/regress pg_regress
|
||||
|
||||
# Run the regression test against an already-installed postmaster.
# The 'submake' prerequisite builds pg_regress in src/test/regress first.
# Keep exactly one recipe for this target: a second 'installcheck' rule
# carrying its own recipe makes GNU make warn and discard the earlier one.
installcheck: submake
	$(top_builddir)/src/test/regress/pg_regress _int
|
||||
|
||||
# in-tree test doesn't work yet (no way to install my shared library)
|
||||
#check: all submake
|
||||
# $(top_builddir)/src/test/regress/pg_regress --temp-install \
|
||||
# --top-builddir=$(top_builddir) _int
|
||||
check: |
||||
@echo "'make check' is not supported."
|
||||
@echo "Do 'make install', then 'make installcheck' instead."
|
||||
|
||||
install: all installdirs install-lib |
||||
#$(INSTALL_DATA) $(srcdir)/README.$(NAME) $(docdir)/contrib
|
||||
$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
|
||||
|
||||
installdirs: |
||||
$(mkinstalldirs) $(docdir)/contrib $(datadir)/contrib $(libdir)
|
||||
|
||||
uninstall: uninstall-lib |
||||
rm -f $(docdir)/contrib/README.$(NAME) $(datadir)/contrib/$(NAME).sql
|
||||
|
||||
clean distclean maintainer-clean: clean-lib |
||||
rm -f *.so y.tab.c y.tab.h $(OBJS) $(NAME).sql
|
||||
# things created by various check targets
|
||||
rm -rf results tmp_check log
|
||||
rm -f regression.diffs regression.out regress.out run_check.out
|
||||
ifeq ($(PORTNAME), win) |
||||
rm -f regress.def
|
||||
endif |
||||
|
||||
depend dep: |
||||
$(CC) -MM $(CFLAGS) *.c >depend
|
||||
|
||||
ifeq (depend,$(wildcard depend)) |
||||
include depend |
||||
endif |
@ -0,0 +1,64 @@ |
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# Makefile --
|
||||
#
|
||||
# Makefile for the _int module (GiST index support for int4 arrays)
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
PGDIR = ../..
|
||||
SRCDIR = $(PGDIR)/src
|
||||
|
||||
include $(SRCDIR)/Makefile.global |
||||
|
||||
INCLUDE_OPT = -I ./ \
|
||||
-I $(SRCDIR)/ \
|
||||
-I $(SRCDIR)/include \
|
||||
-I $(SRCDIR)/port/$(PORTNAME)
|
||||
|
||||
CFLAGS += $(INCLUDE_OPT) $(CFLAGS_SL)
|
||||
|
||||
MODNAME = _int
|
||||
OBJFILES = $(MODNAME).o
|
||||
|
||||
SQLDEFS = $(MODNAME).sql
|
||||
|
||||
MODULE = $(MODNAME)$(DLSUFFIX)
|
||||
|
||||
MODDIR = $(LIBDIR)/modules
|
||||
|
||||
SQLDIR = $(LIBDIR)/sql
|
||||
|
||||
# None of these names a real file; declare them phony so that a stray file
# called 'all', 'module' or 'sql' cannot make the target look up to date.
.PHONY: all module sql

# Default goal: build the loadable module and its SQL definitions.
all: module sql

module: $(MODULE)

sql: $(SQLDEFS)
||||
|
||||
$(MODULE): $(OBJFILES) |
||||
$(CC) $(CFLAGS) -shared -o $@ $(OBJFILES)
|
||||
|
||||
install: $(MODULE) $(SQLDEFS) $(MODDIR) $(SQLDIR) |
||||
cp -p $(MODULE) $(MODDIR)/
|
||||
strip $(MODDIR)/$(MODULE)
|
||||
cp -p $(SQLDEFS) $(SQLDIR)/
|
||||
|
||||
$(MODDIR): |
||||
mkdir -p $@
|
||||
|
||||
$(SQLDIR): |
||||
mkdir -p $@
|
||||
|
||||
%.sql: %.sql.in |
||||
sed "s|MODULE_PATHNAME|$(MODDIR)/$(MODULE)|" < $< > $@
|
||||
|
||||
depend dep: |
||||
$(CC) -MM $(INCLUDE_OPT) *.c >depend
|
||||
|
||||
clean: |
||||
rm -f $(MODULE) $(SQLDEFS) *$(DLSUFFIX)
|
||||
rm -f *~ *# *.b *.o *.output *.tab.h $(MODNAME)parse.h $(MODNAME)parse.c $(MODNAME)scan.c
|
||||
|
||||
ifeq (depend,$(wildcard depend)) |
||||
include depend |
||||
endif |
@ -0,0 +1,81 @@ |
||||
This is an implementation of RD-tree data structure using GiST interface |
||||
of PostgreSQL. It has built-in lossy compression - must be declared |
||||
in index creation - with (islossy). Current implementation has index support |
||||
for one-dimensional array of int4's. |
||||
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov |
||||
(oleg@sai.msu.su). See http://www.sai.msu.su/~megera/postgres/gist |
||||
for additional information. |
||||
|
||||
INSTALLATION: |
||||
|
||||
gmake |
||||
gmake install |
||||
-- load functions |
||||
psql <database> < _int.sql |
||||
|
||||
REGRESSION TEST: |
||||
|
||||
gmake installcheck |
||||
|
||||
EXAMPLE USAGE: |
||||
|
||||
create table message (mid int not null,sections int[]); |
||||
create table message_section_map (mid int not null,sid int not null); |
||||
|
||||
-- create indices |
||||
CREATE unique index message_key on message ( mid ); |
||||
CREATE unique index message_section_map_key2 on message_section_map (sid, mid ); |
||||
CREATE INDEX message_rdtree_idx on message using gist ( sections ) with ( islossy ); |
||||
|
||||
-- select some messages with section in 1 OR 2 - OVERLAP operator |
||||
select message.mid from message where message.sections && '{1,2}'; |
||||
|
||||
-- select messages that are in sections 1 AND 2 - CONTAINS operator
||||
select message.mid from message where message.sections @ '{1,2}'; |
||||
-- the same, CONTAINED operator |
||||
select message.mid from message where '{1,2}' ~ message.sections; |
||||
|
||||
BENCHMARK: |
||||
|
||||
subdirectory bench contains benchmark suite. |
||||
cd ./bench |
||||
1. createdb TEST |
||||
2. psql TEST < ../_int.sql |
||||
3. ./create_test.pl | psql TEST |
||||
4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries |
||||
with/without RD-Tree. Run script without arguments to |
||||
	   see available options.
||||
|
||||
a)test without RD-Tree (OR) |
||||
./bench.pl -d TEST -s 1,2 -v |
||||
b)test with RD-Tree |
||||
./bench.pl -d TEST -s 1,2 -v -r |
||||
|
||||
BENCHMARKS: |
||||
|
||||
Size of table <message>: 200000 |
||||
Size of table <message_section_map>: 268538 |
||||
|
||||
Distribution of messages by sections: |
||||
|
||||
section 0: 73899 messages |
||||
section 1: 16298 messages |
||||
section 50: 1241 messages |
||||
section 99: 705 messages |
||||
|
||||
old - without RD-Tree support, |
||||
new - with RD-Tree |
||||
|
||||
+----------+---------------+----------------+ |
||||
|Search set|OR, time in sec|AND, time in sec| |
||||
| +-------+-------+--------+-------+ |
||||
| | old | new | old | new | |
||||
+----------+-------+-------+--------+-------+ |
||||
| 1| 1.427| 0.215| -| -| |
||||
+----------+-------+-------+--------+-------+ |
||||
| 99| 1.029| 0.018| -| -| |
||||
+----------+-------+-------+--------+-------+ |
||||
| 1,2| 1.829| 0.334| 5.654| 0.042| |
||||
+----------+-------+-------+--------+-------+ |
||||
| 1,2,50,60| 2.057| 0.359| 5.044| 0.007| |
||||
+----------+-------+-------+--------+-------+ |
@ -0,0 +1,842 @@ |
||||
/******************************************************************************
|
||||
This file contains routines that can be bound to a Postgres backend and |
||||
called by the backend in the process of processing queries. The calling |
||||
format for these routines is dictated by Postgres architecture. |
||||
******************************************************************************/ |
||||
|
||||
#include <stdio.h> |
||||
#include <float.h> |
||||
#include <string.h> |
||||
|
||||
#include "postgres.h" |
||||
#include "access/gist.h" |
||||
#include "access/itup.h" |
||||
#include "access/rtree.h" |
||||
#include "utils/elog.h" |
||||
#include "utils/palloc.h" |
||||
#include "utils/array.h" |
||||
#include "utils/builtins.h" |
||||
#include "storage/bufpage.h" |
||||
|
||||
#define MAXNUMRANGE 100 |
||||
|
||||
#define max(a,b) ((a) > (b) ? (a) : (b)) |
||||
#define min(a,b) ((a) <= (b) ? (a) : (b)) |
||||
/* Absolute value; the argument is fully parenthesized so abs(x - y) negates the whole expression. */
#define abs(a)		((a) <  (0) ? (-(a)) : (a))
||||
|
||||
#define ARRPTR(x) ( (int4 *) ARR_DATA_PTR(x) ) |
||||
#ifdef PGSQL71 |
||||
#define ARRSIZE(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x)) |
||||
#else |
||||
#define ARRSIZE(x) getNitems( ARR_NDIM(x), ARR_DIMS(x)) |
||||
#endif |
||||
|
||||
#define NDIM 1 |
||||
#define ARRISNULL(x) ( (x) ? ( ( ARR_NDIM(x) == NDIM ) ? ( ( ARRSIZE( x ) ) ? 0 : 1 ) : 1 ) : 1 ) |
||||
#define SORT(x) if ( ARRSIZE( x ) > 1 ) isort( (void*)ARRPTR( x ), ARRSIZE( x ) ); |
||||
#define PREPAREARR(x) \ |
||||
if ( ARRSIZE( x ) > 1 ) {\
|
||||
if ( isort( (void*)ARRPTR( x ), ARRSIZE( x ) ) )\
|
||||
x = _int_unique( x );\
|
||||
} |
||||
/*
|
||||
#define GIST_DEBUG |
||||
#define GIST_QUERY_DEBUG |
||||
*/ |
||||
#ifdef GIST_DEBUG |
||||
static void printarr ( ArrayType * a, int num ) { |
||||
char bbb[16384]; |
||||
char *cur; |
||||
int l; |
||||
int *d; |
||||
d = ARRPTR( a ); |
||||
*bbb = '\0'; |
||||
cur = bbb; |
||||
for(l=0; l<min( num, ARRSIZE( a ));l++) { |
||||
sprintf(cur,"%d ", d[l] ); |
||||
cur = strchr( cur, '\0' ) ; |
||||
} |
||||
elog(NOTICE, "\t\t%s", bbb); |
||||
} |
||||
#endif |
||||
|
||||
/*
|
||||
** useful functions
||||
*/ |
||||
bool isort( int *a, const int len ); |
||||
ArrayType * new_intArrayType( int num ); |
||||
ArrayType * copy_intArrayType( ArrayType * a ); |
||||
ArrayType * resize_intArrayType( ArrayType * a, int num ); |
||||
int internal_size( int *a, int len ); |
||||
ArrayType * _int_unique( ArrayType * a ); |
||||
|
||||
/*
|
||||
** GiST support methods |
||||
*/ |
||||
bool g_int_consistent(GISTENTRY *entry, ArrayType *query, StrategyNumber strategy); |
||||
GISTENTRY * g_int_compress(GISTENTRY *entry); |
||||
GISTENTRY * g_int_decompress(GISTENTRY *entry); |
||||
float * g_int_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result); |
||||
GIST_SPLITVEC * g_int_picksplit(bytea *entryvec, GIST_SPLITVEC *v); |
||||
bool g_int_internal_consistent(ArrayType *key, ArrayType *query, StrategyNumber strategy); |
||||
ArrayType * g_int_union(bytea *entryvec, int *sizep); |
||||
bool * g_int_same(ArrayType *b1, ArrayType *b2, bool *result); |
||||
|
||||
|
||||
/*
|
||||
** R-tree support functions
||||
*/ |
||||
bool inner_int_contains(ArrayType *a, ArrayType *b); |
||||
bool inner_int_overlap(ArrayType *a, ArrayType *b); |
||||
ArrayType * inner_int_union(ArrayType *a, ArrayType *b); |
||||
ArrayType * inner_int_inter(ArrayType *a, ArrayType *b); |
||||
|
||||
bool _int_different(ArrayType *a, ArrayType *b); |
||||
bool _int_same(ArrayType *a, ArrayType *b); |
||||
bool _int_contains(ArrayType *a, ArrayType *b); |
||||
bool _int_contained(ArrayType *a, ArrayType *b); |
||||
bool _int_overlap(ArrayType *a, ArrayType *b); |
||||
ArrayType * _int_union(ArrayType *a, ArrayType *b); |
||||
ArrayType * _int_inter(ArrayType *a, ArrayType *b); |
||||
void rt__int_size(ArrayType *a, float* sz); |
||||
|
||||
|
||||
/*****************************************************************************
|
||||
* GiST functions |
||||
*****************************************************************************/ |
||||
|
||||
/*
|
||||
** The GiST Consistent method for int4 arrays (_int)
||||
** Should return false if for all data items x below entry, |
||||
** the predicate x op query == FALSE, where op is the oper |
||||
** corresponding to strategy in the pg_amop table. |
||||
*/ |
||||
bool
|
||||
g_int_consistent(GISTENTRY *entry, |
||||
ArrayType *query, |
||||
StrategyNumber strategy) |
||||
{ |
||||
|
||||
/* sort query for fast search, key is already sorted */ |
||||
if ( ARRISNULL( query ) ) return FALSE;
|
||||
PREPAREARR( query );
|
||||
/*
|
||||
    ** leaf and non-leaf entries are both checked with
    ** g_int_internal_consistent
||||
*/ |
||||
return(g_int_internal_consistent((ArrayType *)(entry->pred), query, strategy)); |
||||
} |
||||
|
||||
/*
|
||||
** The GiST Union method for int4 arrays (_int)
||||
** returns the minimal set that encloses all the entries in entryvec |
||||
*/ |
||||
ArrayType * |
||||
g_int_union(bytea *entryvec, int *sizep) |
||||
{ |
||||
int numranges, i; |
||||
ArrayType *out = (ArrayType *)NULL; |
||||
ArrayType *tmp; |
||||
|
||||
numranges = (VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY);
|
||||
tmp = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[0]).pred; |
||||
|
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE, "union %d", numranges); |
||||
#endif |
||||
|
||||
for (i = 1; i < numranges; i++) { |
||||
out = inner_int_union(tmp, (ArrayType *) |
||||
(((GISTENTRY *)(VARDATA(entryvec)))[i]).pred); |
||||
if (i > 1 && tmp) pfree(tmp); |
||||
tmp = out; |
||||
} |
||||
|
||||
*sizep = VARSIZE( out ); |
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE, "\t ENDunion %d %d", *sizep, ARRSIZE( out ) ); |
||||
#endif |
||||
if ( *sizep == 0 ) { |
||||
pfree( out ); |
||||
return NULL; |
||||
} |
||||
return(out); |
||||
} |
||||
|
||||
/*
|
||||
** GiST Compress and Decompress methods |
||||
*/ |
||||
GISTENTRY * |
||||
g_int_compress(GISTENTRY *entry) |
||||
{ |
||||
GISTENTRY *retval; |
||||
ArrayType * r; |
||||
int len; |
||||
int *dr; |
||||
int i,min,cand; |
||||
|
||||
retval = palloc(sizeof(GISTENTRY)); |
||||
if ( ! retval )
|
||||
elog(ERROR,"Can't allocate memory for compression"); |
||||
|
||||
if ( ARRISNULL( (ArrayType *) entry->pred ) ) { |
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE,"COMP IN: NULL");
|
||||
#endif |
||||
gistentryinit(*retval, (char *)NULL, entry->rel, entry->page, entry->offset,
|
||||
0, FALSE); |
||||
return( retval );
|
||||
} |
||||
|
||||
r = copy_intArrayType( (ArrayType *) entry->pred );
|
||||
if ( entry->leafkey ) PREPAREARR( r ); |
||||
len = ARRSIZE( r ); |
||||
|
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE, "COMP IN: %d leaf; %d rel; %d page; %d offset; %d bytes; %d elems", entry->leafkey, (int)entry->rel, (int)entry->page, (int)entry->offset, (int)entry->bytes, len); |
||||
//printarr( r, len );
|
||||
#endif |
||||
|
||||
if ( len >= 2*MAXNUMRANGE ) { /*compress*/ |
||||
r = resize_intArrayType( r, 2*( len ) ); |
||||
|
||||
dr = ARRPTR( r ); |
||||
|
||||
for(i=len-1; i>=0;i--) |
||||
dr[2*i] = dr[2*i+1] = dr[i]; |
||||
|
||||
len *= 2; |
||||
cand = 1; |
||||
while( len > MAXNUMRANGE * 2 ) { |
||||
min = 0x7fffffff; |
||||
for( i=2; i<len;i+=2 ) |
||||
if ( min > (dr[i] - dr[i-1]) ) { |
||||
min = (dr[i] - dr[i-1]); |
||||
cand = i; |
||||
} |
||||
memmove( (void*)&dr[cand-1], (void*)&dr[cand+1], (len - cand - 1)*sizeof(int) ); |
||||
len -= 2; |
||||
} |
||||
r = resize_intArrayType(r, len ); |
||||
} |
||||
|
||||
gistentryinit(*retval, (char *)r, entry->rel, entry->page, entry->offset, VARSIZE( r ), FALSE); |
||||
|
||||
return(retval); |
||||
} |
||||
|
||||
GISTENTRY * |
||||
g_int_decompress(GISTENTRY *entry) |
||||
{ |
||||
GISTENTRY *retval; |
||||
ArrayType * r;
|
||||
int *dr, lenr; |
||||
ArrayType * in;
|
||||
int lenin; |
||||
int *din; |
||||
int i,j; |
||||
|
||||
if ( entry->bytes < ARR_OVERHEAD( NDIM ) || ARRISNULL( (ArrayType *) entry->pred ) ) {
|
||||
retval = palloc(sizeof(GISTENTRY)); |
||||
if ( ! retval )
|
||||
elog(ERROR,"Can't allocate memory for decompression"); |
||||
gistentryinit(*retval, (char *)NULL, entry->rel, entry->page, entry->offset, 0, FALSE); |
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE,"DECOMP IN: NULL");
|
||||
#endif |
||||
return( retval );
|
||||
} |
||||
|
||||
|
||||
in = (ArrayType *) entry->pred;
|
||||
lenin = ARRSIZE(in); |
||||
din = ARRPTR(in); |
||||
|
||||
if ( lenin < 2*MAXNUMRANGE ) { /*not comressed value*/ |
||||
/* sometimes strange bytesize */ |
||||
gistentryinit(*entry, (char *)in, entry->rel, entry->page, entry->offset, VARSIZE( in ), FALSE); |
||||
return (entry); |
||||
} |
||||
|
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE, "DECOMP IN: %d leaf; %d rel; %d page; %d offset; %d bytes; %d elems", entry->leafkey, (int)entry->rel, (int)entry->page, (int)entry->offset, (int)entry->bytes, lenin); |
||||
//printarr( in, lenin );
|
||||
#endif |
||||
|
||||
lenr = internal_size(din, lenin); |
||||
|
||||
r = new_intArrayType( lenr ); |
||||
dr = ARRPTR( r ); |
||||
|
||||
for(i=0;i<lenin;i+=2) |
||||
for(j=din[i]; j<=din[i+1]; j++) |
||||
if ( (!i) || *(dr-1) != j ) |
||||
*dr++ = j; |
||||
|
||||
retval = palloc(sizeof(GISTENTRY)); |
||||
if ( ! retval )
|
||||
elog(ERROR,"Can't allocate memory for decompression"); |
||||
gistentryinit(*retval, (char *)r, entry->rel, entry->page, entry->offset, VARSIZE( r ), FALSE); |
||||
|
||||
return(retval); |
||||
} |
||||
|
||||
/*
|
||||
** The GiST Penalty method for int4 arrays (_int)
||||
*/ |
||||
float * |
||||
g_int_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result) |
||||
{ |
||||
Datum ud; |
||||
float tmp1, tmp2; |
||||
|
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE, "penalty"); |
||||
#endif |
||||
ud = (Datum)inner_int_union((ArrayType *)(origentry->pred), (ArrayType *)(newentry->pred)); |
||||
rt__int_size((ArrayType *)ud, &tmp1); |
||||
rt__int_size((ArrayType *)(origentry->pred), &tmp2); |
||||
*result = tmp1 - tmp2; |
||||
pfree((char *)ud); |
||||
|
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE, "--penalty\t%g", *result); |
||||
#endif |
||||
|
||||
return(result); |
||||
} |
||||
|
||||
|
||||
|
||||
/*
|
||||
** The GiST PickSplit method for int4 arrays (_int)
||||
** We use Guttman's poly time split algorithm
|
||||
*/ |
||||
GIST_SPLITVEC * |
||||
g_int_picksplit(bytea *entryvec, |
||||
GIST_SPLITVEC *v) |
||||
{ |
||||
OffsetNumber i, j; |
||||
ArrayType *datum_alpha, *datum_beta; |
||||
ArrayType *datum_l, *datum_r; |
||||
ArrayType *union_d, *union_dl, *union_dr; |
||||
ArrayType *inter_d; |
||||
bool firsttime; |
||||
float size_alpha, size_beta, size_union, size_inter; |
||||
float size_waste, waste; |
||||
float size_l, size_r; |
||||
int nbytes; |
||||
OffsetNumber seed_1 = 0, seed_2 = 0; |
||||
OffsetNumber *left, *right; |
||||
OffsetNumber maxoff; |
||||
|
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE, "--------picksplit %d",(VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)); |
||||
#endif |
||||
|
||||
maxoff = ((VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)) - 2; |
||||
nbytes = (maxoff + 2) * sizeof(OffsetNumber); |
||||
v->spl_left = (OffsetNumber *) palloc(nbytes); |
||||
v->spl_right = (OffsetNumber *) palloc(nbytes); |
||||
|
||||
firsttime = true; |
||||
waste = 0.0; |
||||
|
||||
for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) { |
||||
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred); |
||||
for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) { |
||||
datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[j].pred); |
||||
|
||||
/* compute the wasted space by unioning these guys */ |
||||
/* size_waste = size_union - size_inter; */ |
||||
union_d = (ArrayType *)inner_int_union(datum_alpha, datum_beta); |
||||
rt__int_size(union_d, &size_union); |
||||
inter_d = (ArrayType *)inner_int_inter(datum_alpha, datum_beta); |
||||
rt__int_size(inter_d, &size_inter); |
||||
size_waste = size_union - size_inter; |
||||
|
||||
pfree(union_d); |
||||
|
||||
if (inter_d != (ArrayType *) NULL) |
||||
pfree(inter_d); |
||||
|
||||
/*
|
||||
* are these a more promising split that what we've |
||||
* already seen? |
||||
*/ |
||||
|
||||
if (size_waste > waste || firsttime) { |
||||
waste = size_waste; |
||||
seed_1 = i; |
||||
seed_2 = j; |
||||
firsttime = false; |
||||
} |
||||
} |
||||
} |
||||
|
||||
left = v->spl_left; |
||||
v->spl_nleft = 0; |
||||
right = v->spl_right; |
||||
v->spl_nright = 0; |
||||
|
||||
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_1].pred); |
||||
datum_l = copy_intArrayType( datum_alpha );
|
||||
rt__int_size((ArrayType *)datum_l, &size_l); |
||||
datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_2].pred); |
||||
datum_r = copy_intArrayType( datum_beta );
|
||||
rt__int_size((ArrayType *)datum_r, &size_r); |
||||
|
||||
/*
|
||||
* Now split up the regions between the two seeds. An important |
||||
* property of this split algorithm is that the split vector v |
||||
* has the indices of items to be split in order in its left and |
||||
* right vectors. We exploit this property by doing a merge in |
||||
* the code that actually splits the page. |
||||
* |
||||
* For efficiency, we also place the new index tuple in this loop. |
||||
* This is handled at the very end, when we have placed all the |
||||
* existing tuples and i == maxoff + 1. |
||||
*/ |
||||
|
||||
maxoff = OffsetNumberNext(maxoff); |
||||
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { |
||||
|
||||
|
||||
/*
|
||||
* If we've already decided where to place this item, just |
||||
* put it on the right list. Otherwise, we need to figure |
||||
* out which page needs the least enlargement in order to |
||||
* store the item. |
||||
*/ |
||||
|
||||
if (i == seed_1) { |
||||
*left++ = i; |
||||
v->spl_nleft++; |
||||
continue; |
||||
} else if (i == seed_2) { |
||||
*right++ = i; |
||||
v->spl_nright++; |
||||
continue; |
||||
} |
||||
|
||||
/* okay, which page needs least enlargement? */
|
||||
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred); |
||||
union_dl = (ArrayType *)inner_int_union(datum_l, datum_alpha); |
||||
union_dr = (ArrayType *)inner_int_union(datum_r, datum_alpha); |
||||
rt__int_size((ArrayType *)union_dl, &size_alpha); |
||||
rt__int_size((ArrayType *)union_dr, &size_beta); |
||||
|
||||
/* pick which page to add it to */ |
||||
if (size_alpha - size_l < size_beta - size_r) { |
||||
if ( datum_l ) pfree(datum_l); |
||||
if ( union_dr ) pfree(union_dr); |
||||
datum_l = union_dl; |
||||
size_l = size_alpha; |
||||
*left++ = i; |
||||
v->spl_nleft++; |
||||
} else { |
||||
if ( datum_r ) pfree(datum_r); |
||||
if ( union_dl ) pfree(union_dl); |
||||
datum_r = union_dr; |
||||
size_r = size_beta; |
||||
*right++ = i; |
||||
v->spl_nright++; |
||||
} |
||||
} |
||||
/**left = *right = FirstOffsetNumber;*/ /* sentinel value, see dosplit() */ |
||||
|
||||
if ( *(left-1) > *(right-1) ) {
|
||||
*right = FirstOffsetNumber; |
||||
*(left-1) = InvalidOffsetNumber; |
||||
} else { |
||||
*left = FirstOffsetNumber; |
||||
*(right-1) = InvalidOffsetNumber; |
||||
} |
||||
|
||||
|
||||
v->spl_ldatum = (char *)datum_l; |
||||
v->spl_rdatum = (char *)datum_r; |
||||
|
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE, "--------ENDpicksplit %d %d",v->spl_nleft, v->spl_nright); |
||||
#endif |
||||
return v; |
||||
} |
||||
|
||||
/*
|
||||
** Equality methods |
||||
*/ |
||||
|
||||
|
||||
bool * |
||||
g_int_same(ArrayType *b1, ArrayType *b2, bool *result) |
||||
{ |
||||
if (_int_same(b1, b2)) |
||||
*result = TRUE; |
||||
else *result = FALSE; |
||||
|
||||
return(result); |
||||
} |
||||
|
||||
bool
|
||||
g_int_internal_consistent(ArrayType *key, |
||||
ArrayType *query, |
||||
StrategyNumber strategy) |
||||
{ |
||||
bool retval; |
||||
|
||||
#ifdef GIST_QUERY_DEBUG |
||||
elog(NOTICE, "internal_consistent, %d", strategy); |
||||
#endif |
||||
|
||||
switch(strategy) { |
||||
case RTOverlapStrategyNumber: |
||||
retval = (bool)inner_int_overlap(key, query); |
||||
break; |
||||
case RTSameStrategyNumber: |
||||
case RTContainsStrategyNumber: |
||||
retval = (bool)inner_int_contains(key, query); |
||||
break; |
||||
case RTContainedByStrategyNumber: |
||||
retval = (bool)inner_int_overlap(key, query); |
||||
break; |
||||
default: |
||||
retval = FALSE; |
||||
} |
||||
return(retval); |
||||
} |
||||
|
||||
bool |
||||
_int_contained(ArrayType *a, ArrayType *b) |
||||
{ |
||||
return ( _int_contains(b, a) ); |
||||
} |
||||
|
||||
bool |
||||
_int_contains ( ArrayType *a, ArrayType *b ) { |
||||
bool res; |
||||
ArrayType *an, *bn; |
||||
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE; |
||||
|
||||
an = copy_intArrayType( a ); |
||||
bn = copy_intArrayType( b ); |
||||
|
||||
PREPAREARR(an); |
||||
PREPAREARR(bn); |
||||
|
||||
res = inner_int_contains( an, bn ); |
||||
pfree( an ); pfree( bn ); |
||||
return res; |
||||
} |
||||
|
||||
bool
|
||||
inner_int_contains ( ArrayType *a, ArrayType *b ) { |
||||
int na, nb; |
||||
int i,j, n; |
||||
int *da, *db; |
||||
|
||||
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE; |
||||
|
||||
na = ARRSIZE( a ); |
||||
nb = ARRSIZE( b );
|
||||
da = ARRPTR( a ); |
||||
db = ARRPTR( b ); |
||||
|
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE, "contains %d %d", na, nb); |
||||
#endif |
||||
|
||||
i = j = n = 0; |
||||
while( i<na && j<nb ) |
||||
if ( da[i] < db[j] ) |
||||
i++; |
||||
else if ( da[i] == db[j] ) { |
||||
n++; i++; j++; |
||||
} else
|
||||
j++; |
||||
|
||||
return ( n == nb ) ? TRUE : FALSE; |
||||
} |
||||
|
||||
/*****************************************************************************
|
||||
* Operator class for R-tree indexing |
||||
*****************************************************************************/ |
||||
|
||||
bool |
||||
_int_different(ArrayType *a, ArrayType *b) |
||||
{ |
||||
return ( !_int_same( a, b ) ); |
||||
} |
||||
|
||||
bool
|
||||
_int_same ( ArrayType *a, ArrayType *b ) { |
||||
int na , nb ; |
||||
int n;
|
||||
int *da, *db; |
||||
bool anull = ARRISNULL( a ); |
||||
bool bnull = ARRISNULL( b ); |
||||
|
||||
if ( anull || bnull )
|
||||
return ( anull && bnull ) ? TRUE : FALSE;
|
||||
|
||||
SORT( a ); |
||||
SORT( b );
|
||||
na = ARRSIZE( a ); |
||||
nb = ARRSIZE( b ); |
||||
da = ARRPTR( a ); |
||||
db = ARRPTR( b ); |
||||
|
||||
if ( na != nb ) return FALSE; |
||||
|
||||
n = 0; |
||||
for(n=0; n<na; n++) |
||||
if ( da[n] != db[n] ) |
||||
return FALSE; |
||||
|
||||
return TRUE;
|
||||
} |
||||
|
||||
/* _int_overlap -- does a overlap b?
|
||||
*/ |
||||
bool
|
||||
_int_overlap ( ArrayType *a, ArrayType *b ) { |
||||
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE; |
||||
|
||||
SORT(a); |
||||
SORT(b); |
||||
|
||||
return inner_int_overlap( a, b ); |
||||
} |
||||
|
||||
bool
|
||||
inner_int_overlap ( ArrayType *a, ArrayType *b ) { |
||||
int na , nb ; |
||||
int i,j; |
||||
int *da, *db; |
||||
|
||||
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE; |
||||
|
||||
na = ARRSIZE( a ); |
||||
nb = ARRSIZE( b ); |
||||
da = ARRPTR( a ); |
||||
db = ARRPTR( b ); |
||||
|
||||
#ifdef GIST_DEBUG |
||||
elog(NOTICE, "g_int_overlap"); |
||||
#endif |
||||
|
||||
i = j = 0; |
||||
while( i<na && j<nb ) |
||||
if ( da[i] < db[j] ) |
||||
i++; |
||||
else if ( da[i] == db[j] ) |
||||
return TRUE;
|
||||
else
|
||||
j++; |
||||
|
||||
return FALSE; |
||||
} |
||||
|
||||
/*
 * Union of a and b as a newly allocated array.
 *
 * Each non-NULL input is sorted in a private copy before being handed to
 * inner_int_union(), so the caller's arrays are not modified.  Inputs that
 * ARRISNULL rejects are passed through unchanged.
 */
ArrayType *
_int_union ( ArrayType *a, ArrayType *b ) {
	ArrayType *an = NULL, *bn = NULL;
	ArrayType *res;

	if ( ! ARRISNULL( a ) ) {
		an = copy_intArrayType( a );
		SORT(an);
	}
	if ( ! ARRISNULL( b ) ) {
		bn = copy_intArrayType( b );
		SORT(bn);
	}

	/* inner_int_union always returns fresh memory, so the copies can go */
	res = inner_int_union( an ? an : a, bn ? bn : b );

	if ( an ) pfree( an );
	if ( bn ) pfree( bn );
	return res;
}
||||
|
||||
ArrayType *
|
||||
inner_int_union ( ArrayType *a, ArrayType *b ) { |
||||
ArrayType * r = NULL; |
||||
int na , nb; |
||||
int *da, *db, *dr; |
||||
int i,j; |
||||
|
||||
#ifdef GIST_DEBUG |
||||
//elog(NOTICE, "inner_union %d %d", ARRISNULL( a ) , ARRISNULL( b ) );
|
||||
#endif |
||||
|
||||
if ( ARRISNULL( a ) && ARRISNULL( b ) ) return new_intArrayType(0); |
||||
if ( ARRISNULL( a ) ) r = copy_intArrayType( b );
|
||||
if ( ARRISNULL( b ) ) r = copy_intArrayType( a );
|
||||
|
||||
if ( r ) {
|
||||
dr = ARRPTR( r ); |
||||
} else { |
||||
na = ARRSIZE( a ); |
||||
nb = ARRSIZE( b ); |
||||
da = ARRPTR( a ); |
||||
db = ARRPTR( b ); |
||||
|
||||
r = new_intArrayType( na + nb );
|
||||
dr = ARRPTR( r ); |
||||
|
||||
/* union */
|
||||
i = j = 0; |
||||
while( i<na && j<nb )
|
||||
if ( da[i] < db[j] ) |
||||
*dr++ = da[i++]; |
||||
else |
||||
*dr++ = db[j++]; |
||||
|
||||
while( i<na ) *dr++ = da[i++]; |
||||
while( j<nb ) *dr++ = db[j++]; |
||||
|
||||
}
|
||||
|
||||
if ( ARRSIZE(r) > 1 )
|
||||
r = _int_unique( r ); |
||||
|
||||
return r; |
||||
} |
||||
|
||||
|
||||
/*
 * Intersection of a and b as a newly allocated array, or NULL when either
 * input is NULL/empty (ARRISNULL) or nothing is shared.  Sorting is done on
 * private copies so the caller's arrays are not modified.
 */
ArrayType *
_int_inter ( ArrayType *a, ArrayType *b ) {
	ArrayType *an, *bn;
	ArrayType *res;

	/* this function returns a pointer: NULL, not the boolean FALSE */
	if ( ARRISNULL( a ) || ARRISNULL( b ) ) return NULL;

	an = copy_intArrayType( a );
	bn = copy_intArrayType( b );

	SORT(an);
	SORT(bn);

	res = inner_int_inter( an, bn );
	pfree( an ); pfree( bn );
	return res;
}
||||
|
||||
/*
 * Intersection of two ascending arrays.
 *
 * Returns a newly allocated array holding each common value once, or NULL
 * when either input is NULL/empty (ARRISNULL) or there is no common value.
 */
ArrayType *
inner_int_inter ( ArrayType *a, ArrayType *b ) {
	ArrayType * r;
	int na , nb ;
	int *da, *db, *dr, *start;
	int i,j;

#ifdef GIST_DEBUG
	/* elog(NOTICE, "inner_inter %d %d", ARRISNULL( a ), ARRISNULL( b ) ); */
#endif

	if ( ARRISNULL( a ) || ARRISNULL( b ) ) return NULL;

	na = ARRSIZE( a );
	nb = ARRSIZE( b );
	da = ARRPTR( a );
	db = ARRPTR( b );
	r = new_intArrayType( min(na, nb) );
	start = dr = ARRPTR( r );

	i = j = 0;
	while( i<na && j<nb )
		if ( da[i] < db[j] )
			i++;
		else if ( da[i] == db[j] ) {
			/*
			 * Look at the previously stored value only if one exists.
			 * Testing i+j instead would read the word in front of the
			 * data area whenever the first match is not at i == j == 0.
			 */
			if ( dr == start || *(dr-1) != db[j] )
				*dr++ = db[j];
			i++; j++;
		} else
			j++;

	if ( dr == start ) {
		pfree( r );
		return NULL;
	}

	/* shrink the result to the number of values actually stored */
	return resize_intArrayType(r, dr - start );
}
||||
|
||||
void |
||||
rt__int_size(ArrayType *a, float *size) |
||||
{ |
||||
if ( ARRISNULL( a ) ) |
||||
*size = 0.0; |
||||
else |
||||
*size = (float)ARRSIZE( a ); |
||||
|
||||
return; |
||||
} |
||||
|
||||
|
||||
/*****************************************************************************
|
||||
* Miscellaneous operators and functions |
||||
*****************************************************************************/ |
||||
|
||||
/* len >= 2 */ |
||||
bool isort ( int *a, int len ) { |
||||
int tmp, index; |
||||
int *cur, *end; |
||||
bool r = FALSE; |
||||
end = a + len; |
||||
do { |
||||
index = 0; |
||||
cur = a + 1; |
||||
while( cur < end ) { |
||||
if( *(cur-1) > *cur ) { |
||||
tmp=*(cur-1); *(cur-1) = *cur; *cur=tmp; |
||||
index = 1; |
||||
} else if ( ! r && *(cur-1) == *cur ) |
||||
r = TRUE; |
||||
cur++; |
||||
} |
||||
} while( index ); |
||||
return r; |
||||
} |
||||
|
||||
ArrayType * new_intArrayType( int num ) { |
||||
ArrayType * r; |
||||
int nbytes = ARR_OVERHEAD( NDIM ) + sizeof(int)*num; |
||||
|
||||
r = (ArrayType *) palloc( nbytes ); |
||||
if ( ! r ) |
||||
elog(ERROR, "Can't allocate memory for new array"); |
||||
MemSet(r, 0, nbytes); |
||||
r->size = nbytes; |
||||
r->ndim = NDIM; |
||||
#ifndef PGSQL71 |
||||
SET_LO_FLAG(false, r); |
||||
#endif |
||||
*( (int*)ARR_DIMS(r) ) = num; |
||||
*( (int*)ARR_LBOUND(r) ) = 1; |
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
ArrayType * resize_intArrayType( ArrayType * a, int num ) { |
||||
int nbytes = ARR_OVERHEAD( NDIM ) + sizeof(int)*num; |
||||
|
||||
if ( num == ARRSIZE(a) ) return a; |
||||
|
||||
a = (ArrayType *) repalloc( a, nbytes ); |
||||
if ( ! a ) |
||||
elog(ERROR, "Can't reallocate memory for new array"); |
||||
|
||||
a->size = nbytes; |
||||
*( (int*)ARR_DIMS(a) ) = num;
|
||||
return a; |
||||
} |
||||
|
||||
ArrayType * copy_intArrayType( ArrayType * a ) { |
||||
ArrayType * r; |
||||
if ( ! a ) return NULL; |
||||
r = new_intArrayType( ARRSIZE(a) ); |
||||
memmove(r,a,VARSIZE(a)); |
||||
return r; |
||||
} |
||||
|
||||
/* num for compressed key */ |
||||
/*
 * Number of integers described by a compressed key.
 *
 * a holds len ints laid out as (low, high) pairs.  Each pair contributes
 * high - low + 1, except that a pair whose low equals the previous pair's
 * high is not counted at all.
 */
int internal_size (int *a, int len ) {
	int total = 0;
	int pos = 0;

	while ( pos < len ) {
		/* skip a range that starts where the previous one ended */
		if ( pos == 0 || a[pos] != a[pos-1] )
			total += a[pos+1] - a[pos] + 1;
		pos += 2;
	}

	return total;
}
||||
|
||||
/* r is sorted and size of r > 1 */ |
||||
/*
 * Collapse runs of equal values in r (expected sorted, with more than one
 * element) so each value is kept once, compacting in place, then resize the
 * array to the number of values kept.
 */
ArrayType * _int_unique( ArrayType * r ) {
	int	total = ARRSIZE(r);
	int	*base = ARRPTR( r );
	int	kept = 0;	/* index of the last value kept so far */
	int	k;

	for ( k = 0; k < total; k++ )
		if ( base[k] != base[kept] )
			base[++kept] = base[k];

	return resize_intArrayType(r, kept + 1 );
}
|
@ -0,0 +1,211 @@ |
||||
-- Create the operators and GiST index support for integer arrays (_int4)
||||
-- |
||||
BEGIN TRANSACTION; |
||||
|
||||
-- |
||||
-- External C-functions for R-tree methods |
||||
-- |
||||
|
||||
-- Comparison methods |
||||
|
||||
CREATE FUNCTION _int_contains(_int4, _int4) RETURNS bool |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
INSERT INTO pg_description (objoid, description) |
||||
SELECT oid, 'contains'::text |
||||
FROM pg_proc |
||||
WHERE proname = '_int_contains'::name; |
||||
|
||||
CREATE FUNCTION _int_contained(_int4, _int4) RETURNS bool |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
INSERT INTO pg_description (objoid, description) |
||||
SELECT oid, 'contained in'::text |
||||
FROM pg_proc |
||||
WHERE proname = '_int_contained'::name; |
||||
|
||||
CREATE FUNCTION _int_overlap(_int4, _int4) RETURNS bool |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
INSERT INTO pg_description (objoid, description) |
||||
SELECT oid, 'overlaps'::text |
||||
FROM pg_proc |
||||
WHERE proname = '_int_overlap'::name; |
||||
|
||||
CREATE FUNCTION _int_same(_int4, _int4) RETURNS bool |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
INSERT INTO pg_description (objoid, description) |
||||
SELECT oid, 'same as'::text |
||||
FROM pg_proc |
||||
WHERE proname = '_int_same'::name; |
||||
|
||||
CREATE FUNCTION _int_different(_int4, _int4) RETURNS bool |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
INSERT INTO pg_description (objoid, description) |
||||
SELECT oid, 'different'::text |
||||
FROM pg_proc |
||||
WHERE proname = '_int_different'::name; |
||||
|
||||
-- support routines for indexing |
||||
|
||||
CREATE FUNCTION _int_union(_int4, _int4) RETURNS _int4 |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
CREATE FUNCTION _int_inter(_int4, _int4) RETURNS _int4 |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
-- |
||||
-- OPERATORS |
||||
-- |
||||
|
||||
CREATE OPERATOR && ( |
||||
LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_overlap, |
||||
COMMUTATOR = '&&', |
||||
RESTRICT = contsel, JOIN = contjoinsel |
||||
); |
||||
|
||||
--CREATE OPERATOR = ( |
||||
-- LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_same, |
||||
-- COMMUTATOR = '=', NEGATOR = '<>', |
||||
-- RESTRICT = eqsel, JOIN = eqjoinsel, |
||||
-- SORT1 = '<', SORT2 = '<' |
||||
--); |
||||
|
||||
CREATE OPERATOR <> ( |
||||
LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_different, |
||||
COMMUTATOR = '<>', NEGATOR = '=', |
||||
RESTRICT = neqsel, JOIN = neqjoinsel |
||||
); |
||||
|
||||
CREATE OPERATOR @ ( |
||||
LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_contains, |
||||
COMMUTATOR = '~', RESTRICT = contsel, JOIN = contjoinsel |
||||
); |
||||
|
||||
CREATE OPERATOR ~ ( |
||||
LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_contained, |
||||
COMMUTATOR = '@', RESTRICT = contsel, JOIN = contjoinsel |
||||
); |
||||
|
||||
|
||||
-- define the GiST support methods |
||||
CREATE FUNCTION g_int_consistent(opaque,_int4,int4) RETURNS bool |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
CREATE FUNCTION g_int_compress(opaque) RETURNS opaque |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
CREATE FUNCTION g_int_decompress(opaque) RETURNS opaque |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
CREATE FUNCTION g_int_penalty(opaque,opaque,opaque) RETURNS opaque |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
CREATE FUNCTION g_int_picksplit(opaque, opaque) RETURNS opaque |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
CREATE FUNCTION g_int_union(bytea, opaque) RETURNS _int4 |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
CREATE FUNCTION g_int_same(_int4, _int4, opaque) RETURNS opaque |
||||
AS 'MODULE_PATHNAME' LANGUAGE 'c'; |
||||
|
||||
|
||||
-- register the default opclass for indexing |
||||
INSERT INTO pg_opclass (opcname, opcdeftype) |
||||
SELECT 'gist__int_ops', oid |
||||
FROM pg_type |
||||
WHERE typname = '_int4'; |
||||
|
||||
|
||||
-- get the operators for _int4 and store them in a tmp table
||||
SELECT o.oid AS opoid, o.oprname |
||||
INTO TABLE _int_ops_tmp |
||||
FROM pg_operator o, pg_type t |
||||
WHERE o.oprleft = t.oid and o.oprright = t.oid |
||||
and t.typname = '_int4'; |
||||
|
||||
-- make sure we have the right operators |
||||
-- SELECT * from _int_ops_tmp; |
||||
|
||||
-- using the tmp table, generate the amop entries |
||||
|
||||
-- _int_overlap |
||||
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy) |
||||
SELECT am.oid, opcl.oid, c.opoid, 3 |
||||
FROM pg_am am, pg_opclass opcl, _int_ops_tmp c |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and c.oprname = '&&'; |
||||
|
||||
-- _int_same |
||||
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy) |
||||
SELECT am.oid, opcl.oid, c.opoid, 6 |
||||
FROM pg_am am, pg_opclass opcl, _int_ops_tmp c |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and c.oprname = '='; |
||||
|
||||
-- _int_contains |
||||
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy) |
||||
SELECT am.oid, opcl.oid, c.opoid, 7 |
||||
FROM pg_am am, pg_opclass opcl, _int_ops_tmp c |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and c.oprname = '@'; |
||||
|
||||
-- _int_contained |
||||
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy) |
||||
SELECT am.oid, opcl.oid, c.opoid, 8 |
||||
FROM pg_am am, pg_opclass opcl, _int_ops_tmp c |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and c.oprname = '~'; |
||||
|
||||
DROP TABLE _int_ops_tmp; |
||||
|
||||
|
||||
-- add the entries to amproc for the support methods |
||||
-- note the amprocnum numbers associated with each are specific! |
||||
|
||||
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) |
||||
SELECT am.oid, opcl.oid, pro.oid, 1 |
||||
FROM pg_am am, pg_opclass opcl, pg_proc pro |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and proname = 'g_int_consistent'; |
||||
|
||||
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) |
||||
SELECT am.oid, opcl.oid, pro.oid, 2 |
||||
FROM pg_am am, pg_opclass opcl, pg_proc pro |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and proname = 'g_int_union'; |
||||
|
||||
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) |
||||
SELECT am.oid, opcl.oid, pro.oid, 3 |
||||
FROM pg_am am, pg_opclass opcl, pg_proc pro |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and proname = 'g_int_compress'; |
||||
|
||||
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) |
||||
SELECT am.oid, opcl.oid, pro.oid, 4 |
||||
FROM pg_am am, pg_opclass opcl, pg_proc pro |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and proname = 'g_int_decompress'; |
||||
|
||||
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) |
||||
SELECT am.oid, opcl.oid, pro.oid, 5 |
||||
FROM pg_am am, pg_opclass opcl, pg_proc pro |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and proname = 'g_int_penalty'; |
||||
|
||||
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) |
||||
SELECT am.oid, opcl.oid, pro.oid, 6 |
||||
FROM pg_am am, pg_opclass opcl, pg_proc pro |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and proname = 'g_int_picksplit'; |
||||
|
||||
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) |
||||
SELECT am.oid, opcl.oid, pro.oid, 7 |
||||
FROM pg_am am, pg_opclass opcl, pg_proc pro |
||||
WHERE amname = 'gist' and opcname = 'gist__int_ops' |
||||
and proname = 'g_int_same'; |
||||
|
||||
END TRANSACTION; |
@ -0,0 +1,104 @@ |
||||
#!/usr/bin/perl

# Benchmark script: builds one query that finds messages by section id,
# runs it a number of times and reports the elapsed time.  The query goes
# either through the int[] operators on message.sections (-r) or through
# the message_section_map table.

use strict;
# make sure we are in a sane environment.
use DBI();
use DBD::Pg();
use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
use Getopt::Std;

my %opt;
getopts('d:b:s:veorauc', \%opt);

# -s is the only mandatory option; without it show the usage and stop.
if ( !( scalar %opt && defined $opt{s} ) ) {
	print <<EOT;
Usage:
$0 -d DATABASE -s SECTIONS [-b NUMBER] [-v] [-e] [-o] [-r] [-a] [-u] [-c]
-d DATABASE -DATABASE
-b NUMBER -number of repeats
-s SECTIONS -sections, format sid1[,sid2[,sid3[...]]]]
-v -verbose (show SQL)
-e -show explain
-r -use RD-tree index
-a -AND section
-o -show output
-u -unique
-c -count

EOT
	exit;
}

$opt{d} ||= '_int4';
# Stop with the driver's message when the connection fails, instead of
# failing later on an undefined handle.
my $dbi = DBI->connect('DBI:Pg:dbname='.$opt{d})
	|| die "Can't connect to database '$opt{d}': $DBI::errstr\n";

my %table;	# tables for the FROM clause (the keys are the table names)
my @where;	# conditions, joined with AND below

$table{message}=1;

if ( $opt{a} ) {
	# -a: one condition per requested section, all of them ANDed
	if ( $opt{r} ) {
		push @where, "message.sections @ '{$opt{s}}'";
	} else {
		foreach my $sid ( split(/[,\s]+/, $opt{s} )) {
			push @where, "EXISTS ( select message_section_map.mid from message_section_map where message.mid=message_section_map.mid and message_section_map.sid = $sid )";
		}
	}
} else {
	# default: match messages in any of the requested sections
	if ( $opt{r} ) {
		push @where, "message.sections && '{$opt{s}}'";
	} else {
		$table{message_section_map} = 1;
		push @where, "message.mid = message_section_map.mid";
		push @where, "message_section_map.sid in ($opt{s})";
	}
}

# select list: plain or distinct mid, or a count of them with -c
my $outf;
if ( $opt{c} ) {
	$outf = ( $opt{u} ) ? 'count( distinct message.mid )' : 'count( message.mid )';
} else {
	$outf = ( $opt{u} ) ? 'distinct( message.mid )' : 'message.mid';
}
my $sql = "select $outf from ".join(', ', keys %table)." where ".join(' AND ', @where).';';

if ( $opt{v} ) {
	print "$sql\n";
}

if ( $opt{e} ) {
	$dbi->do("explain $sql");
}

# Fall back to a single run when -b is missing, zero, negative or not a
# number, so the loop always runs and the average below never divides by
# zero.
my $repeats = ( $opt{b} && $opt{b} > 0 ) ? $opt{b} : 1;

my $t0 = [gettimeofday];
my $found = 0;
my @a;
foreach ( 1..$repeats ) {
	@a=exec_sql($dbi,$sql);
	$found = scalar @a;
}
my $elapsed = tv_interval ( $t0, [gettimeofday]);
if ( $opt{o} ) {
	# NOTE(review): the query selects only $outf, so the "sections" field
	# printed here is never part of the fetched rows -- confirm whether
	# this output is still wanted in this form.
	foreach ( @a ) {
		print "$_->{mid}\t$_->{sections}\n";
	}
}
printf("total: %.02f sec; number: %d; for one: %.03f sec; found %d docs\n", $elapsed, $repeats, $elapsed/$repeats, $found );
$dbi -> disconnect;
||||
|
||||
# Prepare $sql on the handle $dbi, execute it with @keys as bind values
# and return all fetched rows as a list of hash references.  Dies when
# prepare or execute returns a false value.
sub exec_sql {
	my $dbi  = shift;
	my $sql  = shift;
	my @keys = @_;

	my $sth = $dbi->prepare($sql) or die;
	$sth->execute( @keys ) or die;

	my @row;
	while ( defined( my $r = $sth->fetchrow_hashref ) ) {
		push @row, $r;
	}
	$sth->finish;

	return @row;
}
||||
|
@ -0,0 +1,73 @@ |
||||
#!/usr/bin/perl

# Writes a SQL script to stdout that creates the "message" and
# "message_section_map" tables, loads pseudo-random data into both with
# COPY, builds the indexes and prints the row counts.  The data rows are
# staged in two temporary files in the current directory, which are
# removed at the end.

use strict;

print <<EOT;
create table message (
mid int not null,
sections int[]
);
create table message_section_map (
mid int not null,
sid int not null
);

EOT

open(MSG,">message.tmp") || die;
open(MAP,">message_section_map.tmp") || die;

# fixed seed, so every run walks the same pseudo-random sequence
srand( 1 );

# row counts tried earlier: 1778, 3443, 5000, 29362, 33331, 83268
for my $mid ( 1 .. 200000 ) {
	my @sections;

	if ( rand() >= 0.7 ) {
		# several candidate section ids; only the first occurrence of
		# each id is kept
		my $last = int( rand()*5 );
		my %seen;
		for ( 0 .. $last ) {
			my $sid = int( (rand()**4)*100 );
			push @sections, $sid unless $seen{$sid}++;
		}
	} else {
		# a single section id
		@sections = ( int( (rand()**4)*100 ) );
	}

	if ( !@sections || rand() < 0.1 ) {
		# message without sections: NULL array and no map rows
		print MSG "$mid\t\\N\n";
	} else {
		print MSG "$mid\t{".join(',',@sections)."}\n";
		print MAP "$mid\t$_\n" for @sections;
	}
}
close MAP;
close MSG;

copytable('message');
copytable('message_section_map');

print <<EOT;

CREATE unique index message_key on message ( mid );
--CREATE unique index message_section_map_key1 on message_section_map ( mid, sid );
CREATE unique index message_section_map_key2 on message_section_map ( sid, mid );
CREATE INDEX message_rdtree_idx on message using gist ( sections ) with ( islossy );
VACUUM ANALYZE;

select count(*) from message;
select count(*) from message_section_map;



EOT


unlink 'message.tmp', 'message_section_map.tmp';
||||
|
||||
# Print a "COPY <table> from stdin;" statement, then the contents of the
# file "<table>.tmp" unchanged, then the "\." end-of-data marker.  Dies
# when the file cannot be opened.
sub copytable {
	my ($table) = @_;

	print "COPY $table from stdin;\n";

	open( FFF, "$table.tmp") || die;
	print while <FFF>;
	close FFF;

	print "\\.\n";
}
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,19 @@ |
||||
-- |
||||
-- first, define the datatype. Turn off echoing so that expected file |
||||
-- does not depend on contents of seg.sql. |
||||
-- |
||||
\set ECHO none |
||||
CREATE TABLE test__int( a int[] ); |
||||
\copy test__int from 'data/test__int.data' |
||||
SELECT count(*) from test__int WHERE a && '{23,50}'; |
||||
count |
||||
------- |
||||
345 |
||||
(1 row) |
||||
|
||||
SELECT count(*) from test__int WHERE a @ '{23,50}'; |
||||
count |
||||
------- |
||||
12 |
||||
(1 row) |
||||
|
@ -0,0 +1,15 @@ |
||||
-- |
||||
-- first, define the datatype. Turn off echoing so that expected file |
||||
-- does not depend on contents of seg.sql. |
||||
-- |
||||
\set ECHO none |
||||
\i _int.sql |
||||
\set ECHO all |
||||
|
||||
CREATE TABLE test__int( a int[] ); |
||||
|
||||
\copy test__int from 'data/test__int.data' |
||||
|
||||
SELECT count(*) from test__int WHERE a && '{23,50}'; |
||||
SELECT count(*) from test__int WHERE a @ '{23,50}'; |
||||
|
Loading…
Reference in new issue