mirror of https://github.com/postgres/postgres
parent
e105f9a119
commit
2146d8c6a0
@ -0,0 +1,31 @@ |
||||
#############################################
|
||||
# Makefile for integer aggregator
|
||||
# Copyright (C) 2001 Digital Music Network.
|
||||
# by Mark L. Woodward
|
||||
#
|
||||
# Builds the int_aggregate shared library and generates int_aggregate.sql
# from int_aggregate.sql.in.
#
# subdir is this directory's path within the tree; top_builddir is the
# relative path back to the top, from which Makefile.global is included.
subdir = contrib/intagg
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global |
||||
|
||||
# NAME is the module's base name; SONAME appends $(DLSUFFIX) to it to form
# the shared library file name used by the all/install/clean rules below.
NAME=int_aggregate
|
||||
SONAME = $(NAME)$(DLSUFFIX)
|
||||
# MODULES, DATA_built and DOCS are not referenced by any rule in this file;
# presumably they are consumed by contrib-global.mk -- verify there.
MODULES = int_aggregate
|
||||
# NOTE(review): this names the shared library with a hard-coded ".so"
# suffix, whereas SONAME uses $(DLSUFFIX) -- confirm that listing the
# library here (rather than the generated $(SQLS)) is intended.
DATA_built = int_aggregate.so
|
||||
DOCS = README.int_aggrigate
|
||||
# SQL script generated by the %.sql pattern rule below.
SQLS=int_aggregate.sql
|
||||
|
||||
include $(top_srcdir)/contrib/contrib-global.mk |
||||
|
||||
# Generate X.sql from X.sql.in by replacing every MODULE_FILENAME with
# $libdir/$(NAME); the doubled dollar sign passes a literal "$" to sed.
%.sql: %.sql.in |
||||
sed 's,MODULE_FILENAME,$$libdir/$(NAME),g' $< >$@
|
||||
|
||||
# Default target: the shared library plus the generated SQL script.
all : $(SONAME) $(SQLS) |
||||
|
||||
|
||||
# Install the shared library into $(DESTDIR)$(pkglibdir).
install : all |
||||
$(INSTALL_SHLIB) $(SONAME) $(DESTDIR)$(pkglibdir)
|
||||
|
||||
|
||||
# Remove the files produced by "all".
clean : |
||||
rm -f $(SONAME)
|
||||
rm -f $(SQLS)
|
@ -0,0 +1,55 @@ |
||||
Integer aggregator/enumerator. |
||||
|
||||
Many database systems have the notion of a one to many table. |
||||
|
||||
A one to many table usually sits between two indexed tables, |
||||
as: |
||||
|
||||
create table one_to_many(left int, right int) ; |
||||
|
||||
And it is used like this: |
||||
|
||||
SELECT right.* from right JOIN one_to_many ON (right.id = one_to_many.right) |
||||
WHERE one_to_many.left = item; |
||||
|
||||
This will return all the items in the right hand table for an entry |
||||
in the left hand table. This is a very common construct in SQL. |
||||
|
||||
Now, this methodology can be cumbersome with a very large number of |
||||
entries in the one_to_many table. Depending on the order in which |
||||
data was entered, a join like this could result in an index scan |
||||
and a fetch for each right hand entry in the table for a particular |
||||
left hand entry. |
||||
|
||||
If you have a very dynamic system, there is not much you can do. |
||||
However, if you have some data which is fairly static, you can |
||||
create a summary table with the aggregator. |
||||
|
||||
CREATE TABLE summary as SELECT left, int_array_aggregate(right) |
||||
AS right FROM one_to_many GROUP BY left; |
||||
|
||||
This will create a table with one row per left item, and an array |
||||
of right items. Now this is pretty useless without some way of using |
||||
the array; that's why there is an array enumerator. |
||||
|
||||
SELECT left, int_array_enum(right) FROM summary WHERE left = item; |
||||
|
||||
The above query using int_array_enum, produces the same results as: |
||||
|
||||
SELECT left, right FROM one_to_many WHERE left = item; |
||||
|
||||
The difference is that the query against the summary table has to get |
||||
only one row from the table, whereas the query against "one_to_many" |
||||
must index scan and fetch a row for each entry. |
||||
|
||||
On our system, an EXPLAIN shows a query with a cost of 8488 gets reduced |
||||
to a cost of 329. The original query is a join against the one_to_many table; rewritten to use the summary table it becomes: |
||||
|
||||
select right, count(right) from |
||||
( |
||||
select left, int_array_enum(right) as right from summary join |
||||
(select left from left_table where left = item) as lefts |
||||
ON (summary.left = lefts.left ) |
||||
) as list group by right order by count desc ; |
||||
|
||||
|
@ -0,0 +1,271 @@ |
||||
/*
|
||||
* Integer array aggregator / enumerator |
||||
* |
||||
* Mark L. Woodward
|
||||
* DMN Digital Music Network. |
||||
* www.dmn.com |
||||
* |
||||
* Copyright (C) Digital Music Network |
||||
* December 20, 2001 |
||||
* |
||||
* This file is the property of the Digital Music Network (DMN). |
||||
* It is being made available to users of the PostgreSQL system |
||||
* under the BSD license. |
||||
* |
||||
*/ |
||||
#include "postgres.h" |
||||
|
||||
#include <ctype.h> |
||||
#include <stdio.h> |
||||
#include <sys/types.h> |
||||
#include <string.h> |
||||
#include "postgres.h" |
||||
#include "access/heapam.h" |
||||
#include "catalog/catname.h" |
||||
#include "catalog/indexing.h" |
||||
#include "catalog/pg_proc.h" |
||||
#include "executor/executor.h" |
||||
#include "utils/fcache.h" |
||||
#include "utils/sets.h" |
||||
#include "utils/syscache.h" |
||||
#include "access/tupmacs.h" |
||||
#include "access/xact.h" |
||||
#include "fmgr.h" |
||||
#include "miscadmin.h" |
||||
#include "utils/array.h" |
||||
#include "utils/builtins.h" |
||||
#include "utils/memutils.h" |
||||
#include "utils/lsyscache.h" |
||||
|
||||
|
||||
/* This is actually a postgres version of a one dimentional array */ |
||||
|
||||
typedef struct agg |
||||
{ |
||||
ArrayType a; |
||||
int items; |
||||
int lower; |
||||
int4 array[1]; |
||||
}PGARRAY; |
||||
|
||||
/* This is used to keep track of our position during enumeration */ |
||||
typedef struct callContext |
||||
{ |
||||
PGARRAY *p; |
||||
int num; |
||||
int flags; |
||||
}CTX; |
||||
|
||||
/* CTX.flags bit: the array is a detoasted copy that int_enum must pfree */
#define TOASTED 1

/* Number of element slots allocated for a brand-new aggregate array */
#define START_NUM 8

/*
 * Bytes needed for a PGARRAY holding n elements.  sizeof(PGARRAY) already
 * accounts for one element (array[1]), hence the "- 1".  The argument is
 * parenthesised so that an expression argument (for example a conditional
 * expression) expands with the intended precedence.
 */
#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n) - 1) * sizeof(int4)))
||||
|
||||
static PGARRAY * GetPGArray(int4 state, int fAdd); |
||||
static PGARRAY *ShrinkPGArray(PGARRAY *p); |
||||
|
||||
Datum int_agg_state(PG_FUNCTION_ARGS); |
||||
Datum int_agg_final_count(PG_FUNCTION_ARGS); |
||||
Datum int_agg_final_array(PG_FUNCTION_ARGS); |
||||
Datum int_enum(PG_FUNCTION_ARGS); |
||||
|
||||
PG_FUNCTION_INFO_V1(int_agg_state); |
||||
PG_FUNCTION_INFO_V1(int_agg_final_count); |
||||
PG_FUNCTION_INFO_V1(int_agg_final_array); |
||||
PG_FUNCTION_INFO_V1(int_enum); |
||||
|
||||
/*
|
||||
* Manage the aggregation state of the array
|
||||
* You need to specify the correct memory context, or it will vanish!
|
||||
*/ |
||||
static PGARRAY * GetPGArray(int4 state, int fAdd) |
||||
{ |
||||
PGARRAY *p = (PGARRAY *) state; |
||||
|
||||
if(!state) |
||||
{ |
||||
/* New array */ |
||||
int cb = PGARRAY_SIZE(START_NUM); |
||||
|
||||
p = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cb); |
||||
|
||||
if(!p) |
||||
{ |
||||
elog(ERROR,"Integer aggregator, cant allocate TopTransactionContext memory"); |
||||
return 0; |
||||
} |
||||
|
||||
p->a.size = cb; |
||||
p->a.ndim= 0; |
||||
p->a.flags = 0; |
||||
p->items = 0; |
||||
p->lower= START_NUM; |
||||
} |
||||
else if(fAdd) |
||||
{ /* Ensure array has space */ |
||||
if(p->items >= p->lower) |
||||
{ |
||||
PGARRAY *pn; |
||||
int n = p->lower + p->lower; |
||||
int cbNew = PGARRAY_SIZE(n); |
||||
|
||||
pn = (PGARRAY *) repalloc(p, cbNew); |
||||
|
||||
if(!pn) |
||||
{ /* Realloc failed! Reallocate new block. */ |
||||
pn = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cbNew); |
||||
if(!pn) |
||||
{ |
||||
elog(ERROR, "Integer aggregator, REALLY REALLY can't alloc memory"); |
||||
return (PGARRAY *) NULL; |
||||
} |
||||
memcpy(pn, p, p->a.size); |
||||
pfree(p); |
||||
} |
||||
pn->a.size = cbNew; |
||||
pn->lower = n; |
||||
return pn; |
||||
} |
||||
} |
||||
return p; |
||||
} |
||||
|
||||
/* Shrinks the array to its actual size and moves it into the standard
|
||||
* memory allocation context, frees working memory */ |
||||
static PGARRAY *ShrinkPGArray(PGARRAY *p) |
||||
{ |
||||
PGARRAY *pnew=NULL; |
||||
if(p) |
||||
{ |
||||
/* get target size */ |
||||
int cb = PGARRAY_SIZE(p->items); |
||||
|
||||
/* use current transaction context */ |
||||
pnew = palloc(cb); |
||||
|
||||
if(pnew) |
||||
{ |
||||
/* Fix up the fields in the new structure, so Postgres understands */ |
||||
memcpy(pnew, p, cb); |
||||
pnew->a.size = cb; |
||||
pnew->a.ndim=1; |
||||
pnew->a.flags = 0; |
||||
pnew->lower = 0; |
||||
} |
||||
else |
||||
{ |
||||
elog(ERROR, "Integer aggregator, can't allocate memory"); |
||||
} |
||||
pfree(p); |
||||
}
|
||||
return pnew; |
||||
} |
||||
|
||||
/* Called for each iteration during an aggregate function */ |
||||
Datum int_agg_state(PG_FUNCTION_ARGS) |
||||
{ |
||||
int4 state = PG_GETARG_INT32(0); |
||||
int4 value = PG_GETARG_INT32(1); |
||||
|
||||
PGARRAY *p = GetPGArray(state, 1); |
||||
if(!p) |
||||
{ |
||||
elog(ERROR,"No aggregate storage\n"); |
||||
} |
||||
else if(p->items >= p->lower) |
||||
{ |
||||
elog(ERROR,"aggregate storage too small\n"); |
||||
} |
||||
else |
||||
{ |
||||
p->array[p->items++]= value; |
||||
} |
||||
PG_RETURN_INT32(p); |
||||
} |
||||
|
||||
/* This is the final function used for the integer aggregator. It returns all the integers
|
||||
* collected as a one dimentional integer array */ |
||||
Datum int_agg_final_array(PG_FUNCTION_ARGS) |
||||
{ |
||||
PGARRAY *pnew = ShrinkPGArray(GetPGArray(PG_GETARG_INT32(0),0)); |
||||
if(pnew) |
||||
{ |
||||
PG_RETURN_POINTER(pnew); |
||||
} |
||||
else |
||||
{ |
||||
PG_RETURN_NULL(); |
||||
} |
||||
} |
||||
|
||||
/* This function accepts an array, and returns one item for each entry in the array */ |
||||
Datum int_enum(PG_FUNCTION_ARGS) |
||||
{ |
||||
CTX *pc; |
||||
PGARRAY *p = (PGARRAY *) PG_GETARG_POINTER(0); |
||||
ReturnSetInfo *rsi = (ReturnSetInfo *)fcinfo->resultinfo; |
||||
|
||||
if(!p) |
||||
{ |
||||
elog(NOTICE, "No data sent\n"); |
||||
return 0; |
||||
} |
||||
if(!rsi) |
||||
{ |
||||
elog(ERROR, "No ReturnSetInfo sent! function must be declared returning a 'setof' integer"); |
||||
PG_RETURN_NULL(); |
||||
|
||||
} |
||||
if(!fcinfo->context) |
||||
{ |
||||
/* Allocate a working context */ |
||||
pc = (CTX *) palloc(sizeof(CTX)); |
||||
|
||||
if(!pc) |
||||
{ |
||||
elog(ERROR, "CTX Alocation failed\n"); |
||||
PG_RETURN_NULL(); |
||||
} |
||||
|
||||
/* Don't copy atribute if you don't need too */ |
||||
if(VARATT_IS_EXTENDED(p) ) |
||||
{ |
||||
/* Toasted!!! */ |
||||
pc->p = (PGARRAY *) PG_DETOAST_DATUM_COPY(p); |
||||
pc->flags = TOASTED; |
||||
if(!pc->p) |
||||
{ |
||||
elog(ERROR, "Error in toaster!!! no detoasting\n"); |
||||
PG_RETURN_NULL(); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
/* Untoasted */ |
||||
pc->p = p; |
||||
pc->flags = 0; |
||||
} |
||||
fcinfo->context = (Node *) pc; |
||||
pc->num=0; |
||||
} |
||||
else /* use an existing one */ |
||||
{ |
||||
pc = (CTX *) fcinfo->context; |
||||
} |
||||
/* Are we done yet? */ |
||||
if(pc->num >= pc->p->items) |
||||
{ |
||||
/* We are done */ |
||||
if(pc->flags & TOASTED) |
||||
pfree(pc->p); |
||||
pfree(fcinfo->context); |
||||
fcinfo->context = NULL; |
||||
rsi->isDone = ExprEndResult ; |
||||
} |
||||
else /* nope, return the next value */ |
||||
{ |
||||
int val = pc->p->array[pc->num++]; |
||||
rsi->isDone = ExprMultipleResult; |
||||
PG_RETURN_INT32(val); |
||||
} |
||||
PG_RETURN_NULL(); |
||||
} |
@ -0,0 +1,40 @@ |
||||
-- Drop any previous installation.
-- The aggregate is dropped before the functions it is built on, so the
-- script also works on servers that refuse to drop a function while
-- another object still depends on it.
drop aggregate int_array_aggregate(int4);
drop function int_agg_state (int4, int4);
drop function int_agg_final_array (int4);
drop function int_array_enum (int4[]);
||||
|
||||
|
||||
-- Transition function of the aggregate (internal).
-- It is called once for each item in an aggregation.
CREATE FUNCTION int_agg_state (int4, int4)
    RETURNS int4
    AS 'MODULE_FILENAME','int_agg_state'
    LANGUAGE 'c';
||||
|
||||
-- Final function of the aggregate (internal).
-- It is called at the end of the aggregation and returns an array.
CREATE FUNCTION int_agg_final_array (int4)
    RETURNS int4[]
    AS 'MODULE_FILENAME','int_agg_final_array'
    LANGUAGE 'c';
||||
|
||||
-- The aggregation function.
-- Uses the two functions above to build an array of integers from an
-- aggregation.
CREATE AGGREGATE int_array_aggregate
(
    BASETYPE = int4,
    SFUNC = int_agg_state,
    STYPE = int4,
    FINALFUNC = int_agg_final_array,
    INITCOND = 0
);
||||
|
||||
-- The enumeration function.
-- Returns each element of a one-dimensional integer array as a row.
CREATE FUNCTION int_array_enum(int4[])
    RETURNS SETOF integer
    AS 'MODULE_FILENAME','int_enum'
    LANGUAGE 'c';
||||
|
Loading…
Reference in new issue