mirror of https://github.com/postgres/postgres
parent
e105f9a119
commit
2146d8c6a0
@ -0,0 +1,31 @@ |
|||||||
|
#############################################
# Makefile for integer aggregator
# Copyright (C) 2001 Digital Music Network.
# by Mark L. Woodward
#
# Builds the int_aggregate shared library and generates its SQL install
# script from the matching .sql.in template.

subdir = contrib/intagg
top_builddir = ../..
include $(top_builddir)/src/Makefile.global

# Base name shared by the shared library and the generated SQL script.
NAME = int_aggregate
SONAME = $(NAME)$(DLSUFFIX)
SQLS = int_aggregate.sql

MODULES = int_aggregate
# NOTE(review): this hard-codes the ".so" suffix even though SONAME is built
# from $(DLSUFFIX), and it names the library rather than the generated SQL
# script -- confirm against contrib-global.mk that this is what is intended.
DATA_built = int_aggregate.so
DOCS = README.int_aggrigate

include $(top_srcdir)/contrib/contrib-global.mk

# Produce foo.sql from foo.sql.in by replacing every MODULE_FILENAME
# placeholder with $libdir/$(NAME).
%.sql: %.sql.in
	sed 's,MODULE_FILENAME,$$libdir/$(NAME),g' $< >$@

all : $(SONAME) $(SQLS)

install : all
	$(INSTALL_SHLIB) $(SONAME) $(DESTDIR)$(pkglibdir)

clean :
	rm -f $(SONAME)
	rm -f $(SQLS)
|
@ -0,0 +1,55 @@ |
|||||||
|
Integer aggregator/enumerator. |
||||||
|
|
||||||
|
Many database systems have the notion of a one to many table. |
||||||
|
|
||||||
|
A one to many table usually sits between two indexed tables, |
||||||
|
as: |
||||||
|
|
||||||
|
create table one_to_many(left int, right int) ; |
||||||
|
|
||||||
|
And it is used like this: |
||||||
|
|
||||||
|
SELECT right.* from right JOIN one_to_many ON (right.id = one_to_many.right) |
||||||
|
WHERE one_to_many.left = item; |
||||||
|
|
||||||
|
This will return all the items in the right hand table for an entry |
||||||
|
in the left hand table. This is a very common construct in SQL. |
||||||
|
|
||||||
|
Now, this methodology can be cumbersome with a very large number of |
||||||
|
entries in the one_to_many table. Depending on the order in which |
||||||
|
data was entered, a join like this could result in an index scan |
||||||
|
and a fetch for each right hand entry in the table for a particular |
||||||
|
left hand entry. |
||||||
|
|
||||||
|
If you have a very dynamic system, there is not much you can do. |
||||||
|
However, if you have some data which is fairly static, you can |
||||||
|
create a summary table with the aggregator. |
||||||
|
|
||||||
|
CREATE TABLE summary as SELECT left, int_array_aggregate(right) |
||||||
|
AS right FROM one_to_many GROUP BY left; |
||||||
|
|
||||||
|
This will create a table with one row per left item, and an array |
||||||
|
of right items. Now this is pretty useless without some way of using |
||||||
|
the array; that's why there is an array enumerator. |
||||||
|
|
||||||
|
SELECT left, int_array_enum(right) FROM summary WHERE left = item; |
||||||
|
|
||||||
|
The above query using int_array_enum, produces the same results as: |
||||||
|
|
||||||
|
SELECT left, right FROM one_to_many WHERE left = item; |
||||||
|
|
||||||
|
The difference is that the query against the summary table has to get |
||||||
|
only one row from the table, whereas the query against "one_to_many" |
||||||
|
must index scan and fetch a row for each entry. |
||||||
|
|
||||||
|
On our system, an EXPLAIN shows a query with a cost of 8488 gets reduced |
||||||
|
to a cost of 329. The query, originally a join against the one_to_many table, becomes: |
||||||
|
|
||||||
|
select right, count(right) from |
||||||
|
( |
||||||
|
select left, int_array_enum(right) as right from summary join |
||||||
|
(select left from left_table where left = item) as lefts |
||||||
|
ON (summary.left = lefts.left ) |
||||||
|
) as list group by right order by count desc ; |
||||||
|
|
||||||
|
|
@ -0,0 +1,271 @@ |
|||||||
|
/*
|
||||||
|
* Integer array aggregator / enumerator |
||||||
|
* |
||||||
|
* Mark L. Woodward
|
||||||
|
* DMN Digital Music Network. |
||||||
|
* www.dmn.com |
||||||
|
* |
||||||
|
* Copyright (C) Digital Music Network |
||||||
|
* December 20, 2001 |
||||||
|
* |
||||||
|
* This file is the property of the Digital Music Network (DMN). |
||||||
|
* It is being made available to users of the PostgreSQL system |
||||||
|
* under the BSD license. |
||||||
|
* |
||||||
|
*/ |
||||||
|
#include "postgres.h" |
||||||
|
|
||||||
|
#include <ctype.h> |
||||||
|
#include <stdio.h> |
||||||
|
#include <sys/types.h> |
||||||
|
#include <string.h> |
||||||
|
#include "postgres.h" |
||||||
|
#include "access/heapam.h" |
||||||
|
#include "catalog/catname.h" |
||||||
|
#include "catalog/indexing.h" |
||||||
|
#include "catalog/pg_proc.h" |
||||||
|
#include "executor/executor.h" |
||||||
|
#include "utils/fcache.h" |
||||||
|
#include "utils/sets.h" |
||||||
|
#include "utils/syscache.h" |
||||||
|
#include "access/tupmacs.h" |
||||||
|
#include "access/xact.h" |
||||||
|
#include "fmgr.h" |
||||||
|
#include "miscadmin.h" |
||||||
|
#include "utils/array.h" |
||||||
|
#include "utils/builtins.h" |
||||||
|
#include "utils/memutils.h" |
||||||
|
#include "utils/lsyscache.h" |
||||||
|
|
||||||
|
|
||||||
|
/* This is actually a postgres version of a one dimentional array */ |
||||||
|
|
||||||
|
typedef struct agg |
||||||
|
{ |
||||||
|
ArrayType a; |
||||||
|
int items; |
||||||
|
int lower; |
||||||
|
int4 array[1]; |
||||||
|
}PGARRAY; |
||||||
|
|
||||||
|
/* This is used to keep track of our position during enumeration */ |
||||||
|
typedef struct callContext |
||||||
|
{ |
||||||
|
PGARRAY *p; |
||||||
|
int num; |
||||||
|
int flags; |
||||||
|
}CTX; |
||||||
|
|
||||||
|
/* CTX.flags bit: CTX.p is a detoasted copy that int_enum() must pfree when done */
#define TOASTED		1

/* Number of element slots given to a freshly created aggregation array */
#define START_NUM	8

/*
 * Bytes needed for a PGARRAY holding n elements.  sizeof(PGARRAY) already
 * includes one element (array[1]), hence the "- 1".  The argument is
 * parenthesised so that expressions such as "a | b" or "c ? x : y" are
 * evaluated as a whole before the subtraction.
 */
#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n) - 1) * sizeof(int4)))
||||||
|
|
||||||
|
static PGARRAY * GetPGArray(int4 state, int fAdd); |
||||||
|
static PGARRAY *ShrinkPGArray(PGARRAY *p); |
||||||
|
|
||||||
|
Datum int_agg_state(PG_FUNCTION_ARGS); |
||||||
|
Datum int_agg_final_count(PG_FUNCTION_ARGS); |
||||||
|
Datum int_agg_final_array(PG_FUNCTION_ARGS); |
||||||
|
Datum int_enum(PG_FUNCTION_ARGS); |
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(int_agg_state); |
||||||
|
PG_FUNCTION_INFO_V1(int_agg_final_count); |
||||||
|
PG_FUNCTION_INFO_V1(int_agg_final_array); |
||||||
|
PG_FUNCTION_INFO_V1(int_enum); |
||||||
|
|
||||||
|
/*
|
||||||
|
* Manage the aggregation state of the array
|
||||||
|
* You need to specify the correct memory context, or it will vanish!
|
||||||
|
*/ |
||||||
|
static PGARRAY * GetPGArray(int4 state, int fAdd) |
||||||
|
{ |
||||||
|
PGARRAY *p = (PGARRAY *) state; |
||||||
|
|
||||||
|
if(!state) |
||||||
|
{ |
||||||
|
/* New array */ |
||||||
|
int cb = PGARRAY_SIZE(START_NUM); |
||||||
|
|
||||||
|
p = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cb); |
||||||
|
|
||||||
|
if(!p) |
||||||
|
{ |
||||||
|
elog(ERROR,"Integer aggregator, cant allocate TopTransactionContext memory"); |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
p->a.size = cb; |
||||||
|
p->a.ndim= 0; |
||||||
|
p->a.flags = 0; |
||||||
|
p->items = 0; |
||||||
|
p->lower= START_NUM; |
||||||
|
} |
||||||
|
else if(fAdd) |
||||||
|
{ /* Ensure array has space */ |
||||||
|
if(p->items >= p->lower) |
||||||
|
{ |
||||||
|
PGARRAY *pn; |
||||||
|
int n = p->lower + p->lower; |
||||||
|
int cbNew = PGARRAY_SIZE(n); |
||||||
|
|
||||||
|
pn = (PGARRAY *) repalloc(p, cbNew); |
||||||
|
|
||||||
|
if(!pn) |
||||||
|
{ /* Realloc failed! Reallocate new block. */ |
||||||
|
pn = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cbNew); |
||||||
|
if(!pn) |
||||||
|
{ |
||||||
|
elog(ERROR, "Integer aggregator, REALLY REALLY can't alloc memory"); |
||||||
|
return (PGARRAY *) NULL; |
||||||
|
} |
||||||
|
memcpy(pn, p, p->a.size); |
||||||
|
pfree(p); |
||||||
|
} |
||||||
|
pn->a.size = cbNew; |
||||||
|
pn->lower = n; |
||||||
|
return pn; |
||||||
|
} |
||||||
|
} |
||||||
|
return p; |
||||||
|
} |
||||||
|
|
||||||
|
/* Shrinks the array to its actual size and moves it into the standard
|
||||||
|
* memory allocation context, frees working memory */ |
||||||
|
static PGARRAY *ShrinkPGArray(PGARRAY *p) |
||||||
|
{ |
||||||
|
PGARRAY *pnew=NULL; |
||||||
|
if(p) |
||||||
|
{ |
||||||
|
/* get target size */ |
||||||
|
int cb = PGARRAY_SIZE(p->items); |
||||||
|
|
||||||
|
/* use current transaction context */ |
||||||
|
pnew = palloc(cb); |
||||||
|
|
||||||
|
if(pnew) |
||||||
|
{ |
||||||
|
/* Fix up the fields in the new structure, so Postgres understands */ |
||||||
|
memcpy(pnew, p, cb); |
||||||
|
pnew->a.size = cb; |
||||||
|
pnew->a.ndim=1; |
||||||
|
pnew->a.flags = 0; |
||||||
|
pnew->lower = 0; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
elog(ERROR, "Integer aggregator, can't allocate memory"); |
||||||
|
} |
||||||
|
pfree(p); |
||||||
|
}
|
||||||
|
return pnew; |
||||||
|
} |
||||||
|
|
||||||
|
/* Called for each iteration during an aggregate function */ |
||||||
|
Datum int_agg_state(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
int4 state = PG_GETARG_INT32(0); |
||||||
|
int4 value = PG_GETARG_INT32(1); |
||||||
|
|
||||||
|
PGARRAY *p = GetPGArray(state, 1); |
||||||
|
if(!p) |
||||||
|
{ |
||||||
|
elog(ERROR,"No aggregate storage\n"); |
||||||
|
} |
||||||
|
else if(p->items >= p->lower) |
||||||
|
{ |
||||||
|
elog(ERROR,"aggregate storage too small\n"); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
p->array[p->items++]= value; |
||||||
|
} |
||||||
|
PG_RETURN_INT32(p); |
||||||
|
} |
||||||
|
|
||||||
|
/* This is the final function used for the integer aggregator. It returns all the integers
|
||||||
|
* collected as a one dimentional integer array */ |
||||||
|
Datum int_agg_final_array(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
PGARRAY *pnew = ShrinkPGArray(GetPGArray(PG_GETARG_INT32(0),0)); |
||||||
|
if(pnew) |
||||||
|
{ |
||||||
|
PG_RETURN_POINTER(pnew); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
PG_RETURN_NULL(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/* This function accepts an array, and returns one item for each entry in the array */ |
||||||
|
Datum int_enum(PG_FUNCTION_ARGS) |
||||||
|
{ |
||||||
|
CTX *pc; |
||||||
|
PGARRAY *p = (PGARRAY *) PG_GETARG_POINTER(0); |
||||||
|
ReturnSetInfo *rsi = (ReturnSetInfo *)fcinfo->resultinfo; |
||||||
|
|
||||||
|
if(!p) |
||||||
|
{ |
||||||
|
elog(NOTICE, "No data sent\n"); |
||||||
|
return 0; |
||||||
|
} |
||||||
|
if(!rsi) |
||||||
|
{ |
||||||
|
elog(ERROR, "No ReturnSetInfo sent! function must be declared returning a 'setof' integer"); |
||||||
|
PG_RETURN_NULL(); |
||||||
|
|
||||||
|
} |
||||||
|
if(!fcinfo->context) |
||||||
|
{ |
||||||
|
/* Allocate a working context */ |
||||||
|
pc = (CTX *) palloc(sizeof(CTX)); |
||||||
|
|
||||||
|
if(!pc) |
||||||
|
{ |
||||||
|
elog(ERROR, "CTX Alocation failed\n"); |
||||||
|
PG_RETURN_NULL(); |
||||||
|
} |
||||||
|
|
||||||
|
/* Don't copy atribute if you don't need too */ |
||||||
|
if(VARATT_IS_EXTENDED(p) ) |
||||||
|
{ |
||||||
|
/* Toasted!!! */ |
||||||
|
pc->p = (PGARRAY *) PG_DETOAST_DATUM_COPY(p); |
||||||
|
pc->flags = TOASTED; |
||||||
|
if(!pc->p) |
||||||
|
{ |
||||||
|
elog(ERROR, "Error in toaster!!! no detoasting\n"); |
||||||
|
PG_RETURN_NULL(); |
||||||
|
} |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
/* Untoasted */ |
||||||
|
pc->p = p; |
||||||
|
pc->flags = 0; |
||||||
|
} |
||||||
|
fcinfo->context = (Node *) pc; |
||||||
|
pc->num=0; |
||||||
|
} |
||||||
|
else /* use an existing one */ |
||||||
|
{ |
||||||
|
pc = (CTX *) fcinfo->context; |
||||||
|
} |
||||||
|
/* Are we done yet? */ |
||||||
|
if(pc->num >= pc->p->items) |
||||||
|
{ |
||||||
|
/* We are done */ |
||||||
|
if(pc->flags & TOASTED) |
||||||
|
pfree(pc->p); |
||||||
|
pfree(fcinfo->context); |
||||||
|
fcinfo->context = NULL; |
||||||
|
rsi->isDone = ExprEndResult ; |
||||||
|
} |
||||||
|
else /* nope, return the next value */ |
||||||
|
{ |
||||||
|
int val = pc->p->array[pc->num++]; |
||||||
|
rsi->isDone = ExprMultipleResult; |
||||||
|
PG_RETURN_INT32(val); |
||||||
|
} |
||||||
|
PG_RETURN_NULL(); |
||||||
|
} |
@ -0,0 +1,40 @@ |
|||||||
|
-- Remove any previous installation of the functions and the aggregate
DROP FUNCTION int_agg_state (int4, int4);
DROP FUNCTION int_agg_final_array (int4);
DROP AGGREGATE int_array_aggregate(int4);
DROP FUNCTION int_array_enum (int4[]);


-- Internal transition function for the aggregate.
-- Called once for each item in an aggregation.
CREATE FUNCTION int_agg_state (int4, int4)
	RETURNS int4
	AS 'MODULE_FILENAME','int_agg_state'
	LANGUAGE 'c';

-- Internal final function for the aggregate.
-- Called at the end of the aggregation; returns an array.
CREATE FUNCTION int_agg_final_array (int4)
	RETURNS int4[]
	AS 'MODULE_FILENAME','int_agg_final_array'
	LANGUAGE 'c';

-- The aggregation function.
-- Uses the functions above to build an array of integers from an aggregation.
CREATE AGGREGATE int_array_aggregate
(
	BASETYPE = int4,
	SFUNC = int_agg_state,
	STYPE = int4,
	FINALFUNC = int_agg_final_array,
	INITCOND = 0
);

-- The enumeration function.
-- Returns each element of a one-dimensional integer array as a row.
CREATE FUNCTION int_array_enum(int4[])
	RETURNS SETOF integer
	AS 'MODULE_FILENAME','int_enum'
	LANGUAGE 'c';
||||||
|
|
Loading…
Reference in new issue