Add integer aggregator to /contrib.

mlw
24 years ago · 2146d8c6a0
parent e105f9a119
commit 2146d8c6a0
5 changed files with 403 additions and 2 deletions
--- a/contrib/README
+++ b/contrib/README
@ -71,10 +71,14 @@ fuzzystrmatch -
 	Levenshtein, metaphone, and soundex fuzzy string matching
 	by Joe Conway <joseph.conway@home.com>, Joel Burton <jburton@scw.org>
 intagg -
 	Integer aggregator
 	by  mlw <markw@mohawksoft.com>
 intarray -
 	Index support for arrays of int4, using GiST
-	by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov
+	by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov <oleg@sai.msu.su>
 	<oleg@sai.msu.su>.
 ipc_check -
 	Simple test script to help in configuring IPC.
--- a/contrib/intagg/Makefile
+++ b/contrib/intagg/Makefile
@ -0,0 +1,31 @@
 #############################################
 # Makefile for integer aggregator
 # Copyright (C) 2001 Digital Music Network.
 # by Mark L. Woodward
 #
 # Builds the int_aggregate shared library and generates int_aggregate.sql
 # from int_aggregate.sql.in.
 #
 # Location of this directory relative to the top of the build tree.
 subdir = contrib/intagg
 top_builddir = ../..
 include $(top_builddir)/src/Makefile.global
 # Base name shared by the shared library and the sed substitution below.
 NAME=int_aggregate
 SONAME	= $(NAME)$(DLSUFFIX)
 MODULES = int_aggregate
 # NOTE(review): this hard-codes ".so" while SONAME uses $(DLSUFFIX) -- confirm
 # that listing the library under DATA_built is intended.
 DATA_built = int_aggregate.so
 # The README file name is spelled "aggrigate"; keep this in sync with the file.
 DOCS = README.int_aggrigate
 SQLS=int_aggregate.sql
 include $(top_srcdir)/contrib/contrib-global.mk
 # Generate the SQL script: every MODULE_FILENAME placeholder becomes
 # $libdir/$(NAME) (the "$$" leaves a literal "$libdir" in the output).
 %.sql: %.sql.in
 	sed 's,MODULE_FILENAME,$$libdir/$(NAME),g' $< >$@
 # Default target: the shared library plus the generated SQL script.
 all : $(SONAME) $(SQLS)
 # Copy the shared library into the package library directory.
 install : all
 	$(INSTALL_SHLIB) $(SONAME) $(DESTDIR)$(pkglibdir)
 # Remove the generated library and SQL script.
 clean :
 	rm -f $(SONAME)
 	rm -f $(SQLS)
--- a/contrib/intagg/README.int_aggrigate
+++ b/contrib/intagg/README.int_aggrigate
@ -0,0 +1,55 @@
 Integer aggregator/enumerator.
 Many database systems have the notion of a one to many table.
 A one to many table usually sits between two indexed tables, 
 as: 
 create table one_to_many(left int, right int) ;
 And it is used like this:
 SELECT right.* from right JOIN one_to_many ON (right.id = one_to_many.right) 
 	WHERE  one_to_many.left = item;
 This will return all the items in the right hand table for an entry 
 in the left hand table. This is a very common construct in SQL.
 Now, this methodology can be cumbersome with a very large number of
 entries in the one_to_many table. Depending on the order in which
 data was entered, a join like this could result in an index scan
 and a fetch for each right hand entry in the table for a particular
 left hand entry.
 If you have a very dynamic system, there is not much you can do. 
 However, if you have some data which is fairly static, you can
 create a summary table with the aggregator.
 CREATE TABLE summary as SELECT left, int_array_aggregate(right) 
 	AS right FROM one_to_many GROUP BY left;
 This will create a table with one row per left item, and an array
 of right items. Now this is pretty useless without some way of using
 the array, which is why there is an array enumerator.
 SELECT left, int_array_enum(right) FROM summary WHERE left = item;
 The above query using int_array_enum, produces the same results as:
 SELECT left, right FROM one_to_many WHERE left = item;
 The difference is that the query against the summary table has to get
 only one row from the table, whereas the query against "one_to_many"
 must index scan and fetch a row for each entry.
 On our system, an EXPLAIN shows a query with a cost of 8488 gets reduced
 to a cost of 329. The query is a join between the one_to_many table,
 select right, count(right) from 
 (
 	select left, int_array_enum(right) as right from summary join
                (select left from left_table where left = item) as lefts
                 ON (summary.left = lefts.left ) 
 ) as list group by right order by count desc ;
--- a/contrib/intagg/int_aggregate.c
+++ b/contrib/intagg/int_aggregate.c
@ -0,0 +1,271 @@
 /*
 * Integer array aggregator / enumerator
 *
 * Mark L. Woodward 
 * DMN Digital Music Network.
 * www.dmn.com
 *
 * Copyright (C) Digital Music Network
 * December 20, 2001
 *
 * This file is the property of the Digital Music Network (DMN).
 * It is being made available to users of the PostgreSQL system
 * under the BSD license.
 *
 */
 #include "postgres.h"
 #include <ctype.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <string.h>
 #include "postgres.h"
 #include "access/heapam.h"
 #include "catalog/catname.h"
 #include "catalog/indexing.h"
 #include "catalog/pg_proc.h"
 #include "executor/executor.h"
 #include "utils/fcache.h"
 #include "utils/sets.h"
 #include "utils/syscache.h"
 #include "access/tupmacs.h"
 #include "access/xact.h"
 #include "fmgr.h"
 #include "miscadmin.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
 #include "utils/lsyscache.h"
 /*
 * This is actually a postgres version of a one dimensional array:
 * an ArrayType header followed by the fields this module reads and writes.
 * NOTE(review): items/lower presumably overlay the dimension and lower-bound
 * words that follow the header in a one-dimensional array -- confirm against
 * utils/array.h.
 */
 typedef struct agg
 {
 	ArrayType a;		/* header; this file sets its size, ndim and flags */
 	int 	items;		/* number of integers currently stored */
 	int 	lower;		/* allocated capacity while aggregating; set to 0 in the final array */
 	int4	array[1];	/* first element; PGARRAY_SIZE() adds room for the rest */
 }PGARRAY;
 /*
 * This is used to keep track of our position during enumeration.
 * int_enum stores one of these in fcinfo->context between calls.
 */
 typedef struct callContext
 {
 	PGARRAY *p;	/* array being enumerated */
 	int num;	/* index of the next element to return */
 	int flags;	/* TOASTED when p is a detoasted copy that must be pfree'd */
 }CTX;
 /* CTX.flags bit: the array is a detoasted copy that int_enum must pfree */
 #define TOASTED		1
 /* Initial element capacity of a newly created aggregation array */
 #define START_NUM 	8
 /*
 * Bytes needed for a PGARRAY holding n elements.  sizeof(PGARRAY) already
 * includes one element, hence (n) - 1.  The argument is parenthesized so
 * that an expression such as "x << 1" or "a ? b : c" is sized correctly.
 */
 #define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n)-1)*sizeof(int4)))
 /* Internal helpers that manage the aggregate's working array */
 static PGARRAY * GetPGArray(int4 state, int fAdd);
 static PGARRAY *ShrinkPGArray(PGARRAY *p);
 /* Functions exported to SQL */
 Datum int_agg_state(PG_FUNCTION_ARGS);
 /*
 * NOTE(review): int_agg_final_count is declared and registered below, but no
 * definition is visible in this file -- confirm whether it is still needed.
 */
 Datum int_agg_final_count(PG_FUNCTION_ARGS);
 Datum int_agg_final_array(PG_FUNCTION_ARGS);
 Datum int_enum(PG_FUNCTION_ARGS);
 /* Register each exported function with the version-1 calling convention */
 PG_FUNCTION_INFO_V1(int_agg_state);
 PG_FUNCTION_INFO_V1(int_agg_final_count);
 PG_FUNCTION_INFO_V1(int_agg_final_array);
 PG_FUNCTION_INFO_V1(int_enum);
 /*
 * Fetch, create, or grow the aggregate's working array.
 *
 * state: the transition value; zero means no array exists yet, otherwise
 *        it is the working array's address carried in an int4.
 * fAdd:  nonzero when the caller is about to append one element, in which
 *        case a full array is doubled in capacity first.
 *
 * Returns the working array, which may have moved if it was enlarged.
 *
 * You need to specify the correct memory context, or it will vanish!
 * The array therefore lives in TopTransactionContext.
 * MemoryContextAlloc() and repalloc() report out-of-memory through
 * elog(ERROR) and never hand back NULL, so their results are used unchecked.
 *
 * NOTE(review): the state round-trips a pointer through int4, which only
 * works where a pointer fits in 32 bits.
 */
 static PGARRAY * GetPGArray(int4 state, int fAdd)
 {
 	PGARRAY *p = (PGARRAY *) state;
 	if(!state)
 	{
 		/* New array: empty, with room for START_NUM elements */
 		int cb = PGARRAY_SIZE(START_NUM);
 		p = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cb);
 		p->a.size = cb;
 		p->a.ndim= 0;
 		p->a.flags = 0;
 		p->items = 0;
 		/* while aggregating, "lower" tracks the allocated capacity */
 		p->lower= START_NUM;
 	}
 	else if(fAdd && p->items >= p->lower)
 	{
 		/* Full: double the capacity; repalloc preserves the contents */
 		int n = p->lower + p->lower;
 		int cbNew = PGARRAY_SIZE(n);
 		p = (PGARRAY *) repalloc(p, cbNew);
 		p->a.size = cbNew;
 		p->lower = n;
 	}
 	return p;
 }
 /*
 * Copy the working array into a right-sized block and release the original.
 *
 * p: working array built during aggregation, or NULL.
 *
 * Returns a palloc'd copy holding exactly p->items elements and marked as a
 * one-dimensional array, or NULL when p is NULL.  The input block is freed.
 *
 * palloc() raises elog(ERROR) on out-of-memory and never returns NULL, so
 * the copy is used without a NULL test.
 */
 static PGARRAY *ShrinkPGArray(PGARRAY *p)
 {
 	PGARRAY *pnew;
 	int cb;
 	if(!p)
 		return NULL;
 	/* bytes needed for the header plus the elements actually stored */
 	cb = PGARRAY_SIZE(p->items);
 	/* palloc allocates in the current memory context */
 	pnew = palloc(cb);
 	memcpy(pnew, p, cb);
 	/* Fix up the fields in the new structure, so Postgres understands */
 	pnew->a.size = cb;
 	pnew->a.ndim=1;
 	pnew->a.flags = 0;
 	pnew->lower = 0;
 	/* the working copy is no longer needed */
 	pfree(p);
 	return pnew;
 }
 /* Called for each iteration during an aggregate function */
 Datum int_agg_state(PG_FUNCTION_ARGS)
 {
 	int4 state = PG_GETARG_INT32(0);
 	int4 value = PG_GETARG_INT32(1);
 	PGARRAY *p = GetPGArray(state, 1);
 	if(!p)
 	{
 		elog(ERROR,"No aggregate storage\n");
 	}
 	else if(p->items >= p->lower)
 	{
 		elog(ERROR,"aggregate storage too small\n");
 	}
 	else
 	{
 		p->array[p->items++]= value;
 	}
 	PG_RETURN_INT32(p);
 }
 /* This is the final function used for the integer aggregator. It returns all the integers
 * collected as a one dimentional integer array */
 Datum int_agg_final_array(PG_FUNCTION_ARGS)
 {
 	PGARRAY *pnew = ShrinkPGArray(GetPGArray(PG_GETARG_INT32(0),0));
 	if(pnew)
 	{
 		PG_RETURN_POINTER(pnew);
 	}
 	else
 	{
 		PG_RETURN_NULL();
 	}
 }
 /*
 * Set-returning function: hands back one element of an integer array per
 * call.
 *
 * Argument 0 is the array.  Iteration state (a CTX) is kept in
 * fcinfo->context between calls: it is created on the first call, when
 * fcinfo->context is NULL, and released once the last element has been
 * returned.  Each call sets rsi->isDone to ExprMultipleResult while elements
 * remain and to ExprEndResult when they are exhausted.
 *
 * An array with fewer than one dimension is treated as empty and an array
 * with more than one dimension is rejected: in both cases the items/array
 * fields of PGARRAY do not describe the data, so they must not be read.
 *
 * elog(ERROR) does not return, and palloc() / PG_DETOAST_DATUM_COPY() raise
 * an error instead of returning NULL, so no recovery code follows them.
 */
 Datum int_enum(PG_FUNCTION_ARGS)
 {
 	CTX *pc;
 	PGARRAY *p = (PGARRAY *) PG_GETARG_POINTER(0);
 	ReturnSetInfo *rsi = (ReturnSetInfo *)fcinfo->resultinfo;
 	if(!p)
 	{
 		elog(NOTICE, "No data sent\n");
 		return 0;
 	}
 	if(!rsi)
 	{
 		elog(ERROR, "No ReturnSetInfo sent! function must be declared returning a 'setof' integer");
 	}
 	if(!fcinfo->context)
 	{
 		/* First call: allocate a working context */
 		pc = (CTX *) palloc(sizeof(CTX));
 		/* Don't copy the attribute if you don't need to */
 		if(VARATT_IS_EXTENDED(p) )
 		{
 			/* Toasted: work on a private detoasted copy, freed when done */
 			pc->p = (PGARRAY *) PG_DETOAST_DATUM_COPY(p);
 			pc->flags = TOASTED;
 		}
 		else
 		{
 			/* Untoasted: read the argument in place */
 			pc->p = p;
 			pc->flags = 0;
 		}
 		/* Now that the array header is readable, verify its shape */
 		if(pc->p->a.ndim > 1)
 		{
 			elog(ERROR, "int_enum only accepts one dimensional arrays");
 		}
 		pc->num=0;
 		fcinfo->context = (Node *) pc;
 	}
 	else /* use an existing one */
 	{
 		pc = (CTX *) fcinfo->context;
 	}
 	/* Are we done yet?  With ndim < 1 there is no items field to consult */
 	if(pc->p->a.ndim < 1 || pc->num >= pc->p->items)
 	{
 		/* We are done: release the detoasted copy (if any) and the context */
 		if(pc->flags & TOASTED)
 			pfree(pc->p);
 		pfree(fcinfo->context);
 		fcinfo->context = NULL;
 		rsi->isDone = ExprEndResult ;
 	}
 	else	/* nope, return the next value */
 	{
 		int val = pc->p->array[pc->num++];
 		rsi->isDone = ExprMultipleResult;
 		PG_RETURN_INT32(val);
 	}
 	PG_RETURN_NULL();
 }
--- a/contrib/intagg/int_aggregate.sql.in
+++ b/contrib/intagg/int_aggregate.sql.in
@ -0,0 +1,40 @@
 -- Drop functions
 -- Removes the objects this script creates below, so that the script can be
 -- run again over an existing installation.
 drop function int_agg_state (int4, int4);
 drop function int_agg_final_array (int4);
 drop aggregate int_array_aggregate(int4);
 drop function int_array_enum (int4[]);
 -- Internal function for the aggregate
 -- Is called for each item in an aggregation
 -- Arguments: the running state and the next value; returns the new state.
 create function int_agg_state (int4, int4)
 	returns int4
 	as 'MODULE_FILENAME','int_agg_state'
 	language 'c';
 -- Internal function for the aggregate
 -- Is called at the end of the aggregation, and returns an array.
 -- Argument: the final state produced by int_agg_state.
 create function int_agg_final_array (int4)
 	returns int4[]
 	as 'MODULE_FILENAME','int_agg_final_array'
 	language 'c';
 -- The aggregation function.
 -- Uses the above functions to create an array of integers from an aggregation.
 -- The state starts at 0 (INITCOND), is advanced by int_agg_state for each
 -- input value, and is turned into the result array by int_agg_final_array.
 create aggregate int_array_aggregate
 (
 	BASETYPE = int4,
 	SFUNC = int_agg_state,
 	STYPE = int4,
 	FINALFUNC = int_agg_final_array,
 	INITCOND = 0
 );
 -- The enumeration function
 -- Returns each element in a one dimensional integer array
 -- as a row.
 -- The SQL name int_array_enum is bound to the C symbol int_enum.
 create function int_array_enum(int4[])
 	returns setof integer
 	as 'MODULE_FILENAME','int_enum'
 	language 'c';