Add sample text search dictionary templates and parsers, to replace the

hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.
18 years ago · 5fcb079858
parent fb631dba2a
commit 5fcb079858
24 changed files with 1324 additions and 9 deletions
--- a/contrib/Makefile
+++ b/contrib/Makefile
@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/Makefile,v 1.80 2007/10/13 22:59:43 tgl Exp $
+# $PostgreSQL: pgsql/contrib/Makefile,v 1.81 2007/10/15 21:36:49 tgl Exp $
 subdir = contrib
 top_builddir = ..
@ -10,6 +10,8 @@ WANTED_DIRS = \
 		chkpass		\
 		cube		\
 		dblink		\
 		dict_int	\
 		dict_xsyn	\
 		earthdistance	\
 		fuzzystrmatch	\
 		hstore		\
@ -31,6 +33,7 @@ WANTED_DIRS = \
 		seg		\
 		spi		\
 		tablefunc	\
 		test_parser	\
 		vacuumlo
 ifeq ($(with_openssl),yes)
--- a/contrib/README
+++ b/contrib/README
@ -1,4 +1,3 @@
 The PostgreSQL contrib tree
 ---------------------------
@ -29,8 +28,8 @@ adminpack -
 	by Dave Page <dpage@vale-housing.co.uk>
 btree_gist -
-      Support for emulating BTREE indexing in GiST
+	Support for emulating BTREE indexing in GiST
-      by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
+	by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
 chkpass -
 	An auto-encrypted password datatype
@ -44,8 +43,16 @@ dblink -
 	Allows remote query execution
 	by Joe Conway <mail@joeconway.com>
 dict_int -
 	Text search dictionary template for integers
 	by Sergey Karpov <karpov@sao.ru>
 dict_xsyn -
 	Text search dictionary template for extended synonym processing
 	by Sergey Karpov <karpov@sao.ru>
 earthdistance -
-	Operator for computing earth distance for two points
+	Operator for computing earth distance between two points
 	by Hal Snyder <hal@vailsys.com>
 fuzzystrmatch -
@ -53,8 +60,8 @@ fuzzystrmatch -
 	by Joe Conway <mail@joeconway.com>, Joel Burton <jburton@scw.org>
 hstore -
-	Hstore - module for storing (key,value) pairs
+	Module for storing (key, value) pairs
-    by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
+	by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
 intagg -
 	Integer aggregator
@ -92,6 +99,10 @@ pg_freespacemap -
 	Displays the contents of the free space map (FSM)
 	by Mark Kirkwood <markir@paradise.net.nz>
 pg_standby -
 	Sample archive_command for warm standby operation
 	by Simon Riggs <simon@2ndquadrant.com>
 pg_trgm -
 	Functions for determining the similarity of text based on trigram
 	matching.
@ -110,7 +121,7 @@ pgrowlocks -
 	by Tatsuo Ishii <ishii@sraoss.co.jp>
 pgstattuple -
-	A function to return statistics about "dead" tuples and free
+	Functions to return statistics about "dead" tuples and free
 	space within a table
 	by Tatsuo Ishii <ishii@sraoss.co.jp>
@ -126,12 +137,16 @@ sslinfo -
 	by Victor Wagner <vitus@cryptocom.ru>
 start-scripts - 
-	Scripts for starting the server at boot time.
+	Scripts for starting the server at boot time on various platforms.
 tablefunc -
 	Examples of functions returning tables
 	by Joe Conway <mail@joeconway.com>
 test_parser -
 	Sample text search parser
 	by Sergey Karpov <karpov@sao.ru>
 tsearch2 -
 	Full-text-index support using GiST
 	by Teodor Sigaev <teodor@sigaev.ru> and Oleg Bartunov
--- a/contrib/dict_int/Makefile
+++ b/contrib/dict_int/Makefile
@ -0,0 +1,19 @@
 # $PostgreSQL: pgsql/contrib/dict_int/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
 MODULE_big = dict_int
 OBJS = dict_int.o
 DATA_built = dict_int.sql
 DATA = uninstall_dict_int.sql
 DOCS = README.dict_int
 REGRESS = dict_int
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
 include $(PGXS)
 else
 subdir = contrib/dict_int
 top_builddir = ../..
 include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
 endif
--- a/contrib/dict_int/README.dict_int
+++ b/contrib/dict_int/README.dict_int
@ -0,0 +1,41 @@
 Dictionary for integers
 =======================
 The motivation for this example dictionary is to control the indexing of
 integers (signed and unsigned), and, consequently, to minimize the number of
 unique words which greatly affect the performance of searching.
 * Configuration
 The dictionary accepts two options: 
  - The MAXLEN parameter specifies the maximum length (number of digits)
    allowed in an integer word.  The default value is 6.
  - The REJECTLONG parameter specifies if an overlength integer should be
    truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns
    the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the
    dictionary treats an overlength integer as a stop word, so that it will
    not be indexed.
 * Usage
 1. Compile and install
 2. Load dictionary
   psql mydb < dict_int.sql
 3. Test it
   mydb# select ts_lexize('intdict', '12345678');
    ts_lexize
   -----------
    {123456}
 4. Change its options as you wish
   mydb# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true);
   ALTER TEXT SEARCH DICTIONARY
 That's all.
--- a/contrib/dict_int/dict_int.c
+++ b/contrib/dict_int/dict_int.c
@ -0,0 +1,99 @@
 /*-------------------------------------------------------------------------
 *
 * dict_int.c
 *	  Text search dictionary for integers
 *
 * Copyright (c) 2007, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/contrib/dict_int/dict_int.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"
 #include "commands/defrem.h"
 #include "fmgr.h"
 #include "tsearch/ts_public.h"
 PG_MODULE_MAGIC;
 /*
  * Per-dictionary settings, filled in by dintdict_init and read by
  * dintdict_lexize.
  */
 typedef struct {
 	int     maxlen;		/* MAXLEN option: longest token length passed through unchanged (default 6) */
 	bool    rejectlong;	/* REJECTLONG option: true = drop overlength tokens, false = truncate them */
 } DictInt;
 PG_FUNCTION_INFO_V1(dintdict_init);
 Datum dintdict_init(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(dintdict_lexize);
 Datum dintdict_lexize(PG_FUNCTION_ARGS);
 /*
  * dintdict_init
  *	  Build the DictInt state for one dictionary from its option list.
  *
  * Argument 0 is a List of DefElem options.  Recognized names (matched
  * case-insensitively) are MAXLEN and REJECTLONG; any other name raises
  * ERRCODE_INVALID_PARAMETER_VALUE.  Defaults are maxlen = 6 and
  * rejectlong = false.  Returns a palloc'd DictInt.
  *
  * MAXLEN is parsed with atoi(), so a non-numeric, zero or negative setting
  * yields a value < 1.  dintdict_lexize uses maxlen as an index into its copy
  * of the token, so such values are rejected here rather than stored.
  */
 Datum
 dintdict_init(PG_FUNCTION_ARGS)
 {
 	List		*dictoptions = (List *) PG_GETARG_POINTER(0);
 	DictInt 	*d;
 	ListCell	*l;
 	d = (DictInt *) palloc0(sizeof(DictInt));
 	d->maxlen = 6;
 	d->rejectlong = false;
 	foreach(l, dictoptions)
 	{
 		DefElem *defel = (DefElem *) lfirst(l);
 		if (pg_strcasecmp(defel->defname, "MAXLEN") == 0)
 		{
 			d->maxlen = atoi(defGetString(defel));
 			/* a limit below one digit can never produce a usable lexeme */
 			if (d->maxlen < 1)
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 						 errmsg("maxlen value has to be >= 1")));
 		}
 		else if (pg_strcasecmp(defel->defname, "REJECTLONG") == 0)
 		{
 			d->rejectlong = defGetBoolean(defel);
 		}
 		else
 		{
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("unrecognized intdict parameter: \"%s\"",
 							defel->defname)));
 		}
 	}
 	PG_RETURN_POINTER(d);
 }
 /*
  * dintdict_lexize
  *	  Normalize one integer token according to the dictionary settings.
  *
  * Arguments: 0 = DictInt built by dintdict_init, 1 = pointer to the token
  * bytes, 2 = token length in bytes.
  *
  * Returns a palloc'd two-element TSLexeme array whose second element is the
  * terminator (lexeme == NULL).  The first element holds:
  *	 - a copy of the token, if it is no longer than maxlen;
  *	 - the first maxlen bytes of the token, if it is longer and rejectlong
  *	   is false;
  *	 - NULL (an empty result), if it is longer and rejectlong is true.
  */
 Datum
 dintdict_lexize(PG_FUNCTION_ARGS)
 {
 	DictInt    *d = (DictInt *) PG_GETARG_POINTER(0);
 	char	   *in = (char *) PG_GETARG_POINTER(1);
 	int			len = PG_GETARG_INT32(2);
 	/*
 	 * Zero the whole array: only the lexeme fields are assigned below, so
 	 * this keeps every other TSLexeme field from holding uninitialized
 	 * memory, and it leaves both lexeme pointers NULL to start with.
 	 */
 	TSLexeme   *res = (TSLexeme *) palloc0(sizeof(TSLexeme) * 2);
 	if (len > d->maxlen)
 	{
 		/* reject by returning an empty array; res[0].lexeme is already NULL */
 		if (d->rejectlong)
 			PG_RETURN_POINTER(res);
 		/*
 		 * Trim the integer.  A negative limit is clamped to zero so that a
 		 * bad maxlen can never turn into an out-of-range length here.
 		 */
 		len = (d->maxlen > 0) ? d->maxlen : 0;
 	}
 	/* copy only the bytes that are kept, NUL-terminated */
 	res[0].lexeme = pnstrdup(in, len);
 	PG_RETURN_POINTER(res);
 }
--- a/contrib/dict_int/dict_int.sql.in
+++ b/contrib/dict_int/dict_int.sql.in
@ -0,0 +1,29 @@
 -- $PostgreSQL: pgsql/contrib/dict_int/dict_int.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
 -- Adjust this setting to control where the objects get created.
 SET search_path = public;
 BEGIN;
 CREATE FUNCTION dintdict_init(internal)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;
 CREATE FUNCTION dintdict_lexize(internal, internal, internal, internal)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;
 CREATE TEXT SEARCH TEMPLATE intdict_template (
        LEXIZE = dintdict_lexize,
 	INIT   = dintdict_init
 );
 CREATE TEXT SEARCH DICTIONARY intdict (
 	TEMPLATE = intdict_template
 );
 COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'dictionary for integers';
 END;
--- a/contrib/dict_int/expected/dict_int.out
+++ b/contrib/dict_int/expected/dict_int.out
@ -0,0 +1,308 @@
 --
 -- first, define the datatype.  Turn off echoing so that expected file
 -- does not depend on contents of this file.
 --
 SET client_min_messages = warning;
 \set ECHO none
 RESET client_min_messages;
 --lexize
 select ts_lexize('intdict', '511673');
 ts_lexize 
 -----------
 {511673}
 (1 row)
 select ts_lexize('intdict', '129');
 ts_lexize 
 -----------
 {129}
 (1 row)
 select ts_lexize('intdict', '40865854');
 ts_lexize 
 -----------
 {408658}
 (1 row)
 select ts_lexize('intdict', '952');
 ts_lexize 
 -----------
 {952}
 (1 row)
 select ts_lexize('intdict', '654980341');
 ts_lexize 
 -----------
 {654980}
 (1 row)
 select ts_lexize('intdict', '09810106');
 ts_lexize 
 -----------
 {098101}
 (1 row)
 select ts_lexize('intdict', '14262713');
 ts_lexize 
 -----------
 {142627}
 (1 row)
 select ts_lexize('intdict', '6532082986');
 ts_lexize 
 -----------
 {653208}
 (1 row)
 select ts_lexize('intdict', '0150061');
 ts_lexize 
 -----------
 {015006}
 (1 row)
 select ts_lexize('intdict', '7778');
 ts_lexize 
 -----------
 {7778}
 (1 row)
 select ts_lexize('intdict', '9547');
 ts_lexize 
 -----------
 {9547}
 (1 row)
 select ts_lexize('intdict', '753395478');
 ts_lexize 
 -----------
 {753395}
 (1 row)
 select ts_lexize('intdict', '647652');
 ts_lexize 
 -----------
 {647652}
 (1 row)
 select ts_lexize('intdict', '6988655574');
 ts_lexize 
 -----------
 {698865}
 (1 row)
 select ts_lexize('intdict', '1279');
 ts_lexize 
 -----------
 {1279}
 (1 row)
 select ts_lexize('intdict', '1266645909');
 ts_lexize 
 -----------
 {126664}
 (1 row)
 select ts_lexize('intdict', '7594193969');
 ts_lexize 
 -----------
 {759419}
 (1 row)
 select ts_lexize('intdict', '16928207');
 ts_lexize 
 -----------
 {169282}
 (1 row)
 select ts_lexize('intdict', '196850350328');
 ts_lexize 
 -----------
 {196850}
 (1 row)
 select ts_lexize('intdict', '22026985592');
 ts_lexize 
 -----------
 {220269}
 (1 row)
 select ts_lexize('intdict', '2063765');
 ts_lexize 
 -----------
 {206376}
 (1 row)
 select ts_lexize('intdict', '242387310');
 ts_lexize 
 -----------
 {242387}
 (1 row)
 select ts_lexize('intdict', '93595');
 ts_lexize 
 -----------
 {93595}
 (1 row)
 select ts_lexize('intdict', '9374');
 ts_lexize 
 -----------
 {9374}
 (1 row)
 select ts_lexize('intdict', '996969');
 ts_lexize 
 -----------
 {996969}
 (1 row)
 select ts_lexize('intdict', '353595982');
 ts_lexize 
 -----------
 {353595}
 (1 row)
 select ts_lexize('intdict', '925860');
 ts_lexize 
 -----------
 {925860}
 (1 row)
 select ts_lexize('intdict', '11848378337');
 ts_lexize 
 -----------
 {118483}
 (1 row)
 select ts_lexize('intdict', '333');
 ts_lexize 
 -----------
 {333}
 (1 row)
 select ts_lexize('intdict', '799287416765');
 ts_lexize 
 -----------
 {799287}
 (1 row)
 select ts_lexize('intdict', '745939');
 ts_lexize 
 -----------
 {745939}
 (1 row)
 select ts_lexize('intdict', '67601305734');
 ts_lexize 
 -----------
 {676013}
 (1 row)
 select ts_lexize('intdict', '3361113');
 ts_lexize 
 -----------
 {336111}
 (1 row)
 select ts_lexize('intdict', '9033778607');
 ts_lexize 
 -----------
 {903377}
 (1 row)
 select ts_lexize('intdict', '7507648');
 ts_lexize 
 -----------
 {750764}
 (1 row)
 select ts_lexize('intdict', '1166');
 ts_lexize 
 -----------
 {1166}
 (1 row)
 select ts_lexize('intdict', '9360498');
 ts_lexize 
 -----------
 {936049}
 (1 row)
 select ts_lexize('intdict', '917795');
 ts_lexize 
 -----------
 {917795}
 (1 row)
 select ts_lexize('intdict', '9387894');
 ts_lexize 
 -----------
 {938789}
 (1 row)
 select ts_lexize('intdict', '42764329');
 ts_lexize 
 -----------
 {427643}
 (1 row)
 select ts_lexize('intdict', '564062');
 ts_lexize 
 -----------
 {564062}
 (1 row)
 select ts_lexize('intdict', '5413377');
 ts_lexize 
 -----------
 {541337}
 (1 row)
 select ts_lexize('intdict', '060965');
 ts_lexize 
 -----------
 {060965}
 (1 row)
 select ts_lexize('intdict', '08273593');
 ts_lexize 
 -----------
 {082735}
 (1 row)
 select ts_lexize('intdict', '593556010144');
 ts_lexize 
 -----------
 {593556}
 (1 row)
 select ts_lexize('intdict', '17988843352');
 ts_lexize 
 -----------
 {179888}
 (1 row)
 select ts_lexize('intdict', '252281774');
 ts_lexize 
 -----------
 {252281}
 (1 row)
 select ts_lexize('intdict', '313425');
 ts_lexize 
 -----------
 {313425}
 (1 row)
 select ts_lexize('intdict', '641439323669');
 ts_lexize 
 -----------
 {641439}
 (1 row)
 select ts_lexize('intdict', '314532610153');
 ts_lexize 
 -----------
 {314532}
 (1 row)
--- a/contrib/dict_int/sql/dict_int.sql
+++ b/contrib/dict_int/sql/dict_int.sql
@ -0,0 +1,61 @@
 --
 -- first, define the datatype.  Turn off echoing so that expected file
 -- does not depend on contents of this file.
 --
 SET client_min_messages = warning;
 \set ECHO none
 \i dict_int.sql
 \set ECHO all
 RESET client_min_messages;
 --lexize
 select ts_lexize('intdict', '511673');
 select ts_lexize('intdict', '129');
 select ts_lexize('intdict', '40865854');
 select ts_lexize('intdict', '952');
 select ts_lexize('intdict', '654980341');
 select ts_lexize('intdict', '09810106');
 select ts_lexize('intdict', '14262713');
 select ts_lexize('intdict', '6532082986');
 select ts_lexize('intdict', '0150061');
 select ts_lexize('intdict', '7778');
 select ts_lexize('intdict', '9547');
 select ts_lexize('intdict', '753395478');
 select ts_lexize('intdict', '647652');
 select ts_lexize('intdict', '6988655574');
 select ts_lexize('intdict', '1279');
 select ts_lexize('intdict', '1266645909');
 select ts_lexize('intdict', '7594193969');
 select ts_lexize('intdict', '16928207');
 select ts_lexize('intdict', '196850350328');
 select ts_lexize('intdict', '22026985592');
 select ts_lexize('intdict', '2063765');
 select ts_lexize('intdict', '242387310');
 select ts_lexize('intdict', '93595');
 select ts_lexize('intdict', '9374');
 select ts_lexize('intdict', '996969');
 select ts_lexize('intdict', '353595982');
 select ts_lexize('intdict', '925860');
 select ts_lexize('intdict', '11848378337');
 select ts_lexize('intdict', '333');
 select ts_lexize('intdict', '799287416765');
 select ts_lexize('intdict', '745939');
 select ts_lexize('intdict', '67601305734');
 select ts_lexize('intdict', '3361113');
 select ts_lexize('intdict', '9033778607');
 select ts_lexize('intdict', '7507648');
 select ts_lexize('intdict', '1166');
 select ts_lexize('intdict', '9360498');
 select ts_lexize('intdict', '917795');
 select ts_lexize('intdict', '9387894');
 select ts_lexize('intdict', '42764329');
 select ts_lexize('intdict', '564062');
 select ts_lexize('intdict', '5413377');
 select ts_lexize('intdict', '060965');
 select ts_lexize('intdict', '08273593');
 select ts_lexize('intdict', '593556010144');
 select ts_lexize('intdict', '17988843352');
 select ts_lexize('intdict', '252281774');
 select ts_lexize('intdict', '313425');
 select ts_lexize('intdict', '641439323669');
 select ts_lexize('intdict', '314532610153');
--- a/contrib/dict_int/uninstall_dict_int.sql
+++ b/contrib/dict_int/uninstall_dict_int.sql
@ -0,0 +1,9 @@
 SET search_path = public;
 DROP TEXT SEARCH DICTIONARY intdict;
 DROP TEXT SEARCH TEMPLATE intdict_template;
 DROP FUNCTION dintdict_init(internal);
 DROP FUNCTION dintdict_lexize(internal,internal,internal,internal);
--- a/contrib/dict_xsyn/Makefile
+++ b/contrib/dict_xsyn/Makefile
@ -0,0 +1,38 @@
 # $PostgreSQL: pgsql/contrib/dict_xsyn/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
 MODULE_big = dict_xsyn
 OBJS = dict_xsyn.o
 DATA_built = dict_xsyn.sql
 DATA = uninstall_dict_xsyn.sql
 DOCS = README.dict_xsyn
 REGRESS = dict_xsyn
 DICTDIR = tsearch_data
 DICTFILES = xsyn_sample.rules
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
 include $(PGXS)
 else
 subdir = contrib/dict_xsyn
 top_builddir = ../..
 include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
 endif
 install: install-data
 .PHONY: install-data
 install-data: $(DICTFILES)
 	for i in $(DICTFILES); \
 		do $(INSTALL_DATA) $(srcdir)/$$i '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i; \
 	done
 uninstall: uninstall-data
 .PHONY: uninstall-data
 uninstall-data:
 	for i in $(DICTFILES); \
 		do rm -rf '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i ; \
 	done
--- a/contrib/dict_xsyn/README.dict_xsyn
+++ b/contrib/dict_xsyn/README.dict_xsyn
@ -0,0 +1,52 @@
 Extended Synonym dictionary
 ===========================
 This is a simple synonym dictionary. It replaces words with groups of their
 synonyms, and so makes it possible to search for a word using any of its
 synonyms.
 * Configuration
 It accepts the following options:
 - KEEPORIG controls whether the original word is included, or only its
   synonyms. Default is 'true'.
 - RULES is the base name of the file containing the list of synonyms.
   This file must be in $(prefix)/share/tsearch_data/, and its name must
   end in ".rules" (which is not included in the RULES parameter).
 The rules file has the following format:
 - Each line represents a group of synonyms for a single word, which is
   given first on the line. Synonyms are separated by whitespace:
   word syn1 syn2 syn3
 - Sharp ('#') sign is a comment delimiter. It may appear at any position
   inside the line.  The rest of the line will be skipped.
 Look at xsyn_sample.rules, which is installed in $(prefix)/share/tsearch_data/,
 for an example.
 * Usage
 1. Compile and install
 2. Load dictionary
   psql mydb < dict_xsyn.sql
 3. Test it
   mydb=# SELECT ts_lexize('xsyn','word');
   ts_lexize
   ----------------
   {word,syn1,syn2,syn3}
 4. Change the dictionary options as you wish
   mydb# ALTER TEXT SEARCH DICTIONARY xsyn (KEEPORIG=false);
   ALTER TEXT SEARCH DICTIONARY
 That's all.
--- a/contrib/dict_xsyn/dict_xsyn.c
+++ b/contrib/dict_xsyn/dict_xsyn.c
@ -0,0 +1,235 @@
 /*-------------------------------------------------------------------------
 *
 * dict_xsyn.c
 *	  Extended synonym dictionary
 *
 * Copyright (c) 2007, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"
 #include <ctype.h>
 #include "commands/defrem.h"
 #include "fmgr.h"
 #include "storage/fd.h"
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_utils.h"
 PG_MODULE_MAGIC;
 /*
  * One rule-file entry.  read_dictionary stores these in an array sorted by
  * key (via compare_syn) so that dxsyn_lexize can bsearch() it.
  */
 typedef struct
 {
 	char *key; /* Word */
 	char *value; /* Unparsed list of synonyms, including the word itself */
 }	Syn;
 /*
  * Dictionary state built by dxsyn_init and consulted by dxsyn_lexize.
  */
 typedef struct
 {
 	int len;		/* number of entries in syn once loading is done (used as the allocated size while loading) */
 	Syn *syn;		/* array of rule entries, or NULL if no rules file has been read */
 	bool keeporig;	/* KEEPORIG option: include the looked-up word itself in the output */
 }	DictSyn;
 PG_FUNCTION_INFO_V1(dxsyn_init);
 Datum dxsyn_init(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(dxsyn_lexize);
 Datum dxsyn_lexize(PG_FUNCTION_ARGS);
 /*
  * find_word
  *	  Locate the next whitespace-delimited word in a NUL-terminated string.
  *
  * Skips leading whitespace starting at "in".  If the string ends there, or
  * the next character is '#', returns NULL and leaves *end set to NULL.
  * Otherwise returns a pointer to the start of the word and sets *end to the
  * first position after it (a whitespace character or the terminating NUL).
  * The input string is not modified.
  */
 static char *
 find_word(char *in, char **end)
 {
 	char *cursor = in;
 	char *word;
 	*end = NULL;
 	/* step over leading whitespace, advancing by pg_mblen() per character */
 	for (; *cursor != '\0' && t_isspace(cursor); cursor += pg_mblen(cursor))
 		;
 	/* nothing left, or the rest of the line is a comment */
 	if (*cursor == '\0' || *cursor == '#')
 		return NULL;
 	word = cursor;
 	/* run forward to the end of the word */
 	for (; *cursor != '\0' && !t_isspace(cursor); cursor += pg_mblen(cursor))
 		;
 	*end = cursor;
 	return word;
 }
 static int
 compare_syn(const void *a, const void *b)
 {
 	return strcmp(((Syn *) a)->key, ((Syn *) b)->key);
 }
 /*
  * read_dictionary
  *	  Load the synonym rules file named by "filename" into d->syn.
  *
  * The file name is resolved with get_tsearch_config_filename(filename,
  * "rules").  Each non-empty line is lower-cased; its first word becomes the
  * entry's key and the whole lower-cased line is kept as the entry's value.
  * Lines with no word (blank, or starting with '#') are skipped.  On return
  * d->len is the number of entries and d->syn is sorted by key.
  *
  * Raises ERRCODE_CONFIG_FILE_ERROR if the file cannot be opened.
  */
 static void
 read_dictionary(DictSyn *d, char *filename)
 {
 	char *real_filename = get_tsearch_config_filename(filename, "rules");
 	FILE *fin;
 	char *line;
 	int cur = 0;
 	if ((fin = AllocateFile(real_filename, "r")) == NULL)
 		ereport(ERROR,
 				(errcode(ERRCODE_CONFIG_FILE_ERROR),
 				 errmsg("could not open synonym file \"%s\": %m",
 						real_filename)));
 	while ((line = t_readline(fin)) != NULL)
 	{
 		char *value;
 		char *key;
 		char *end = NULL;
 		if (*line == '\0')
 		{
 			/* release the empty line too, instead of leaking one per blank line */
 			pfree(line);
 			continue;
 		}
 		value = lowerstr(line);
 		pfree(line);
 		key = find_word(value, &end);
 		if (!key)
 		{
 			pfree(value);
 			continue;
 		}
 		/* while loading, d->len holds the allocated size; grow by doubling */
 		if (cur == d->len)
 		{
 			d->len = (d->len > 0) ? 2 * d->len : 16;
 			if (d->syn)
 				d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
 			else
 				d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
 		}
 		d->syn[cur].key = pnstrdup(key, end - key);
 		d->syn[cur].value = value;
 		cur++;
 	}
 	FreeFile(fin);
 	/* from here on d->len is the number of valid entries */
 	d->len = cur;
 	if (cur > 1)
 		qsort(d->syn, d->len, sizeof(Syn), compare_syn);
 	pfree(real_filename);
 }
 /*
  * dxsyn_init
  *	  Build the DictSyn state for one dictionary from its option list.
  *
  * Argument 0 is a List of DefElem options, handled in list order.
  * KEEPORIG sets the keeporig flag (default true); RULES names the rules
  * file, which is loaded immediately via read_dictionary.  Names are matched
  * case-insensitively; anything else raises
  * ERRCODE_INVALID_PARAMETER_VALUE.  Returns a palloc'd DictSyn.
  */
 Datum
 dxsyn_init(PG_FUNCTION_ARGS)
 {
 	List *options = (List *) PG_GETARG_POINTER(0);
 	DictSyn *dict = (DictSyn *) palloc0(sizeof(DictSyn));
 	ListCell *cell;
 	/* defaults: no rules loaded, original word kept */
 	dict->keeporig = true;
 	dict->syn = NULL;
 	dict->len = 0;
 	foreach(cell, options)
 	{
 		DefElem *opt = (DefElem *) lfirst(cell);
 		if (pg_strcasecmp(opt->defname, "KEEPORIG") == 0)
 		{
 			dict->keeporig = defGetBoolean(opt);
 			continue;
 		}
 		if (pg_strcasecmp(opt->defname, "RULES") == 0)
 		{
 			read_dictionary(dict, defGetString(opt));
 			continue;
 		}
 		/* neither known option matched */
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("unrecognized xsyn parameter: \"%s\"",
 						opt->defname)));
 	}
 	PG_RETURN_POINTER(dict);
 }
 /*
  * dxsyn_lexize
  *	  Replace a word by the synonyms listed for it in the rules file.
  *
  * Arguments: 0 = DictSyn built by dxsyn_init, 1 = pointer to the token
  * bytes, 2 = token length in bytes.
  *
  * Returns NULL if the token is empty, no rules are loaded, or the
  * lower-cased token is not a key in the rules.  Otherwise returns a palloc'd
  * TSLexeme array terminated by an entry with lexeme == NULL, holding one
  * entry per word of the matching rule line.  The first word of the line
  * (the key itself) is included only when keeporig is set.
  */
 Datum
 dxsyn_lexize(PG_FUNCTION_ARGS)
 {
 	DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
 	char *in = (char *) PG_GETARG_POINTER(1);
 	int length = PG_GETARG_INT32(2);
 	Syn word;
 	Syn *found;
 	TSLexeme *res = NULL;
 	if (!length || d->len == 0)
 		PG_RETURN_POINTER(NULL);
 	/* Create search pattern */
 	{
 		char *temp = pnstrdup(in, length);
 		word.key = lowerstr(temp);
 		pfree(temp);
 		word.value = NULL;
 	}
 	/* Look for matching syn */
 	found = (Syn *)bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
 	pfree(word.key);
 	if (!found)
 		PG_RETURN_POINTER(NULL);
 	/* Parse string of synonyms and return array of words */
 	{
 		char *pos = found->value;
 		char *syn;
 		char *end;
 		int nsyns = 0;
 		bool is_first = true;
 		/* start with just a zeroed terminator, in case nothing is emitted */
 		res = (TSLexeme *) palloc0(sizeof(TSLexeme));
 		/*
 		 * Walk the stored line without modifying it.  Each word is copied
 		 * out by length, and scanning resumes exactly where find_word
 		 * stopped, so a delimiter is never skipped by a fixed byte count.
 		 */
 		while ((syn = find_word(pos, &end)) != NULL)
 		{
 			/* first word is added to result only if KEEPORIG flag is set */
 			if (d->keeporig || !is_first)
 			{
 				res = (TSLexeme *) repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
 				/*
 				 * Zero the new entry and the new terminator, so that no
 				 * TSLexeme field is left holding uninitialized memory.
 				 */
 				memset(&res[nsyns], 0, sizeof(TSLexeme) * 2);
 				res[nsyns].lexeme = pnstrdup(syn, end - syn);
 				nsyns++;
 			}
 			is_first = false;
 			pos = end;
 		}
 	}
 	PG_RETURN_POINTER(res);
 }
--- a/contrib/dict_xsyn/dict_xsyn.sql.in
+++ b/contrib/dict_xsyn/dict_xsyn.sql.in
@ -0,0 +1,29 @@
 -- $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
 -- Adjust this setting to control where the objects get created.
 SET search_path = public;
 BEGIN;
 CREATE FUNCTION dxsyn_init(internal)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;
 CREATE FUNCTION dxsyn_lexize(internal, internal, internal, internal)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;
 CREATE TEXT SEARCH TEMPLATE xsyn_template (
        LEXIZE = dxsyn_lexize,
 	INIT   = dxsyn_init
 );
 CREATE TEXT SEARCH DICTIONARY xsyn (
 	TEMPLATE = xsyn_template
 );
 COMMENT ON TEXT SEARCH DICTIONARY xsyn IS 'eXtended synonym dictionary';
 END;
--- a/contrib/dict_xsyn/expected/dict_xsyn.out
+++ b/contrib/dict_xsyn/expected/dict_xsyn.out
@ -0,0 +1,22 @@
 --
 -- first, define the datatype.  Turn off echoing so that expected file
 -- does not depend on contents of this file.
 --
 SET client_min_messages = warning;
 \set ECHO none
 RESET client_min_messages;
 --configuration
 ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
 --lexize
 SELECT ts_lexize('xsyn', 'supernova');
   ts_lexize    
 ----------------
 {sn,sne,1987a}
 (1 row)
 SELECT ts_lexize('xsyn', 'grb');
 ts_lexize 
 -----------
 (1 row)
--- a/contrib/dict_xsyn/sql/dict_xsyn.sql
+++ b/contrib/dict_xsyn/sql/dict_xsyn.sql
@ -0,0 +1,16 @@
 --
 -- first, define the datatype.  Turn off echoing so that expected file
 -- does not depend on contents of this file.
 --
 SET client_min_messages = warning;
 \set ECHO none
 \i dict_xsyn.sql
 \set ECHO all
 RESET client_min_messages;
 --configuration
 ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
 --lexize
 SELECT ts_lexize('xsyn', 'supernova');
 SELECT ts_lexize('xsyn', 'grb');
--- a/contrib/dict_xsyn/uninstall_dict_xsyn.sql
+++ b/contrib/dict_xsyn/uninstall_dict_xsyn.sql
@ -0,0 +1,9 @@
 SET search_path = public;
 DROP TEXT SEARCH DICTIONARY xsyn;
 DROP TEXT SEARCH TEMPLATE xsyn_template;
 DROP FUNCTION dxsyn_init(internal);
 DROP FUNCTION dxsyn_lexize(internal,internal,internal,internal);
--- a/contrib/dict_xsyn/xsyn_sample.rules
+++ b/contrib/dict_xsyn/xsyn_sample.rules
@ -0,0 +1,6 @@
 # Sample rules file for eXtended Synonym (xsyn) dictionary
 # format is as follows:
 #
 # word synonym1 synonym2 ...
 #
 supernova sn sne 1987a
--- a/contrib/test_parser/Makefile
+++ b/contrib/test_parser/Makefile
@ -0,0 +1,19 @@
 # $PostgreSQL: pgsql/contrib/test_parser/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
 MODULE_big = test_parser
 OBJS = test_parser.o
 DATA_built = test_parser.sql
 DATA = uninstall_test_parser.sql
 DOCS = README.test_parser
 REGRESS = test_parser
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
 include $(PGXS)
 else
 subdir = contrib/test_parser
 top_builddir = ../..
 include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
 endif
--- a/contrib/test_parser/README.test_parser
+++ b/contrib/test_parser/README.test_parser
@ -0,0 +1,52 @@
 Example parser
 ==============
 This is an example of a custom parser for full text search.
 It recognizes space-delimited words and returns only two token types:
 - 3,  word,  Word
 - 12, blank, Space symbols
 The token numbers have been chosen to keep compatibility with the default
 ts_headline() function, since we do not want to implement our own version.
 * Configuration
 The parser has no user-configurable parameters.
 * Usage
 1. Compile and install
 2. Load parser
   psql mydb < test_parser.sql
 3. Test it
   mydb# SELECT * FROM ts_parse('testparser','That''s my first own parser');
    tokid | token
   -------+--------
        3 | That's
       12 |
        3 | my
       12 |
        3 | first
       12 |
        3 | own
       12 |
        3 | parser
   mydb# SELECT to_tsvector('testcfg','That''s my first own parser');
   to_tsvector
   -------------------------------------------------
   'my':2 'own':4 'first':3 'parser':5 'that''s':1
   mydb# SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', to_tsquery('testcfg', 'stars'));
   ts_headline
   -----------------------------------------------------------------
   Supernovae <b>stars</b> are the brightest phenomena in galaxies
 That's all.
--- a/contrib/test_parser/expected/test_parser.out
+++ b/contrib/test_parser/expected/test_parser.out
@ -0,0 +1,50 @@
 --
 -- first, define the parser.  Turn off echoing so that expected file
 -- does not depend on contents of this file.
 --
 SET client_min_messages = warning;
 \set ECHO none
 RESET client_min_messages;
 -- make test configuration using parser
 CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
 ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
 -- ts_parse
 SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
 tokid |         token         
 -------+-----------------------
     3 | That's
    12 |  
     3 | simple
    12 |  
     3 | parser
    12 |  
     3 | can't
    12 |  
     3 | parse
    12 |  
     3 | urls
    12 |  
     3 | like
    12 |  
     3 | http://some.url/here/
 (15 rows)
 SELECT to_tsvector('testcfg','That''s my first own parser');
                   to_tsvector                   
 -------------------------------------------------
 'my':2 'own':4 'first':3 'parser':5 'that''s':1
 (1 row)
 SELECT to_tsquery('testcfg', 'star');
 to_tsquery 
 ------------
 'star'
 (1 row)
 SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', 
       to_tsquery('testcfg', 'stars'));
                           ts_headline                           
 -----------------------------------------------------------------
 Supernovae <b>stars</b> are the brightest phenomena in galaxies
 (1 row)
--- a/contrib/test_parser/sql/test_parser.sql
+++ b/contrib/test_parser/sql/test_parser.sql
@ -0,0 +1,26 @@
 --
 -- first, define the parser.  Turn off echoing so that expected file
 -- does not depend on contents of this file.
 --
 SET client_min_messages = warning;
 \set ECHO none
 \i test_parser.sql
 \set ECHO all
 RESET client_min_messages;
 -- make test configuration using parser
 CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
 ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
 -- ts_parse
 SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
 SELECT to_tsvector('testcfg','That''s my first own parser');
 SELECT to_tsquery('testcfg', 'star');
 SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', 
       to_tsquery('testcfg', 'stars'));
--- a/contrib/test_parser/test_parser.c
+++ b/contrib/test_parser/test_parser.c
@ -0,0 +1,130 @@
 /*-------------------------------------------------------------------------
 *
 * test_parser.c
 *	  Simple example of a text search parser
 *
 * Copyright (c) 2007, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/contrib/test_parser/test_parser.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"
 #include "fmgr.h"
 PG_MODULE_MAGIC;
 /*
 * types
 */
 /*
  * Parser state private to this module: created by testprs_start, advanced
  * by testprs_getlexeme, and freed by testprs_end.
  */
 typedef struct {
 	char *	buffer; /* text to parse */
 	int		len;	/* length of the text in buffer */
 	int		pos;	/* position of the parser */
 } ParserState;
 /* copy-paste from wparser.h of tsearch2 */
 typedef struct {
 	int		lexid;
 	char	*alias;
 	char	*descr;
 } LexDescr;
 /*
 * prototypes
 */
 PG_FUNCTION_INFO_V1(testprs_start);
 Datum testprs_start(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(testprs_getlexeme);
 Datum testprs_getlexeme(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(testprs_end);
 Datum testprs_end(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(testprs_lextype);
 Datum testprs_lextype(PG_FUNCTION_ARGS);
 /*
 * functions
 */
 /*
  * testprs_start
  *	  Begin parsing: argument 0 is the text to parse, argument 1 its length.
  *	  Returns a palloc'd ParserState positioned at the start of the text.
  *	  The text itself is not copied; the state only points at it.
  */
 Datum testprs_start(PG_FUNCTION_ARGS)
 {
 	char		*text = (char *) PG_GETARG_POINTER(0);
 	int			textlen = PG_GETARG_INT32(1);
 	ParserState *state = (ParserState *) palloc0(sizeof(ParserState));
 	state->pos = 0;
 	state->len = textlen;
 	state->buffer = text;
 	PG_RETURN_POINTER(state);
 }
 /*
  * testprs_getlexeme
  *	  Return the next token from the parser state.
  *
  * Arguments: 0 = ParserState from testprs_start, 1 = output pointer that
  * receives the token start (pointing into the original buffer), 2 = output
  * pointer that receives the token length in bytes.
  *
  * Returns the token type: 12 for a run of ' ' characters, 3 for a run of
  * anything else, or 0 once the end of the buffer has been reached (in which
  * case *tlen is 0).
  *
  * Every read of buffer[pos] is guarded by pos < len first, so the function
  * never looks at the byte just past the end of the input.
  */
 Datum testprs_getlexeme(PG_FUNCTION_ARGS)
 {
 	ParserState *pst   = (ParserState *) PG_GETARG_POINTER(0);
 	char		**t	   = (char **) PG_GETARG_POINTER(1);
 	int			*tlen  = (int *) PG_GETARG_POINTER(2);
 	int			startpos = pst->pos;
 	int			type;
 	*t = pst->buffer + pst->pos;
 	if (pst->pos < pst->len &&
 		(pst->buffer)[pst->pos] == ' ')
 	{
 		/* blank type */
 		type = 12;
 		/* go to the next non-space character */
 		while (pst->pos < pst->len &&
 			   (pst->buffer)[pst->pos] == ' ')
 			(pst->pos)++;
 	}
 	else
 	{
 		/* word type */
 		type = 3;
 		/* go to the next space character */
 		while (pst->pos < pst->len &&
 			   (pst->buffer)[pst->pos] != ' ')
 			(pst->pos)++;
 	}
 	*tlen = pst->pos - startpos;
 	/* we are finished if (*tlen == 0) */
 	if (*tlen == 0)
 		type = 0;
 	PG_RETURN_INT32(type);
 }
 Datum testprs_end(PG_FUNCTION_ARGS)
 {
 	ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
 	pfree(pst);
 	PG_RETURN_VOID();
 }
 Datum testprs_lextype(PG_FUNCTION_ARGS)
 {
 	/*
 	 * Remarks:
 	 * - we have to return the blanks for headline reason
 	 * - we use the same lexids like Teodor in the default
 	 * word parser; in this way we can reuse the headline
 	 * function of the default word parser.
 	 */
 	LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2+1));
 	/* there are only two types in this parser */
 	descr[0].lexid = 3;
 	descr[0].alias = pstrdup("word");
 	descr[0].descr = pstrdup("Word");
 	descr[1].lexid = 12;
 	descr[1].alias = pstrdup("blank");
 	descr[1].descr = pstrdup("Space symbols");
 	descr[2].lexid = 0;
 	PG_RETURN_POINTER(descr);
 }
--- a/contrib/test_parser/test_parser.sql.in
+++ b/contrib/test_parser/test_parser.sql.in
@ -0,0 +1,36 @@
 -- $PostgreSQL: pgsql/contrib/test_parser/test_parser.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
 -- Adjust this setting to control where the objects get created.
 SET search_path = public;
 BEGIN;
 CREATE FUNCTION testprs_start(internal, int4)
    RETURNS internal
    AS 'MODULE_PATHNAME'
    LANGUAGE C STRICT;
 CREATE FUNCTION testprs_getlexeme(internal, internal, internal)
    RETURNS internal
    AS 'MODULE_PATHNAME'
    LANGUAGE C STRICT;
 CREATE FUNCTION testprs_end(internal)
    RETURNS void
    AS 'MODULE_PATHNAME'
    LANGUAGE C STRICT;
 CREATE FUNCTION testprs_lextype(internal)
    RETURNS internal
    AS 'MODULE_PATHNAME'
    LANGUAGE C STRICT;
 CREATE TEXT SEARCH PARSER testparser (
    START    = testprs_start,
    GETTOKEN = testprs_getlexeme,
    END      = testprs_end,
    HEADLINE = pg_catalog.prsd_headline,
    LEXTYPES = testprs_lextype
 );
 END;
--- a/contrib/test_parser/uninstall_test_parser.sql
+++ b/contrib/test_parser/uninstall_test_parser.sql
@ -0,0 +1,11 @@
 SET search_path = public;
 DROP TEXT SEARCH PARSER testparser;
 DROP FUNCTION testprs_start(internal, int4);
 DROP FUNCTION testprs_getlexeme(internal, internal, internal);
 DROP FUNCTION testprs_end(internal);
 DROP FUNCTION testprs_lextype(internal);