Add thesaurus dictionary which can replace N>0 lexemes by M>0 lexemes.

This required some changes to the lexize algorithm, but the dictionary interface stays compatible with existing dictionaries. Funded by Georgia Public Library Service and LibLime, Inc.
20 years ago · 22505f4703
parent 3b7ed9ba9c
commit 22505f4703
13 changed files with 1257 additions and 129 deletions
--- a/contrib/tsearch2/Makefile
+++ b/contrib/tsearch2/Makefile
@ -1,13 +1,13 @@
-# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.14 2006/05/02 11:28:54 teodor Exp $
+# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.15 2006/05/31 14:05:31 teodor Exp $
 MODULE_big = tsearch2
 OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
-       dict_snowball.o dict_ispell.o dict_syn.o \
+       dict_snowball.o dict_ispell.o dict_syn.o dict_thesaurus.o \
       wparser.o wparser_def.o \
       ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
       tsvector_op.o rank.o ts_stat.o \
       query_util.o query_support.o query_rewrite.o query_gist.o \
-       ts_locale.o ginidx.o
+       ts_locale.o ts_lexize.o ginidx.o
 SUBDIRS     := snowball ispell wordparser
 SUBDIROBJS  := $(SUBDIRS:%=%/SUBSYS.o)
@ -16,7 +16,7 @@ OBJS	+= $(SUBDIROBJS)
 PG_CPPFLAGS = -I$(srcdir)/snowball -I$(srcdir)/ispell -I$(srcdir)/wordparser
-DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8
+DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8 thesaurus
 DATA_built = tsearch2.sql untsearch2.sql
 DOCS = README.tsearch2
 REGRESS = tsearch2
--- a/contrib/tsearch2/common.c
+++ b/contrib/tsearch2/common.c
@ -5,6 +5,7 @@
 #include "catalog/pg_proc.h"
 #include "catalog/pg_namespace.h"
 #include "utils/syscache.h"
 #include "miscadmin.h"
 #include "ts_cfg.h"
 #include "dict.h"
@ -163,3 +164,23 @@ get_oidnamespace(Oid funcoid)
 	return nspoid;
 }
    /* if path is relative, take it as relative to share dir */
 /*
  * Resolve a file name against the share directory.
  *
  * An absolute path is handed back untouched (same pointer).  Otherwise a
  * freshly palloc'ed string "<sharepath><delim><filename>" is returned,
  * where <delim> is '\\' when WIN32 is defined and '/' otherwise.  The
  * caller's string is never freed here.
  */
 char *
 to_absfilename(char *filename) {
 #ifdef  WIN32
 	char		separator = '\\';
 #else
 	char		separator = '/';
 #endif
 	char		sharedir[MAXPGPATH];
 	char	   *resolved;
 	/* nothing to do for absolute paths */
 	if (is_absolute_path(filename))
 		return filename;
 	get_share_path(my_exec_path, sharedir);
 	/* +2: one byte for the separator, one for the terminating NUL */
 	resolved = palloc(strlen(sharedir) + strlen(filename) + 2);
 	sprintf(resolved, "%s%c%s", sharedir, separator, filename);
 	return resolved;
 }
--- a/contrib/tsearch2/common.h
+++ b/contrib/tsearch2/common.h
@ -16,6 +16,8 @@ text	   *mtextdup(text *in);
 int			text_cmp(text *a, text *b);
 char * to_absfilename(char *filename);
 #define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
 #define ARRNELEMS(x)  ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))
--- a/contrib/tsearch2/dict.c
+++ b/contrib/tsearch2/dict.c
@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.11 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.12 2006/05/31 14:05:31 teodor Exp $ */
 /*
 * interface functions to dictionary
@ -50,16 +50,19 @@ init_dict(Oid id, DictInfo * dict)
 		Datum		opt;
 		Oid			oid = InvalidOid;
 		/* setup dictlexize method */
 		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
 		if (isnull || oid == InvalidOid)
 			ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
 		fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
 		/* setup and call dictinit method, optinally */
 		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
 		if (!(isnull || oid == InvalidOid))
 		{
 			opt = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
 			dict->dictionary = (void *) DatumGetPointer(OidFunctionCall1(oid, opt));
 		}
 		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
 		if (isnull || oid == InvalidOid)
 			ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
 		fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
 		dict->dict_id = id;
 	}
 	else
@ -98,6 +101,29 @@ comparedict(const void *a, const void *b)
 	return (((DictInfo *) a)->dict_id < ((DictInfo *) b)->dict_id) ? -1 : 1;
 }
 /*
  * Initialise the dictionary with the given id and add it to DList,
  * keeping DList.list sorted by dict_id (see comparedict).
  *
  * The backing array is grown (16 slots initially, doubling afterwards)
  * before init_dict() runs, so a slot is always available for the result.
  */
 static void
 insertdict(Oid id) {
 	DictInfo	fresh;
 	if (DList.reallen == DList.len)
 	{
 		int			capacity;
 		DictInfo   *grown;
 		if (DList.reallen)
 			capacity = 2 * DList.reallen;
 		else
 			capacity = 16;
 		grown = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * capacity);
 		if (grown == NULL)
 			ts_error(ERROR, "No memory");
 		DList.list = grown;
 		DList.reallen = capacity;
 	}
 	/* build into a local first, then copy into the next free slot */
 	init_dict(id, &fresh);
 	DList.list[DList.len] = fresh;
 	DList.len += 1;
 	/* restore ordering after the append */
 	qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
 }
 DictInfo *
 finddict(Oid id)
 {
@ -117,23 +143,8 @@ finddict(Oid id)
 			return DList.last_dict;
 	}
-	/* last chance */
+	/* insert new dictionary */ 
-	if (DList.len == DList.reallen)
+	insertdict(id);
 	{
 		DictInfo   *tmp;
 		int			reallen = (DList.reallen) ? 2 * DList.reallen : 16;
 		tmp = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * reallen);
 		if (!tmp)
 			ts_error(ERROR, "No memory");
 		DList.reallen = reallen;
 		DList.list = tmp;
 	}
 	DList.last_dict = &(DList.list[DList.len]);
 	init_dict(id, DList.last_dict);
 	DList.len++;
 	qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
 	return finddict(id); /* qsort changed order!! */ ;
 }
@ -190,17 +201,32 @@ lexize(PG_FUNCTION_ARGS)
 			   *ptr;
 	Datum	   *da;
 	ArrayType  *a;
 	DictSubState	dstate = { false, false, NULL };
 	SET_FUNCOID();
 	dict = finddict(PG_GETARG_OID(0));
 	ptr = res = (TSLexeme *) DatumGetPointer(
-										  FunctionCall3(&(dict->lexize_info),
+										FunctionCall4(&(dict->lexize_info),
 										PointerGetDatum(dict->dictionary),
 										PointerGetDatum(VARDATA(in)),
 										Int32GetDatum(VARSIZE(in) - VARHDRSZ),
 										PointerGetDatum(&dstate)
 														)
 		);
 	if (dstate.getnext)  {
 		dstate.isend = true;	
 		ptr = res = (TSLexeme *) DatumGetPointer(
 										FunctionCall4(&(dict->lexize_info),
 										   PointerGetDatum(dict->dictionary),
 												PointerGetDatum(VARDATA(in)),
-										Int32GetDatum(VARSIZE(in) - VARHDRSZ)
+										Int32GetDatum(VARSIZE(in) - VARHDRSZ),
 										PointerGetDatum(&dstate)
 														)
 		);
 	}
 	PG_FREE_IF_COPY(in, 1);
 	if (!res)
 	{
--- a/contrib/tsearch2/dict.h
+++ b/contrib/tsearch2/dict.h
@ -1,9 +1,10 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.6 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.7 2006/05/31 14:05:31 teodor Exp $ */
 #ifndef __DICT_H__
 #define __DICT_H__
 #include "postgres.h"
 #include "fmgr.h"
 #include "ts_cfg.h"
 typedef struct
 {
@ -29,6 +30,11 @@ DictInfo   *finddict(Oid id);
 Oid			name2id_dict(text *name);
 void		reset_dict(void);
 /*
  * State exchanged between a caller and a dictionary's lexize function;
  * a pointer to it is passed as the function's fourth argument.
  */
 typedef struct {
 	bool isend; /* in: set by the caller to tell the dictionary that the end of the text is reached */
 	bool getnext; /* out: set by the dictionary when it wants the next lexeme */
 	void	*private;  /* internal dict state kept between calls with getnext == true */
 } DictSubState;
 /* simple parser of cfg string */
 typedef struct
@ -45,17 +51,61 @@ typedef struct
 	/*
 	 * number of variant of split word , for example Word 'fotballklubber'
 	 * (norwegian) has two varian to split: ( fotball, klubb ) and ( fot,
-	 * ball, klubb ). So, dictionary should return: nvariant	lexeme 1
+	 * ball, klubb ). So, dictionary should return: 
-	 * fotball 1	   klubb 2		 fot 2		 ball 2		  klubb
+	 * nvariant	lexeme 
-	 *
+	 *   1 		fotball 
 	 *   1	   	klubb 
 	 *	 2		fot 
 	 *	 2		ball 
 	 *   2		klubb
 	 */
 	uint16		nvariant;
 	/* currently unused */
 	uint16		flags;
 	/* C-string */
 	char	   *lexeme;
 }	TSLexeme;
 #define TSL_ADDPOS		0x01
 /*
 * Lexize subsystem
 */
 /*
  * One lexeme queued for the lexize subsystem; elements are chained
  * through 'next' into a singly linked list.
  */
 typedef struct ParsedLex {
    int     	type;		/* lexeme type; presumably the value passed to LexizeAddLemm() */
    char    	*lemm;		/* lexeme text; length is given by lenlemm */
    int     	lenlemm;	/* length of lemm */
 	bool		resfollow;	/* NOTE(review): meaning not visible in this chunk; confirm in ts_lexize.c */
    struct ParsedLex *next;	/* next list element, or NULL */
 } ParsedLex;
 /* Head/tail pair describing a singly linked list of ParsedLex elements. */
 typedef struct ListParsedLex {
 	ParsedLex	*head;	/* first element of the list */
 	ParsedLex	*tail;	/* last element of the list */
 } ListParsedLex;
 /*
  * Working state of the lexize subsystem; set up by LexizeInit(), fed by
  * LexizeAddLemm() and consumed by LexizeExec().
  */
 typedef struct {
    TSCfgInfo       *cfg;		/* configuration given to LexizeInit() */
    Oid             curDictId;	/* NOTE(review): presumably the dictionary currently in use; confirm */
    int             posDict;	/* NOTE(review): presumably position within the dictionary list; confirm */
    DictSubState    dictState;	/* state handed to the dictionary's lexize function */
    ParsedLex       *curSub;	/* NOTE(review): usage not visible in this chunk */
 	ListParsedLex	towork;   /* current list to work */
 	ListParsedLex	waste;    /* list of lexemes that are already lexized */
 	/* fields to store the last variant to lexize (basically for thesaurus
 	   or similar dictionaries, which want several lexemes) */	
 	ParsedLex		*lastRes;
 	TSLexeme		*tmpRes;
 } LexizeData;
 void LexizeInit(LexizeData *ld, TSCfgInfo *cfg);
 void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm);
 TSLexeme* LexizeExec(LexizeData *ld, ParsedLex **correspondLexem);
 #endif
--- a/contrib/tsearch2/dict_thesaurus.c
+++ b/contrib/tsearch2/dict_thesaurus.c
@ -0,0 +1,743 @@
 /* $PostgreSQL: pgsql/contrib/tsearch2/dict_thesaurus.c,v 1.1 2006/05/31 14:05:31 teodor Exp $ */
 /*
 * thesaurus
 * Teodor Sigaev <teodor@sigaev.ru>
 */
 #include "postgres.h"
 #include "executor/spi.h"
 #include <ctype.h>
 #include "dict.h"
 #include "common.h"
 #include "ts_locale.h"
 /*
  * Describes one occurrence of a lexeme inside a thesaurus phrase
  * (the left-hand side of a thesaurus line).
  */
 typedef struct LexemeInfo {
 	uint16	idsubst; /* entry's number in DictThesaurus->subst */
 	uint16	posinsubst; /* position of the lexeme within the entry's phrase */
 	uint16	tnvariant;  /* total num lexemes in one variant */
 	/* next occurrence of the same lexeme; linked by compileTheLexeme() */
 	struct LexemeInfo *nextentry;
 	/* next candidate in the list of matches built by findVariant() */
 	struct LexemeInfo *nextvariant;
 } LexemeInfo;
 /*
  * A lexeme together with the list of places where it occurs in
  * thesaurus phrases.  After compileTheLexeme() a NULL lexeme stands for
  * a word the subdictionary did not recognize.
  */
 typedef struct {
 	char 		*lexeme;	/* malloc'ed C-string, or NULL */
 	LexemeInfo	*entries;	/* occurrences, chained through nextentry */
 } TheLexeme; 
 /* One thesaurus entry: the phrase length and the result that replaces it. */
 typedef struct {
 	/* position of the phrase's last lexeme; addWrd() stores posinsubst-1,
 	   checkMatch() compares it with the current position */
 	uint16	lastlexeme;
 	uint16	reslen;		/* number of elements in res, set by compileTheSubstitute() */
 	TSLexeme	*res;   /* prepared substituted result */ 
 } TheSubstitute;
 /* Complete in-memory state of one thesaurus dictionary. */
 typedef struct
 {
 	/* subdictionary to normalize lexemes */	
 	DictInfo	subdict;
 	/* Array to search lexeme by exact match */
 	TheLexeme	*wrds;
 	int			nwrds;	/* number of used elements in wrds */
 	int			ntwrds;	/* number of allocated elements in wrds */
 	/* Storage of substituted result, n-th element is for
 	   n-th expression */
 	TheSubstitute	*subst;
 	/* allocated size of subst while the file is being read (addWrd),
 	   number of entries once thesaurusRead() has finished */
 	int				nsubst;
 }	DictThesaurus;
 PG_FUNCTION_INFO_V1(thesaurus_init);
 Datum		thesaurus_init(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(thesaurus_lexize);
 Datum		thesaurus_lexize(PG_FUNCTION_ARGS);
 /*
  * Release the DictThesaurus structure.
  *
  * NOTE(review): only the top-level struct is freed; the arrays reachable
  * through d->wrds and d->subst (malloc'ed by newLexeme()/addWrd()) are
  * not released here.
  */
 static void
 freeDictThesaurus(DictThesaurus * d)
 {
 	free(d);
 }
 /*
  * Append the word [b, e) to d->wrds as a new TheLexeme with a single
  * LexemeInfo recording (idsubst, posinsubst).
  *
  * d->wrds starts with 16 slots and doubles when full.  Only idsubst,
  * posinsubst and nextentry of the new LexemeInfo are initialised here.
  */
 static void
 newLexeme( DictThesaurus *d, char *b, char *e, uint16 idsubst, uint16 posinsubst ) {
 	TheLexeme	*slot;
 	LexemeInfo	*info;
 	size_t		wordlen;
 	if ( d->ntwrds <= d->nwrds ) {
 		if ( d->ntwrds != 0 ) {
 			d->ntwrds *= 2;
 			d->wrds = (TheLexeme*)realloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
 		} else {
 			d->ntwrds = 16;
 			d->wrds = (TheLexeme*)malloc(sizeof(TheLexeme) * d->ntwrds);
 		}
 		if ( d->wrds == NULL )
 			elog(ERROR,"Out of memory");
 	}
 	/* claim the next free slot */
 	slot = &d->wrds[ d->nwrds ];
 	d->nwrds++;
 	/* private NUL-terminated copy of the word */
 	wordlen = e - b;
 	slot->lexeme = malloc(wordlen + 1);
 	if ( slot->lexeme == NULL )
 		elog(ERROR,"Out of memory");
 	memcpy(slot->lexeme, b, wordlen);
 	slot->lexeme[wordlen] = '\0';
 	info = (LexemeInfo*)malloc( sizeof(LexemeInfo) );
 	slot->entries = info;
 	if ( info == NULL )
 		elog(ERROR,"Out of memory");
 	info->idsubst = idsubst;
 	info->posinsubst = posinsubst;
 	info->nextentry = NULL;
 }
 /*
  * Append the word [b, e) to the raw substitution result of thesaurus
  * entry 'idsubst' (the right-hand side of a thesaurus line).
  *
  *	nwrd		index of the word within the right-hand side; 0 marks
  *				the first word of a new entry and resets the per-entry
  *				counters
  *	posinsubst	number of lexemes on the left-hand side; posinsubst-1 is
  *				stored as the entry's lastlexeme
  *
  * The result array is kept terminated by an element whose lexeme is
  * NULL.  While the file is being read d->nsubst holds the allocated size
  * of d->subst; it is grown (16, then doubling) only when idsubst has
  * reached that size.
  *
  * The fill level and capacity of the current entry's result array are
  * kept in static variables, so calls for one entry must not be
  * interleaved with calls for another.
  */
 static void
 addWrd( DictThesaurus *d, char *b, char *e, uint16 idsubst, uint16 nwrd, uint16 posinsubst ) {
 	static	int nres=0;
 	static  int ntres = 0;
 	TheSubstitute	*ptr;
 	if ( nwrd == 0 ) {
 		nres = ntres = 0;
 		/* grow only when the new entry does not fit into the array */
 		if ( idsubst >= d->nsubst ) {
 			if ( d->nsubst == 0 ) {
 				d->nsubst = 16;
 				d->subst = (TheSubstitute*)malloc(sizeof(TheSubstitute) * d->nsubst);
 			} else {
 				d->nsubst *= 2;
 				d->subst = (TheSubstitute*)realloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
 			}
 			if (!d->subst)
 				elog(ERROR,"Out of memory");
 		}
 	}
 	ptr = d->subst + idsubst;
 	ptr->lastlexeme = posinsubst-1;
 	/* keep room for the new word plus the NULL terminator */
 	if ( nres+1 >= ntres ) {
 		if ( ntres == 0 ) {
 			ntres = 2;
 			ptr->res = (TSLexeme*)malloc( sizeof(TSLexeme) * ntres );
 		} else {
 			ntres *= 2;
 			ptr->res = (TSLexeme*)realloc( ptr->res, sizeof(TSLexeme) * ntres );
 		}
 		if ( !ptr->res ) 
 				elog(ERROR,"Out of memory");
 	}
 	if ( (ptr->res[ nres ].lexeme = malloc(e-b+1))==0 ) 
 		elog(ERROR,"Out of memory");
 	memcpy(ptr->res[ nres ].lexeme, b, e-b);
 	ptr->res[ nres ].lexeme[e-b] = '\0';
 	ptr->res[ nres ].nvariant = nwrd;
 	ptr->res[ nres ].flags = TSL_ADDPOS;
 	/* terminate the array after the word just stored */
 	ptr->res[ ++nres ].lexeme = NULL;
 }
 #define TR_WAITLEX	1
 #define TR_INLEX	2
 #define	TR_WAITSUBS	3
 #define TR_INSUBS	4
 /*
  * Read the thesaurus file 'filename' (resolved by to_absfilename())
  * into d.
  *
  * Each non-comment line has the form
  *		lexeme [lexeme ...] : word [word ...]
  * Words left of ':' are stored with newLexeme(), words right of it with
  * addWrd().  Lines that are empty, contain only whitespace, or whose
  * first non-whitespace character is '#' are skipped.  Every other line
  * becomes one entry, numbered consecutively from 0; on return d->nsubst
  * is the number of entries read.
  *
  * Raises an error if the file cannot be opened, if ':' appears before
  * any lexeme, or if a line lacks either side.
  */
 static void
 thesaurusRead( char *filename, DictThesaurus *d ) {
 	FILE *fh;
 	char str[BUFSIZ];
 	int lineno=0;
 	uint16	idsubst = 0;
 	fh = fopen(to_absfilename(filename), "r");
 	if (!fh)
 		elog(ERROR,"Thesaurus: can't open '%s' file", filename);
 	while( fgets(str, sizeof(str), fh)) {
 		char *ptr = str;
 		int state = TR_WAITLEX;
 		char	*beginwrd = NULL;
 		uint16	posinsubst=0;
 		uint16	nwrd=0;
 		lineno++;
 		/* is it comment ? */
 		while( t_isspace(ptr) )
 			ptr += pg_mblen(ptr);
 		/* test the first non-whitespace character, not the start of the line */
 		if ( t_iseq(ptr, '#') || *ptr=='\0' || t_iseq(ptr, '\n') || t_iseq(ptr, '\r') )
 			continue;
 		pg_verifymbstr(ptr, strlen(ptr), false);
 		while(*ptr) {
 			if ( state == TR_WAITLEX ) {
 				/* between lexemes of the left-hand side */
 				if ( t_iseq(ptr, ':' ) ) {
 					if ( posinsubst == 0 ) {
 						fclose(fh);
 						elog(ERROR, "Thesaurus: Unexpected delimiter at %d line", lineno);
 					}
 					state = TR_WAITSUBS;
 				} else if ( !t_isspace(ptr) ) {
 					beginwrd = ptr;
 					state = TR_INLEX;
 				}
 			} else if ( state == TR_INLEX ) {
 				/* inside a lexeme of the left-hand side */
 				if ( t_iseq(ptr, ':') ) {
 					newLexeme( d, beginwrd, ptr, idsubst, posinsubst++ );
 					state = TR_WAITSUBS;
 				} else if ( t_isspace(ptr) ) {
 					newLexeme( d, beginwrd, ptr, idsubst, posinsubst++ );
 					state = TR_WAITLEX;
 				}
 			} else if ( state == TR_WAITSUBS ) {
 				/* between words of the right-hand side */
 				if ( !t_isspace(ptr) ) { 
 					beginwrd = ptr;
 					state = TR_INSUBS;
 				}
 			} else if ( state == TR_INSUBS ) {
 				/* inside a word of the right-hand side */
 				if ( t_isspace(ptr) ) { 
 					addWrd( d, beginwrd, ptr, idsubst, nwrd++, posinsubst );
 					state = TR_WAITSUBS;
 				}
 			} else
 				elog(ERROR,"Thesaurus: Unknown state: %d", state);
 			ptr += pg_mblen(ptr);
 		}
 		/* the last word may end at the end of the buffer without trailing space */
 		if ( state == TR_INSUBS )
 			addWrd( d, beginwrd, ptr, idsubst, nwrd++, posinsubst );
 		idsubst++;
 		if ( !(nwrd && posinsubst) ) {
 			fclose(fh);
 			elog(ERROR, "Thesaurus: Unexpected end of line at %d line", lineno);
 		}
 	}
 	/* from here on nsubst is the number of entries, not the allocated size */
 	d->nsubst = idsubst;
 	fclose(fh);
 }
 /*
  * Append one compiled lexeme to the array 'newwrds' and return the
  * (possibly reallocated) array.
  *
  *	nnw / tnm	used / allocated element counts, updated in place
  *	lexeme		normalized lexeme to store; when it is NULL or carries a
  *				NULL string, the slot gets a NULL lexeme and tnvariant 1
  *	src			source of idsubst and posinsubst for the new LexemeInfo
  *	tnvariant	stored only when a lexeme string is present
  */
 static TheLexeme*
 addCompiledLexeme(TheLexeme   *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo* src, uint16 tnvariant) {
 	TheLexeme	*slot;
 	LexemeInfo	*info;
 	if ( *tnm <= *nnw ) {
 		*tnm *= 2;
 		newwrds = (TheLexeme*)realloc( newwrds, sizeof(TheLexeme) * *tnm);
 		if ( newwrds == NULL )
 			elog(ERROR,"Out of memory");
 	}
 	slot = newwrds + *nnw;
 	info = (LexemeInfo*)malloc( sizeof(LexemeInfo) );
 	slot->entries = info;
 	if ( info == NULL )
 		elog(ERROR,"Out of memory");
 	if ( lexeme == NULL || lexeme->lexeme == NULL ) {
 		slot->lexeme = NULL;
 		info->tnvariant = 1;
 	} else {
 		slot->lexeme = strdup( lexeme->lexeme );
 		if ( slot->lexeme == NULL )
 			elog(ERROR,"Out of memory");
 		info->tnvariant = tnvariant;
 	}
 	info->idsubst = src->idsubst;
 	info->posinsubst = src->posinsubst;
 	info->nextentry = NULL;
 	*nnw += 1;
 	return newwrds;
 }
 /*
  * Three-way comparison of two LexemeInfo by idsubst, then posinsubst,
  * then tnvariant.  Returns 0 when either argument is NULL.
  */
 static int
 cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b) {
 	if ( !a || !b )
 		return 0;
 	if ( a->idsubst != b->idsubst )
 		return ( a->idsubst > b->idsubst ) ? 1 : -1;
 	if ( a->posinsubst != b->posinsubst )
 		return ( a->posinsubst > b->posinsubst ) ? 1 : -1;
 	if ( a->tnvariant != b->tnvariant )
 		return ( a->tnvariant > b->tnvariant ) ? 1 : -1;
 	return 0;
 }
 /*
  * Compare two TheLexeme by their strings using strcmp(); a NULL lexeme
  * sorts after every non-NULL one and equals another NULL.
  */
 static int
 cmpLexeme(TheLexeme *a, TheLexeme* b) {
 	if ( a->lexeme != NULL && b->lexeme != NULL )
 		return strcmp( a->lexeme, b->lexeme );
 	if ( a->lexeme == NULL && b->lexeme == NULL )
 		return 0;
 	/* exactly one side is NULL */
 	return ( a->lexeme == NULL ) ? 1 : -1;
 }
 static int
 cmpLexemeQ(const void *a, const void *b) {
 	return cmpLexeme( (TheLexeme*)a, (TheLexeme*)b ); 
 }
 static int cmpTheLexeme(const void *a, const void *b) {
 	TheLexeme *la  = (TheLexeme*)a;
 	TheLexeme *lb  = (TheLexeme*)b;
 	int res;
 	if ( (res=cmpLexeme(la, lb)) != 0 )
 		return res;
 	return -cmpLexemeInfo(la->entries, lb->entries);
 }
 /*
  * Replace the raw words in d->wrds by their normalized forms.
  *
  * Every word read from the file is passed through the subdictionary's
  * lexize function.  The results are collected in a new array, which is
  * then sorted and reduced so that each distinct lexeme appears once,
  * with all of its occurrences chained through LexemeInfo.nextentry.
  * The old array and its contents are freed.
  */
 static void
 compileTheLexeme(DictThesaurus *d) {
 	int			i,nnw=0, tnm=16;
 	TheLexeme	*newwrds = (TheLexeme*)malloc(sizeof(TheLexeme)*tnm), *ptrwrds;
 	if (!newwrds) 
 		elog(ERROR,"Out of memory");
 	for(i=0;i<d->nwrds;i++) {
 		/* normalize the word; the 4th argument (DictSubState) is NULL */
 		TSLexeme *ptr = (TSLexeme*) DatumGetPointer( 
 				FunctionCall4(
 					&(d->subdict.lexize_info),
 					PointerGetDatum(d->subdict.dictionary),
 					PointerGetDatum(d->wrds[i].lexeme),
 					Int32GetDatum(strlen(d->wrds[i].lexeme)),
 					PointerGetDatum(NULL)
 				)
 			);
 		if ( !(ptr && ptr->lexeme) ) {
 			/* no result: store an entry with a NULL lexeme */
 			newwrds = addCompiledLexeme( newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
 			elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary or it's a stop-word, assign any non-recognized word", d->wrds[i].lexeme);
 		} else {
 			/* walk the result one variant (run of equal nvariant) at a time */
 			while( ptr->lexeme ) {
 				TSLexeme	*remptr = ptr+1;
 				int tnvar = 1;
 				int	curvar = ptr->nvariant;
 				/* compute n words in one variant */
 				while( remptr->lexeme ) {
 					if ( remptr->nvariant != (remptr-1)->nvariant )
 						break;
 					tnvar++;
 					remptr++;
 				}
 				/* add every lexeme of this variant, each tagged with tnvar */
 				remptr = ptr;
 				while( remptr->lexeme && remptr->nvariant == curvar ) {
 					newwrds = addCompiledLexeme( newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar); 
 					remptr++;
 				}
 				ptr = remptr;
 			}
 		}
 		/* the raw word is no longer needed */
 		free( d->wrds[i].lexeme );
 		free( d->wrds[i].entries );
 	}
 	free( d->wrds );
 	d->wrds = newwrds;
 	d->nwrds = nnw;
 	d->ntwrds = tnm;
 	if ( d->nwrds > 1 ) {
 		qsort( d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme ); 
 		/* uniq */
 		/* newwrds: last kept element, ptrwrds: element being examined */
 		newwrds = d->wrds;
 		ptrwrds = d->wrds + 1;
 		while( ptrwrds - d->wrds < d->nwrds ) {
 			if ( cmpLexeme( ptrwrds, newwrds ) == 0 ) {
 				/* same lexeme: push a differing entry onto the front of the
 				   kept element's chain, drop an exact duplicate */
 				if ( cmpLexemeInfo(ptrwrds->entries, newwrds->entries) ) {
 					ptrwrds->entries->nextentry = newwrds->entries;
 					newwrds->entries = ptrwrds->entries;
 				} else
 					free( ptrwrds->entries );
 				if ( ptrwrds->lexeme )
 					free( ptrwrds->lexeme );
 			} else {
 				newwrds++;
 				*newwrds = *ptrwrds;
 			}
 			ptrwrds++;
 		}
 		d->nwrds = newwrds - d->wrds + 1;
 		/* NOTE(review): the result of this shrinking realloc is not checked */
 		d->wrds = (TheLexeme*)realloc( d->wrds, sizeof(TheLexeme) * d->nwrds );
 	}
 }
 /*
  * Normalize the substitution results of all thesaurus entries.
  *
  * For each entry, every word of the raw result (built by addWrd()) is
  * passed through the subdictionary's lexize function and the returned
  * lexemes are copied (strdup) into a new malloc'ed array that replaces
  * d->subst[i].res.  The number of stored lexemes is recorded in reslen.
  * Words for which the subdictionary returns nothing contribute no
  * lexemes.  The raw result and its strings are freed.
  */
 static void
 compileTheSubstitute(DictThesaurus *d) {
 	int i;
 	for(i=0;i<d->nsubst;i++) {
 		/* rem: raw result to consume; outptr: next free slot of the new one */
 		TSLexeme	*rem = d->subst[i].res, *outptr, *inptr;
 		int			n=2;
 		outptr = d->subst[i].res = (TSLexeme*)malloc( sizeof(TSLexeme) * n );
 		if ( d->subst[i].res == NULL )
 			elog(ERROR,"Out of Memory");
 		outptr->lexeme = NULL;
 		inptr = rem;
 		while( inptr && inptr->lexeme ) { 
 			TSLexeme	*reml, *lexized = (TSLexeme*) DatumGetPointer( 
 				FunctionCall4(
 					&(d->subdict.lexize_info),
 					PointerGetDatum(d->subdict.dictionary),
 					PointerGetDatum(inptr->lexeme),
 					Int32GetDatum(strlen(inptr->lexeme)),
 					PointerGetDatum(NULL)
 				)
 			);
 			reml = lexized;
 			if ( lexized ) {
 				/* index of the first lexeme produced for this word, or -1 when
 				   there is none or it is the very first lexeme of the result */
 				int toset = (lexized->lexeme && outptr != d->subst[i].res ) ? (outptr - d->subst[i].res)  : -1;
 				while( lexized->lexeme ) {
 					if ( outptr - d->subst[i].res + 1 >= n ) {
 						/* remember the offset: realloc may move the array */
 						int diff = outptr - d->subst[i].res;
 						n *= 2;
 						d->subst[i].res = (TSLexeme*)realloc( d->subst[i].res, sizeof(TSLexeme) * n );
 						if ( d->subst[i].res == NULL )
 							elog(ERROR,"Out of Memory");
 						outptr = d->subst[i].res + diff;
 					}
 					/* copy nvariant/flags as returned, but own the string */
 					*outptr = *lexized;
 					if ( (outptr->lexeme = strdup(lexized->lexeme)) == NULL )
 						elog(ERROR,"Out of Memory");
 					outptr++;
 					lexized++;
 				}
 				/* mark the first lexeme of each later word with TSL_ADDPOS */
 				if ( toset > 0)
 					d->subst[i].res[toset].flags |= TSL_ADDPOS;
 			}
 			if ( inptr->lexeme )
 				free( inptr->lexeme );
 			inptr++;
 		}
 		d->subst[i].reslen = outptr - d->subst[i].res;
 		free(rem);
 	}
 }
 /*
  * Dictionary init method: build a DictThesaurus from the option string
  * given as argument 0.
  *
  * Recognized option keys (compared case-insensitively):
  *		DictFile	thesaurus file, loaded with thesaurusRead()
  *		Dictionary	name of the subdictionary used for normalization
  * Both are required and each may be given only once; any other key is
  * an error.  After the options are processed, the loaded words and
  * substitutions are compiled with the subdictionary.
  *
  * Returns a pointer to the malloc'ed DictThesaurus.
  */
 Datum
 thesaurus_init(PG_FUNCTION_ARGS)
 {
 	DictThesaurus *d;
 	Map		   *cfg,
 			   *pcfg;
 	text	   *in, *subdictname=NULL;
 	bool 		fileloaded = false;
 	if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
 		ereport(ERROR,
 				(errcode(ERRCODE_CONFIG_FILE_ERROR),
 				 errmsg("Thesaurus confguration error")));
 	d = (DictThesaurus *) malloc(sizeof(DictThesaurus));
 	if (!d)
 		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));
 	memset(d, 0, sizeof(DictThesaurus));
 	in = PG_GETARG_TEXT_P(0);
 	parse_cfgdict(in, &cfg);
 	PG_FREE_IF_COPY(in, 0);
 	/* walk the key/value pairs; the list ends at an element with a NULL key */
 	pcfg = cfg;
 	while (pcfg->key)
 	{
 		if (pg_strcasecmp("DictFile", pcfg->key) == 0)
 		{
 			if (fileloaded)
 			{
 				freeDictThesaurus(d);
 				ereport(ERROR,
 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 						 errmsg("Thesaurus file is already loaded")));
 			}
 			fileloaded = true;
 			thesaurusRead( pcfg->value, d );
 		}
 		else if (pg_strcasecmp("Dictionary", pcfg->key) == 0)
 		{
 			if (subdictname)
 			{
 				freeDictThesaurus(d);
 				ereport(ERROR,
 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 						 errmsg("Thesaurus: SubDictionary is already defined")));
 			}
 			subdictname = char2text( pcfg->value );
 		}
 		else
 		{
 			freeDictThesaurus(d);
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("unrecognized option: %s => %s",
 							pcfg->key, pcfg->value)));
 		}
 		pfree(pcfg->key);
 		pfree(pcfg->value);
 		pcfg++;
 	}
 	pfree(cfg);
 	/* NOTE(review): d is not released on the two "isn't defined" error paths below */
 	if (!fileloaded)
 		ereport(ERROR,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("Thesaurus file  isn't defined")));
 	if ( subdictname ) {
 		DictInfo	*subdictptr;
 		/* 
 		 * we are already in SPI, but name2id_dict()/finddict()
 		 * invoke SPI_connect()
 		 */
 		SPI_push(); 
 		subdictptr = finddict( name2id_dict( subdictname ) );
 		SPI_pop();
 		/* keep a copy of the DictInfo rather than the returned pointer */
 		d->subdict = *subdictptr;
 	} else 
 		ereport(ERROR,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("Thesaurus: SubDictionary isn't defined")));
 	compileTheLexeme( d );
 	compileTheSubstitute(d);
 	PG_RETURN_POINTER(d);
 }
 /*
  * Look up 'lexeme' (which may be NULL) in d->wrds with bsearch() and
  * return its chain of entries, or NULL when the array is empty or the
  * lexeme is not present.
  */
 static LexemeInfo*
 findTheLexeme(DictThesaurus *d, char * lexeme) {
 	TheLexeme	probe;
 	TheLexeme	*hit;
 	if ( d->nwrds == 0 )
 		return NULL;
 	probe.lexeme = lexeme;
 	probe.entries = NULL;
 	hit = bsearch(&probe, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
 	return hit ? hit->entries : NULL;
 }
 /*
  * Tell whether 'idsubst' occurs in the nextvariant-linked list starting
  * at 'stored'.  An empty list (stored == NULL) matches everything.
  */
 static bool
 matchIdSubst(LexemeInfo *stored, uint16 idsubst) {
 	if ( stored == NULL )
 		return true;
 	while( stored != NULL ) {
 		if ( stored->idsubst == idsubst )
 			return true;
 		stored = stored->nextvariant;
 	}
 	return false;
 }
 /*
  * Collect the thesaurus entries that the current set of lexemes can
  * continue or start.
  *
  *	in		list of candidates found so far (linked by nextvariant), may
  *			be NULL
  *	stored	candidates remembered from the previous call, or NULL; when
  *			non-NULL only entries whose idsubst occurs in it are accepted
  *	curpos	position within the phrase that the lexemes must occupy
  *	newin	array of newn entry chains (linked by nextentry), one per
  *			lexeme of the variant; the array elements are advanced in
  *			place while searching
  *
  * The loop looks for an idsubst for which every chain has an entry with
  * posinsubst == curpos and tnvariant == newn.  Each such entry that
  * passes the 'stored' filter and is not already in 'in' is pushed onto
  * the front of 'in'.  The (possibly extended) list is returned as soon
  * as any chain is exhausted.
  */
 static LexemeInfo*
 findVariant( LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn) {
 	for(;;) {
 		int i;
 		/* ptr: entry whose idsubst the other chains are aligned to */
 		LexemeInfo *ptr = newin[0];
 		for(i=0; i<newn; i++) {
 			/* skip entries of this chain that lie before the target idsubst */
 			while(newin[i] && newin[i]->idsubst < ptr->idsubst) 
 				newin[i] = newin[i]->nextentry;
 			if ( newin[i] == NULL )
 				return in;
 			if ( newin[i]->idsubst > ptr->idsubst ) {
 				/* this chain is already further on: restart with its idsubst */
 				ptr = newin[i];
 				i=-1;
 				continue;
 			}
 			/* same idsubst: look for an entry at curpos with newn lexemes */
 			while(newin[i]->idsubst == ptr->idsubst) {
 				if ( newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn ) {
 					ptr = newin[i];
 					break;
 				}
 				newin[i] = newin[i]->nextentry;
 				if ( newin[i] == NULL )
 					return in;
 			}
 			if ( newin[i]->idsubst != ptr->idsubst ) {
 				/* ran past the target without a match: restart from here */
 				ptr = newin[i];
 				i=-1;
 				continue;
 			}
 		}
 		if ( i==newn && matchIdSubst(stored, ptr->idsubst) && (in==NULL || !matchIdSubst(in, ptr->idsubst)) ) { /* found */
 			ptr->nextvariant = in;
 			in = ptr;
 		}
 		/* step forward */
 		for(i=0; i<newn; i++)
 			newin[i] = newin[i]->nextentry;
 	}
 	/* not reached: the loop above only leaves through its return statements */
 	return NULL;
 }
 /*
  * Return a palloc'ed copy of the prepared result of 'ts': reslen
  * elements with pstrdup'ed strings, followed by a terminating element
  * whose lexeme is NULL.
  */
 static TSLexeme*
 copyTSLexeme( TheSubstitute *ts ) {
 	TSLexeme	*copy;
 	TSLexeme	*dst;
 	TSLexeme	*src;
 	uint16		left;
 	copy = (TSLexeme*)palloc( sizeof(TSLexeme) * (ts->reslen+1) );
 	dst = copy;
 	src = ts->res;
 	for(left = ts->reslen; left > 0; left--) {
 		*dst = *src;
 		dst->lexeme = pstrdup( src->lexeme );
 		dst++;
 		src++;
 	}
 	/* dst now points at the extra trailing element */
 	dst->lexeme = NULL;
 	return copy;
 }
 /*
  * Walk the candidate list 'info' (linked by nextvariant) and return a
  * copy of the result of the first entry whose last lexeme position
  * equals 'curpos'; NULL when no candidate is complete.
  *
  * *moreres is set to true when any candidate visited has a successor
  * in the list, false otherwise.
  */
 static TSLexeme*
 checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres) {
 	LexemeInfo	*cur;
 	*moreres = false;
 	for(cur = info; cur != NULL; cur = cur->nextvariant) {
 		TheSubstitute	*cand;
 		Assert( cur->idsubst < d->nsubst );
 		if ( cur->nextvariant != NULL )
 			*moreres = true;
 		cand = d->subst + cur->idsubst;
 		if ( cand->lastlexeme == curpos ) 
 			return copyTSLexeme( cand );
 	}
 	return NULL;
 }
 /*
  * Dictionary lexize method of the thesaurus.
  *
  * Arguments: 0 - DictThesaurus, 1 and 2 - the word and its length (both
  * passed on to the subdictionary), 3 - DictSubState (required).
  *
  * The word is normalized by the subdictionary and matched against the
  * thesaurus phrases at the position following the candidates remembered
  * in dstate->private.  The surviving candidates are stored back into
  * dstate->private.
  *
  * Returns the substitution when a phrase is completed, NULL otherwise.
  * dstate->getnext is set when the dictionary wants the next lexeme:
  * always when candidates exist but none is complete, and, on a complete
  * match, when checkMatch() reported further candidates.
  */
 Datum
 thesaurus_lexize(PG_FUNCTION_ARGS)
 {
 	DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
 	DictSubState	*dstate = (DictSubState*)PG_GETARG_POINTER(3);
 	TSLexeme	*res=NULL;
 	LexemeInfo *stored, *info = NULL;
 	uint16	curpos = 0;
 	bool	moreres = false;
 	if ( dstate == NULL || PG_NARGS() < 4 )
 		elog(ERROR,"Forbidden call of thesaurus or nested call");
 	/* nothing to return once the caller signals the end of the text */
 	if ( dstate->isend ) 
 		PG_RETURN_POINTER(NULL);
 	/* continue after the position matched by the previous call, if any */
 	stored = (LexemeInfo*) dstate->private;
 	if (stored) 
 		curpos = stored->posinsubst+1;
 	/* normalize the word with the subdictionary (no DictSubState) */
 	res =(TSLexeme*) DatumGetPointer (
 		FunctionCall4(
 			&(d->subdict.lexize_info),
 			PointerGetDatum(d->subdict.dictionary),
 			PG_GETARG_DATUM(1),
 			PG_GETARG_INT32(2),
 			PointerGetDatum(NULL)
 		)
 	);
 	if ( res && res->lexeme ) {
 		TSLexeme	*ptr = res , *basevar;
 		/* handle each variant (run of equal nvariant) separately */
 		while( ptr->lexeme ) {
 			uint16		nv = ptr->nvariant;
 			uint16		i,nlex = 0;
 			LexemeInfo	**infos;
 			basevar = ptr;
 			while( ptr->lexeme && nv == ptr->nvariant ) {
 				nlex++;
 				ptr++;
 			}
 			/* every lexeme of the variant must be known to the thesaurus */
 			infos = (LexemeInfo**)palloc(sizeof(LexemeInfo*)*nlex);
 			for(i=0;i<nlex;i++) 
 				if ( (infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL )
 					break;
 			if ( i<nlex ) { 
 				/* no chance to find */
 				pfree( infos );
 				continue;
 			}
 			info = findVariant( info, stored, curpos, infos, nlex);
 		}
 	} else {
 		/* word not recognized by the subdictionary: match the NULL lexeme */
 		LexemeInfo	*infos = findTheLexeme(d, NULL);
 		info = findVariant( NULL, stored, curpos, &infos, 1);
 	}
 	dstate->private = (void*)info;
 	if ( !info ) {
 		dstate->getnext = false;
 		PG_RETURN_POINTER(NULL);
 	}
 	if ( (res=checkMatch(d, info, curpos,&moreres)) != NULL ) {
 		dstate->getnext = moreres;
 		PG_RETURN_POINTER(res);
 	}
 	/* candidates exist but none is complete yet */
 	dstate->getnext = true;
 	PG_RETURN_POINTER(NULL);	
 }
--- a/contrib/tsearch2/expected/tsearch2.out
+++ b/contrib/tsearch2/expected/tsearch2.out
@ -4,21 +4,21 @@
 --
 \set ECHO none
 psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_dict_pkey" for table "pg_ts_dict"
-psql:tsearch2.sql:158: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"
+psql:tsearch2.sql:177: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"
-psql:tsearch2.sql:257: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"
+psql:tsearch2.sql:276: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"
-psql:tsearch2.sql:264: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"
+psql:tsearch2.sql:283: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"
-psql:tsearch2.sql:370: NOTICE:  type "tsvector" is not yet defined
+psql:tsearch2.sql:389: NOTICE:  type "tsvector" is not yet defined
 DETAIL:  Creating a shell type definition.
-psql:tsearch2.sql:375: NOTICE:  argument type tsvector is only a shell
+psql:tsearch2.sql:394: NOTICE:  argument type tsvector is only a shell
-psql:tsearch2.sql:429: NOTICE:  type "tsquery" is not yet defined
+psql:tsearch2.sql:448: NOTICE:  type "tsquery" is not yet defined
 DETAIL:  Creating a shell type definition.
-psql:tsearch2.sql:434: NOTICE:  argument type tsquery is only a shell
+psql:tsearch2.sql:453: NOTICE:  argument type tsquery is only a shell
-psql:tsearch2.sql:592: NOTICE:  type "gtsvector" is not yet defined
+psql:tsearch2.sql:611: NOTICE:  type "gtsvector" is not yet defined
 DETAIL:  Creating a shell type definition.
-psql:tsearch2.sql:597: NOTICE:  argument type gtsvector is only a shell
+psql:tsearch2.sql:616: NOTICE:  argument type gtsvector is only a shell
-psql:tsearch2.sql:1087: NOTICE:  type "gtsq" is not yet defined
+psql:tsearch2.sql:1106: NOTICE:  type "gtsq" is not yet defined
 DETAIL:  Creating a shell type definition.
-psql:tsearch2.sql:1092: NOTICE:  argument type gtsq is only a shell
+psql:tsearch2.sql:1111: NOTICE:  argument type gtsq is only a shell
 --tsvector
 SELECT '1'::tsvector;
 tsvector 
--- a/contrib/tsearch2/stopword.c
+++ b/contrib/tsearch2/stopword.c
@ -4,8 +4,6 @@
 */
 #include "postgres.h"
 #include "miscadmin.h"
 #include "common.h"
 #include "dict.h"
 #include "ts_locale.h"
@ -36,30 +34,11 @@ readstoplist(text *in, StopList * s)
 	s->len = 0;
 	if (in && VARSIZE(in) - VARHDRSZ > 0)
 	{
-		char	   *filename = text2char(in);
+		char	   *filename = to_absfilename(text2char(in));
 		FILE	   *hin;
 		char		buf[STOPBUFLEN];
 		int			reallen = 0;
 		/* if path is relative, take it as relative to share dir */
 		if (!is_absolute_path(filename))
 		{
 			char		sharepath[MAXPGPATH];
 			char	   *absfn;
 #ifdef	WIN32
 			char	delim = '\\';
 #else
 			char 	delim = '/';
 #endif
 			get_share_path(my_exec_path, sharepath);
 			absfn = palloc(strlen(sharepath) + strlen(filename) + 2);
 			sprintf(absfn, "%s%c%s", sharepath, delim, filename);
 			pfree(filename);
 			filename = absfn;
 		}
 		if ((hin = fopen(filename, "r")) == NULL)
 			ereport(ERROR,
 					(errcode(ERRCODE_CONFIG_FILE_ERROR),
--- a/contrib/tsearch2/thesaurus
+++ b/contrib/tsearch2/thesaurus
@ -0,0 +1,19 @@
 #
 # Thesaurus config file. The character ':' splits
 # each line into two parts:
 #     the string to be substituted
 #     the substituting string
 #
 #one two three : 123
 #one two : 12
 #one : 1
 #two : 2
 #foo bar : blah blah
 #f   bar : fbar
 #e   bar : ebar
 #g   bar bar : gbarbar
 #asd:sdffff
 #qwerty:qwer wert erty
--- a/contrib/tsearch2/ts_cfg.c
+++ b/contrib/tsearch2/ts_cfg.c
@ -281,15 +281,15 @@ name2id_cfg(text *name)
 	return id;
 }
 void
 parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 {
 	int			type,
-				lenlemm,
+				lenlemm;
 				i;
 	char	   *lemm = NULL;
 	WParserInfo *prsobj = findprs(cfg->prs_id);
 	LexizeData	ldata;
 	TSLexeme   *norms;
 	prsobj->prs = (void *) DatumGetPointer(
 										   FunctionCall2(
@ -299,14 +299,16 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 														 )
 		);
-	while ((type = DatumGetInt32(FunctionCall3(
+	LexizeInit(&ldata, cfg);
 	do {
 		type = DatumGetInt32(FunctionCall3(
 											   &(prsobj->getlexeme_info),
 											   PointerGetDatum(prsobj->prs),
 											   PointerGetDatum(&lemm),
-										   PointerGetDatum(&lenlemm)))) != 0)
+										   PointerGetDatum(&lenlemm)));
 	{
-		if (lenlemm >= MAXSTRLEN)
+		if (type>0 && lenlemm >= MAXSTRLEN)
 		{
 #ifdef IGNORE_LONGLEXEME
 			ereport(NOTICE,
@ -320,25 +322,11 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 #endif
 		}
-		if (type >= cfg->len)	/* skip this type of lexeme */
+		LexizeAddLemm(&ldata, type, lemm, lenlemm);
 			continue;
-		for (i = 0; i < cfg->map[type].len; i++)
+		while(  (norms = LexizeExec(&ldata, NULL)) != NULL )
 		{
-			DictInfo   *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
+			TSLexeme *ptr = norms;
 			TSLexeme   *norms,
 					   *ptr;
 			norms = ptr = (TSLexeme *) DatumGetPointer(
 													   FunctionCall3(
 														&(dict->lexize_info),
 										   PointerGetDatum(dict->dictionary),
 													   PointerGetDatum(lemm),
 													 PointerGetDatum(lenlemm)
 																	 )
 				);
 			if (!norms)			/* dictionary doesn't know this lexeme */
 				continue;
 			prs->pos++;			/* set pos */
@ -350,6 +338,8 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 					prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD));
 				}
 				if ( ptr->flags & TSL_ADDPOS )
 					prs->pos++;
 				prs->words[prs->curwords].len = strlen(ptr->lexeme);
 				prs->words[prs->curwords].word = ptr->lexeme;
 				prs->words[prs->curwords].nvariant = ptr->nvariant;
@ -359,9 +349,8 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 				prs->curwords++;
 			}
 			pfree(norms);
 			break;				/* lexeme already normalized or is stop word */
 		}
 	}
 	} while(type>0);
 	FunctionCall1(
 				  &(prsobj->end_info),
@ -417,14 +406,47 @@ hlfinditem(HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int buflen)
 	}
 }
 /*
  * Feed one LexizeExec() result into the headline text.
  *
  * Every parsed lexeme in the list "lexs" with a positive type is added as a
  * headline word, and each normalized lexeme in "norms" (an array terminated
  * by an entry whose lexeme is NULL) is looked up in the query once per list
  * node.  Both the list nodes and the norms array (with its strings) are
  * pfree'd here; "norms" may be NULL.
  */
 static void
 addHLParsedLex(HLPRSTEXT *prs, QUERYTYPE * query, ParsedLex *lexs, TSLexeme *norms) {
 	ParsedLex	*following;
 	TSLexeme	*item;
 	for( ; lexs != NULL; lexs = following ) {
 		if ( lexs->type > 0 )
 			hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
 		if ( norms != NULL ) {
 			for( item = norms; item->lexeme; item++ )
 				hlfinditem(prs, query, item->lexeme, strlen(item->lexeme));
 		}
 		/* remember the successor before the node itself is released */
 		following = lexs->next;
 		pfree( lexs );
 	}
 	if ( norms == NULL )
 		return;
 	/* release the normalized strings first, then the array holding them */
 	for( item = norms; item->lexeme; item++ )
 		pfree( item->lexeme );
 	pfree(norms);
 }
 void
 hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen)
 {
 	int			type,
-				lenlemm,
+				lenlemm;
 				i;
 	char	   *lemm = NULL;
 	WParserInfo *prsobj = findprs(cfg->prs_id);
 	LexizeData	ldata;
 	TSLexeme	*norms;
 	ParsedLex	*lexs;
 	prsobj->prs = (void *) DatumGetPointer(
 										   FunctionCall2(
@ -434,14 +456,16 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
 														 )
 		);
-	while ((type = DatumGetInt32(FunctionCall3(
+	LexizeInit(&ldata, cfg);
 	do {
 		type = DatumGetInt32(FunctionCall3(
 											   &(prsobj->getlexeme_info),
 											   PointerGetDatum(prsobj->prs),
 											   PointerGetDatum(&lemm),
-										   PointerGetDatum(&lenlemm)))) != 0)
+									PointerGetDatum(&lenlemm)));
 	{
-		if (lenlemm >= MAXSTRLEN)
+		if (type>0 && lenlemm >= MAXSTRLEN)
 		{
 #ifdef IGNORE_LONGLEXEME
 			ereport(NOTICE,
@ -455,38 +479,16 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
 #endif
 		}
-		hladdword(prs, lemm, lenlemm, type);
+		LexizeAddLemm(&ldata, type, lemm, lenlemm);
-		if (type >= cfg->len)
+		do {
-			continue;
+			if ( (norms = LexizeExec(&ldata,&lexs)) != NULL ) 
-
+				addHLParsedLex(prs, query, lexs, norms);
-		for (i = 0; i < cfg->map[type].len; i++)
+			else 
-		{
+				addHLParsedLex(prs, query, lexs, NULL);
-			DictInfo   *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
+		} while( norms );
 			TSLexeme   *norms,
 					   *ptr;
 			norms = ptr = (TSLexeme *) DatumGetPointer(
 													   FunctionCall3(
 														&(dict->lexize_info),
 										   PointerGetDatum(dict->dictionary),
 													   PointerGetDatum(lemm),
 													 PointerGetDatum(lenlemm)
 																	 )
 				);
 			if (!norms)			/* dictionary doesn't know this lexeme */
 				continue;
-			while (ptr->lexeme)
+	} while( type>0 );
 			{
 				hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
 				pfree(ptr->lexeme);
 				ptr++;
 			}
 			pfree(norms);
 			break;				/* lexeme already normalized or is stop word */
 		}
 	}
 	FunctionCall1(
 				  &(prsobj->end_info),
--- a/contrib/tsearch2/ts_lexize.c
+++ b/contrib/tsearch2/ts_lexize.c
@ -0,0 +1,261 @@
 /*
 * lexize stream of lexemes 
 * Teodor Sigaev <teodor@sigaev.ru>
 */
 #include "postgres.h"
 #include <ctype.h>
 #include <locale.h>
 #include "ts_cfg.h"
 #include "dict.h"
 /*
  * Put a LexizeData into its starting state for configuration "cfg":
  * no multiword dictionary selected, both lexeme lists empty and no
  * pending temporary result.
  */
 void
 LexizeInit(LexizeData *ld, TSCfgInfo *cfg) {
 	/* configuration and dictionary cursor */
 	ld->cfg = cfg;
 	ld->curDictId = InvalidOid;
 	ld->posDict = 0;
 	/* queue of lexemes still to be processed */
 	ld->curSub = NULL;
 	ld->towork.tail = NULL;
 	ld->towork.head = NULL;
 	/* list of lexemes already consumed */
 	ld->waste.tail = NULL;
 	ld->waste.head = NULL;
 	/* no partial multiword result yet */
 	ld->lastRes = NULL;
 	ld->tmpRes = NULL;
 }
 /*
  * Append "newpl" to the end of "list"; the appended node becomes the new
  * tail and its next pointer is cleared.
  */
 static void
 LPLAddTail(ListParsedLex *list, ParsedLex *newpl) {
 	if ( list->tail == NULL )
 		list->head = newpl;			/* empty list: node is also the head */
 	else
 		list->tail->next = newpl;
 	list->tail = newpl;
 	newpl->next = NULL;
 }
 /*
  * Detach and return the first node of "list" (NULL if the list is empty).
  * The tail pointer is cleared whenever the list ends up empty.
  */
 static ParsedLex*
 LPLRemoveHead(ListParsedLex *list) {
 	ParsedLex *first = list->head;
 	if ( first != NULL )
 		list->head = first->next;
 	if ( !list->head )
 		list->tail = NULL;
 	return first;
 }
 /*
  * Queue one lexeme produced by the parser for later processing by
  * LexizeExec().
  *
  * ld      - lexize state; the new entry is appended to ld->towork
  * type    - lexeme type as reported by the parser
  * lemm    - lexeme text; only the pointer is stored, the text is not copied
  * lenlemm - length of the lexeme text
  *
  * After the call ld->curSub points at the entry just added.
  */
 void
 LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm) {
 	/* allocate the node exactly once; a second palloc here would leak the first */
 	ParsedLex *newpl = (ParsedLex*)palloc( sizeof(ParsedLex) );
 	newpl->type = type;
 	newpl->lemm = lemm;
 	newpl->lenlemm = lenlemm;
 	LPLAddTail(&ld->towork, newpl);
 	ld->curSub = ld->towork.tail;
 }
 /*
  * Move the first pending lexeme from the work queue to the waste list and
  * restart the dictionary scan position for whatever lexeme comes next.
  */
 static void
 RemoveHead(LexizeData *ld) {
 	ParsedLex *done = LPLRemoveHead(&ld->towork);
 	LPLAddTail(&ld->waste, done);
 	ld->posDict = 0;
 }
 /*
  * Dispose of the waste list.  When the caller supplied "correspondLexem"
  * the list head is handed over through it; otherwise every node is
  * pfree'd.  In both cases the waste list is left empty.
  */
 static void
 setCorrLex(LexizeData *ld, ParsedLex **correspondLexem) {
 	if ( correspondLexem == NULL ) {
 		ParsedLex	*cur, *following;
 		for( cur = ld->waste.head; cur != NULL; cur = following ) {
 			following = cur->next;	/* read before the node is released */
 			pfree(cur);
 		}
 	} else {
 		*correspondLexem = ld->waste.head;
 	}
 	ld->waste.tail = NULL;
 	ld->waste.head = NULL;
 }
 /*
  * Move lexemes from the front of the work queue to the waste list, up to
  * and including "stop".  When "stop" is reached, ld->curSub is set to the
  * lexeme that followed it.  If "stop" is never met the whole queue is moved.
  */
 static void
 moveToWaste(LexizeData *ld, ParsedLex *stop) {
 	while( ld->towork.head ) {
 		bool	reached = ( ld->towork.head == stop );
 		/* pick up the successor before RemoveHead() relinks the node */
 		if ( reached )
 			ld->curSub = stop->next;
 		RemoveHead(ld);
 		if ( reached )
 			break;
 	}
 }
 /*
  * Replace the remembered temporary result with "res" and record "lex" as
  * the lexeme it belongs to.  A previously stored result is released
  * first: each lexeme string up to the NULL-lexeme terminator, then the
  * array itself.
  */
 static void
 setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res) {
 	TSLexeme	*old = ld->tmpRes;
 	if ( old != NULL ) {
 		TSLexeme	*item = old;
 		while( item->lexeme ) {
 			pfree( item->lexeme );
 			item++;
 		}
 		pfree( old );
 	}
 	ld->lastRes = lex;
 	ld->tmpRes = res;
 }
 /*
  * Produce the next dictionary result for the lexemes queued in ld->towork.
  *
  * The function works in one of two modes, selected by ld->curDictId:
  *   - InvalidOid: basic mode.  The head of the queue is offered, one lexeme
  *     at a time, to each dictionary mapped to its type, starting at
  *     ld->posDict.
  *   - a valid Oid: multiword mode.  A dictionary set dictState.getnext, so
  *     the lexemes following the head (tracked by ld->curSub) are fed to
  *     that same dictionary until it stops asking for more.
  *
  * Consumed lexemes are moved to the waste list; before returning, the list
  * is passed to setCorrLex(), which hands it to the caller through
  * correspondLexem when that pointer is non-NULL and frees it otherwise.
  *
  * Returns the dictionary's TSLexeme result, or NULL when no result can be
  * produced from the currently queued lexemes.
  */
 TSLexeme*
 LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) {
 	int i;
 	ListDictionary	*map;
 	DictInfo *dict;
 	TSLexeme	*res;
 	if ( ld->curDictId == InvalidOid ) {
 		/* 
 		 * usual mode: dictionary wants only one word,
 		 * but we should keep in mind that we should go through
 		 * the whole stack
 		 */
 		while( ld->towork.head ) {
 			ParsedLex	*curVal = ld->towork.head;
 			map = ld->cfg->map + curVal->type;
 			if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0 ) {	
 				/* skip this type of lexeme */
 				RemoveHead(ld);
 				continue;
 			}
 			/*
 			 * Start at posDict rather than 0: after an abandoned multiword
 			 * attempt posDict still holds the index following the
 			 * dictionary that asked for more, so that one is not retried.
 			 */
 			for (i = ld->posDict; i < map->len; i++) {
 				dict = finddict(DatumGetObjectId(map->dict_id[i]));
 				ld->dictState.isend = ld->dictState.getnext = false;
 				ld->dictState.private = NULL;
 				res = (TSLexeme *) DatumGetPointer( FunctionCall4(
 													&(dict->lexize_info),
 									   				PointerGetDatum(dict->dictionary),
 												   	PointerGetDatum(curVal->lemm),
 												 	Int32GetDatum(curVal->lenlemm),
 													PointerGetDatum(&ld->dictState)
 										 ));
 				if ( ld->dictState.getnext ) {
 					/* 
 					 * dictionary wants next word, so setup and store
 					 * current position and go to multiword mode
 					 */
 					ld->curDictId = DatumGetObjectId(map->dict_id[i]);
 					ld->posDict = i+1;
 					ld->curSub = curVal->next;
 					/* keep any partial result in case no longer match is found */
 					if ( res )
 						setNewTmpRes(ld, curVal, res);
 					return LexizeExec(ld, correspondLexem);
 				}
 				if (!res)			/* dictionary doesn't know this lexeme */
 					continue;
 				RemoveHead(ld);
 				setCorrLex(ld, correspondLexem);
 				return res;
 			}
 			/* no mapped dictionary produced a result: drop the lexeme */
 			RemoveHead(ld);
 		} 
 	} else { /* curDictId is valid */
 		dict = finddict(ld->curDictId);
 		/*
 		 * Dictionary ld->curDictId asks us about following words
 		 */
 		while( ld->curSub ) {
 			ParsedLex	*curVal = ld->curSub;
 			map = ld->cfg->map + curVal->type;
 			if (curVal->type != 0) {
 				bool dictExists = false;
 				if (curVal->type >= ld->cfg->len || map->len == 0 ) {	
 					/* skip this type of lexeme */
 					ld->curSub = curVal->next;
 					continue;
 				}
 				/*
 				 * We should be sure that the current type of lexeme is
 				 * recognized by our dictionary: we just check whether it
 				 * exists in the list of dictionaries
 				 */
 				for(i=0;i < map->len && !dictExists; i++) 
 					if ( ld->curDictId == DatumGetObjectId(map->dict_id[i]) )
 						dictExists = true;
 				if ( !dictExists ) {
 					/*
 					 * Dictionary can't work with current type of lexeme,
 					 * return to basic mode and redo all stored lexemes
 					 */
 					ld->curDictId = InvalidOid;
 					return LexizeExec(ld, correspondLexem);
 				}
 			} 
 			/* a lexeme of type 0 is reported to the dictionary as the end */
 			ld->dictState.isend = (curVal->type==0) ? true : false;
 			ld->dictState.getnext = false;
 			res = (TSLexeme *) DatumGetPointer( FunctionCall4(
 												&(dict->lexize_info),
 								   				PointerGetDatum(dict->dictionary),
 											   	PointerGetDatum(curVal->lemm),
 											 	Int32GetDatum(curVal->lenlemm),
 												PointerGetDatum(&ld->dictState)
 										 ));
 			if ( ld->dictState.getnext ) {
 				/* Dictionary wants one more */
 				ld->curSub = curVal->next;
 				if ( res )
 					setNewTmpRes(ld, curVal, res);
 				continue;
 			}
 			if ( res || ld->tmpRes ) {
 				/*
 				 * Dictionary normalizes lexemes,
 				 * so we remove from stack all used lexemes,
 				 * return to basic mode and redo end of stack (if it exists)
 				 */
 				if ( res ) {
 					moveToWaste( ld, ld->curSub );
 				} else {
 					/* fall back to the last partial result and its lexeme */
 					res = ld->tmpRes;
 					moveToWaste( ld, ld->lastRes );
 				}
 				/* reset to initial state */
 				ld->curDictId = InvalidOid;
 				ld->posDict = 0;
 				ld->lastRes = NULL;
 				ld->tmpRes = NULL;
 				setCorrLex(ld, correspondLexem);
 				return res;
 			}
 			/* Dictionary doesn't want the next lexeme and didn't recognize
 			   anything, redo from ld->towork.head */
 			ld->curDictId = InvalidOid;
 			return LexizeExec(ld, correspondLexem);
 		}	
 	}
 	/* nothing to return for now; still dispose of the waste list */
 	setCorrLex(ld, correspondLexem);
 	return NULL;
 }
--- a/contrib/tsearch2/tsearch.sql.in
+++ b/contrib/tsearch2/tsearch.sql.in
@ -146,6 +146,25 @@ insert into pg_ts_dict select
 	'Example of synonym dictionary'
 ;
 -- C-language entry points of the thesaurus dictionary.
 -- thesaurus_init(internal): initialization function of the dictionary.
 CREATE FUNCTION thesaurus_init(internal)
 	RETURNS internal
 	as 'MODULE_PATHNAME' 
 	LANGUAGE C;
 -- thesaurus_lexize takes four arguments; the trailing "internal" is an
 -- extra argument compared with the three-argument *_lexize functions.
 CREATE FUNCTION thesaurus_lexize(internal,internal,int4,internal)
 	RETURNS internal
 	as 'MODULE_PATHNAME'
 	LANGUAGE C
 	RETURNS NULL ON NULL INPUT;
 insert into pg_ts_dict select 
 	'thesaurus_template', 
 	'thesaurus_init(internal)',
 	null,
 	'thesaurus_lexize(internal,internal,int4,internal)',
 	'Thesaurus template, must be pointed Dictionary and DictFile'
 ;
 --dict conf
 CREATE TABLE pg_ts_parser (
 	prs_name	text not null primary key,
@ -1193,7 +1212,11 @@ AS
 --example of ISpell dictionary
 --update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
 --example of synonym dict
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
 --example of thesaurus dict
 --update pg_ts_dict set dict_initoption='DictFile="contrib/thesaurus", Dictionary="en_stem"' where dict_name='thesaurus_template';
 --update pg_ts_cfgmap set dict_name = '{thesaurus_template,en_stem}' where dict_name = '{en_stem}';
 END;
--- a/contrib/tsearch2/untsearch.sql.in
+++ b/contrib/tsearch2/untsearch.sql.in
@ -41,6 +41,8 @@ DROP FUNCTION snb_lexize(internal,internal,int4);
 DROP FUNCTION snb_ru_init(internal);
 DROP FUNCTION spell_init(internal);
 DROP FUNCTION spell_lexize(internal,internal,int4);
 DROP FUNCTION thesaurus_init(internal);
 -- thesaurus_lexize is created with four arguments, so the DROP must name all four
 DROP FUNCTION thesaurus_lexize(internal,internal,int4,internal);
 DROP FUNCTION syn_init(internal);
 DROP FUNCTION syn_lexize(internal,internal,int4);
 DROP FUNCTION set_curprs(int);