Text parser rewritten:

- supports multibyte encodings - stricter rules for lexemes - flex isn't used Add: - tsquery plainto_tsquery(text): function that makes a tsquery from plain text. - &&, ||, !! operations for tsquery, for combining a tsquery from its parts: 'foo & bar' || 'asd' => 'foo & bar | asd'
21 years ago · c52795d18a
parent b91e6ed93e
commit c52795d18a
15 changed files with 1613 additions and 424 deletions
--- a/contrib/tsearch2/Makefile
+++ b/contrib/tsearch2/Makefile
@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.11 2005/11/08 17:08:46 teodor Exp $
+# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.12 2005/11/21 12:27:57 teodor Exp $

 MODULE_big = tsearch2
 OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
@ -6,7 +6,8 @@ OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
       wparser.o wparser_def.o \
       ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
       tsvector_op.o rank.o ts_stat.o \
-       query_util.o query_support.o query_rewrite.o query_gist.o
+       query_util.o query_support.o query_rewrite.o query_gist.o \
+       ts_locale.o

 SUBDIRS     := snowball ispell wordparser
 SUBDIROBJS  := $(SUBDIRS:%=%/SUBSYS.o)
--- a/contrib/tsearch2/expected/tsearch2.out
+++ b/contrib/tsearch2/expected/tsearch2.out
@ -13,12 +13,12 @@ psql:tsearch2.sql:342: NOTICE:  argument type tsvector is only a shell
 psql:tsearch2.sql:396: NOTICE:  type "tsquery" is not yet defined
 DETAIL:  Creating a shell type definition.
 psql:tsearch2.sql:401: NOTICE:  argument type tsquery is only a shell
-psql:tsearch2.sql:544: NOTICE:  type "gtsvector" is not yet defined
+psql:tsearch2.sql:559: NOTICE:  type "gtsvector" is not yet defined
 DETAIL:  Creating a shell type definition.
-psql:tsearch2.sql:549: NOTICE:  argument type gtsvector is only a shell
-psql:tsearch2.sql:998: NOTICE:  type "gtsq" is not yet defined
+psql:tsearch2.sql:564: NOTICE:  argument type gtsvector is only a shell
+psql:tsearch2.sql:1054: NOTICE:  type "gtsq" is not yet defined
 DETAIL:  Creating a shell type definition.
-psql:tsearch2.sql:1003: NOTICE:  argument type gtsq is only a shell
+psql:tsearch2.sql:1059: NOTICE:  argument type gtsq is only a shell
 --tsvector
 SELECT '1'::tsvector;
 tsvector 
@ -653,7 +653,7 @@ select * from token_type('default');
    11 | lpart_hword  | Latin part of hyphenated word
    12 | blank        | Space symbols
    13 | tag          | HTML Tag
-    14 | http         | HTTP head
+    14 | protocol     | Protocol head
    15 | hword        | Hyphenated word
    16 | lhword       | Latin hyphenated word
    17 | nlhword      | Non-latin hyphenated word
@ -672,14 +672,13 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
 -------+--------------------------------------
    22 | 345
    12 |  
-     4 | qwe@efd.r
-    12 |  
-    12 | '
-    12 |  
+     1 | qwe
+    12 | @
+    19 | efd.r
+    12 |  ' 
    14 | http://
     6 | www.com
-    12 | /
-    12 |  
+    12 | / 
    14 | http://
     5 | aew.werc.ewr/?ad=qwe&dw
     6 | aew.werc.ewr
@ -700,10 +699,8 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
     6 | 4aew.werc.ewr
    12 |  
    14 | http://
-     5 | 5aew.werc.ewr:8100/?
-     6 | 5aew.werc.ewr
-    18 | :8100/?
-    12 |   
+     6 | 5aew.werc.ewr:8100
+    12 | /?  
     1 | ad
    12 | =
     1 | qwe
@ -711,12 +708,12 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
     1 | dw
    12 |  
     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
-     6 | 6aew.werc.ewr
-    18 | :8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr:8100
+    18 | /?ad=qwe&dw
    12 |  
     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
-     6 | 7aew.werc.ewr
-    18 | :8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr:8100
+    18 | /?ad=qwe&dw=%20%32
    12 |  
     7 | +4.0e-10
    12 |  
@ -747,11 +744,15 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
     1 | jf
    12 |  
     1 | sdjk
-    13 | <we hjwer <werrwe>
+    12 | <
+     1 | we
    12 |  
-     3 | ewr1
-    12 | >
+     1 | hjwer
+    12 |  
+    13 | <werrwe>
    12 |  
+     3 | ewr1
+    12 | > 
     3 | ewri2
    12 |  
    13 | <a href="qwe<qwe>">
@ -767,57 +768,53 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
    12 |  
    19 | /wqe-324/ewr
    12 |  
-     6 | gist.h
-    12 |  
-     6 | gist.h.c
+    19 | gist.h
    12 |  
-     6 | gist.c
-    12 | .
+    19 | gist.h.c
    12 |  
+    19 | gist.c
+    12 | . 
     1 | readline
    12 |  
    20 | 4.2
    12 |  
    20 | 4.2
-    12 | .
-    12 |  
+    12 | . 
    20 | 4.2
-    12 | ,
-    12 |  
-    15 | readline-4
+    12 | , 
+    15 | readline-4.2
    11 | readline
    12 | -
    20 | 4.2
    12 |  
-    15 | readline-4
+    15 | readline-4.2
    11 | readline
    12 | -
    20 | 4.2
-    12 | .
-    12 |  
+    12 | . 
    22 | 234
    12 |  

-    13 | <i <b>
+    12 | <
+     1 | i
+    12 |  
+    13 | <b>
    12 |  
     1 | wow
    12 |   
-    12 | <
-    12 |  
+    12 | < 
     1 | jqw
    12 |  
-    12 | <
-    12 | >
-    12 |  
+    12 | <> 
     1 | qwerty
-(138 rows)
+(135 rows)

 SELECT to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
 /usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
 <i <b> wow  < jqw <> qwerty');
-                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 'qwe@efd.r':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 'teodor@stack.net':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+                                                                                                                                                                                                                                                                                                                                                                                                                             to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                              
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':17 'dw':19 'jf':39 '234':63 '345':1 '4.2':54,55,56,59,62 '455':31 'jqw':66 'qwe':2,18,27,28,35 'wer':36 'wow':65 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':67 '234.435':30 'qwe-wer':34 'readlin':53,58,61 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 'readline-4.2':57,60 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23
 (1 row)

 SELECT length(to_tsvector('default', '345 qw'));
@ -831,7 +828,7 @@ SELECT length(to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://ae
 <i <b> wow  < jqw <> qwerty'));
 length 
 --------
-     53
+     51
 (1 row)

 select to_tsquery('default', 'qwe & sKies '); 
@ -876,6 +873,36 @@ select to_tsquery('default', '(the|and&(i&1))&fghj');
 '1' & 'fghj'
 (1 row)

+select plainto_tsquery('default', 'the and z 1))& fghj');
+  plainto_tsquery   
+--------------------
+ 'z' & '1' & 'fghj'
+(1 row)
+
+select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd');
+       ?column?        
+-----------------------
+ 'foo' & 'bar' & 'asd'
+(1 row)
+
+select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg');
+           ?column?           
+------------------------------
+ 'foo' & 'bar' | 'asd' & 'fg'
+(1 row)
+
+select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg');
+             ?column?              
+-----------------------------------
+ 'foo' & 'bar' | !( 'asd' & 'fg' )
+(1 row)
+
+select plainto_tsquery('default', 'foo bar') && 'asd | fg';
+             ?column?             
+----------------------------------
+ 'foo' & 'bar' & ( 'asd' | 'fg' )
+(1 row)
+
 select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
 ?column? 
 ----------
--- a/contrib/tsearch2/query.c
+++ b/contrib/tsearch2/query.c
@ -51,10 +51,20 @@ Datum		to_tsquery_name(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(to_tsquery_current);
 Datum		to_tsquery_current(PG_FUNCTION_ARGS);

+PG_FUNCTION_INFO_V1(plainto_tsquery);
+Datum		plainto_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(plainto_tsquery_name);
+Datum		plainto_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(plainto_tsquery_current);
+Datum		plainto_tsquery_current(PG_FUNCTION_ARGS);
+
 /* parser's states */
 #define WAITOPERAND 1
 #define WAITOPERATOR	2
 #define WAITFIRSTOPERAND 3
+#define WAITSINGLEOPERAND 4

 /*
 * node of query tree, also used
@ -195,6 +205,14 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
 				else if (*(state->buf) != ' ')
 					return ERR;
 				break;
+			case WAITSINGLEOPERAND:
+				if ( *(state->buf) == '\0' ) 
+					return END;
+				*strval = state->buf;
+				*lenval = strlen( state->buf );
+				state->buf += strlen( state->buf );
+				state->count++;
+				return VAL;	
 			default:
 				return ERR;
 				break;
@ -582,7 +600,7 @@ findoprnd(ITEM * ptr, int4 *pos)
 * input
 */
 static QUERYTYPE *
-			queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id, bool isplain)
 {
 	QPRS_STATE	state;
 	int4		i;
@ -599,7 +617,7 @@ static QUERYTYPE *

 	/* init state */
 	state.buf = buf;
-	state.state = WAITFIRSTOPERAND;
+	state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
 	state.count = 0;
 	state.num = 0;
 	state.str = NULL;
@ -679,7 +697,7 @@ Datum
 tsquery_in(PG_FUNCTION_ARGS)
 {
 	SET_FUNCOID();
-	PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0));
+	PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0, false));
 }

 /*
@ -910,7 +928,7 @@ to_tsquery(PG_FUNCTION_ARGS)
 	str = text2char(in);
 	PG_FREE_IF_COPY(in, 1);

-	query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+	query = queryin(str, pushval_morph, PG_GETARG_INT32(0),false);
 	
 	if ( query->size == 0 )
 		PG_RETURN_POINTER(query);
@ -950,3 +968,59 @@ to_tsquery_current(PG_FUNCTION_ARGS)
 										Int32GetDatum(get_currcfg()),
 										PG_GETARG_DATUM(0)));
 }
+
+/*
+ * plainto_tsquery(cfg_id, text) --- build a tsquery from plain text.
+ *
+ * Argument 0 is the configuration id, argument 1 the text to convert.
+ * queryin() is called with isplain = true, so its scanner starts in
+ * WAITSINGLEOPERAND state and hands the whole string to pushval_morph
+ * as one operand instead of parsing query operators out of it.
+ *
+ * Returns the resulting QUERYTYPE; an empty query is returned unchanged.
+ */
+Datum
+plainto_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_P(1);
+	char	   *plain;
+	QUERYTYPE  *query;
+	ITEM	   *cleaned;
+	int4		nitems;
+
+	SET_FUNCOID();
+
+	plain = text2char(in);
+	PG_FREE_IF_COPY(in, 1);
+
+	query = queryin(plain, pushval_morph, PG_GETARG_INT32(0), true);
+
+	if ( query->size != 0 )
+	{
+		cleaned = clean_fakeval_v2(GETQUERY(query), &nitems);
+		if (cleaned)
+		{
+			/* overwrite the item array in place with the cleaned items */
+			memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(ITEM));
+			pfree(cleaned);
+		}
+		else
+		{
+			/* nothing survived cleanup: mark the query as empty */
+			query->len = HDRSIZEQT;
+			query->size = 0;
+		}
+	}
+
+	PG_RETURN_POINTER(query);
+}
+
+/*
+ * plainto_tsquery(cfg_name, text) --- same as plainto_tsquery(), but the
+ * configuration is given by name (argument 0) and resolved with
+ * name2id_cfg() before delegating.
+ */
+Datum
+plainto_tsquery_name(PG_FUNCTION_ARGS)
+{
+	text	   *cfgname = PG_GETARG_TEXT_P(0);
+	Datum		result;
+
+	SET_FUNCOID();
+
+	result = DirectFunctionCall2(plainto_tsquery,
+								 Int32GetDatum(name2id_cfg(cfgname)),
+								 PG_GETARG_DATUM(1));
+
+	/* release the detoasted name only after the lookup above has used it */
+	PG_FREE_IF_COPY(cfgname, 0);
+
+	PG_RETURN_DATUM(result);
+}
+
+/*
+ * plainto_tsquery(text) --- same as plainto_tsquery(), using the
+ * configuration id returned by get_currcfg().
+ */
+Datum
+plainto_tsquery_current(PG_FUNCTION_ARGS)
+{
+	Datum		result;
+
+	SET_FUNCOID();
+
+	result = DirectFunctionCall2(plainto_tsquery,
+								 Int32GetDatum(get_currcfg()),
+								 PG_GETARG_DATUM(0));
+
+	PG_RETURN_DATUM(result);
+}
+
--- a/contrib/tsearch2/query_support.c
+++ b/contrib/tsearch2/query_support.c
@ -14,6 +14,117 @@ tsquery_numnode(PG_FUNCTION_ARGS) {
 	PG_RETURN_INT32(nnode);
 }

+/*
+ * Build a two-child operator node whose children are the trees of the
+ * given queries.  The operator itself (valnode->val) is left zeroed and
+ * must be filled in by the caller.
+ *
+ * Note the child order: b becomes child[0] and a becomes child[1].
+ */
+static QTNode*
+join_tsqueries(QUERYTYPE *a, QUERYTYPE *b) {
+	QTNode	   *node;
+	ITEM	   *op;
+	QTNode	  **kids;
+
+	node = (QTNode *) palloc0( sizeof(QTNode) );
+	node->flags |= QTN_NEEDFREE;
+
+	op = (ITEM *) palloc0( sizeof(ITEM) );
+	op->type = OPR;
+	node->valnode = op;
+
+	kids = (QTNode **) palloc0( sizeof(QTNode*)*2 );
+	kids[0] = QT2QTN( GETQUERY(b), GETOPERAND(b) );
+	kids[1] = QT2QTN( GETQUERY(a), GETOPERAND(a) );
+	node->child = kids;
+	node->nchild = 2;
+
+	return node;
+}
+
+PG_FUNCTION_INFO_V1(tsquery_and);
+Datum           tsquery_and(PG_FUNCTION_ARGS);
+
+/*
+ * tsquery_and(a, b) --- implementation of the tsquery && tsquery operator.
+ *
+ * Combines the two queries under a single '&' operator node and returns
+ * the result as a new QUERYTYPE.  If either input is empty (size == 0)
+ * the other input is returned as is.
+ *
+ * Both arguments are fetched with PG_DETOAST_DATUM_COPY, so a and b are
+ * private copies that this function may free or return.
+ */
+Datum
+tsquery_and(PG_FUNCTION_ARGS) {
+	QUERYTYPE  *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
+	QUERYTYPE  *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)));
+	QTNode	*res;
+	QUERYTYPE  *query;
+
+	if ( a->size == 0 ) {
+		/* a is argument 0: compare it against that slot, not against b's */
+		PG_FREE_IF_COPY(a,0);
+		PG_RETURN_POINTER(b);
+	} else if ( b->size == 0 ) {
+		PG_FREE_IF_COPY(b,1);
+		PG_RETURN_POINTER(a);
+	}
+
+	res = join_tsqueries(a, b);
+
+	res->valnode->val = '&';
+
+	query = QTN2QT( res, PlainMemory );
+
+	QTNFree(res);
+	PG_FREE_IF_COPY(a,0);
+	PG_FREE_IF_COPY(b,1);
+
+	PG_RETURN_POINTER(query);
+}
+
+PG_FUNCTION_INFO_V1(tsquery_or);
+Datum           tsquery_or(PG_FUNCTION_ARGS);
+
+/*
+ * tsquery_or(a, b) --- implementation of the tsquery || tsquery operator.
+ *
+ * Combines the two queries under a single '|' operator node and returns
+ * the result as a new QUERYTYPE.  If either input is empty (size == 0)
+ * the other input is returned as is.
+ *
+ * Both arguments are fetched with PG_DETOAST_DATUM_COPY, so a and b are
+ * private copies that this function may free or return.
+ */
+Datum
+tsquery_or(PG_FUNCTION_ARGS) {
+	QUERYTYPE  *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
+	QUERYTYPE  *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)));
+	QTNode	*res;
+	QUERYTYPE  *query;
+
+	if ( a->size == 0 ) {
+		/* a is argument 0: compare it against that slot, not against b's */
+		PG_FREE_IF_COPY(a,0);
+		PG_RETURN_POINTER(b);
+	} else if ( b->size == 0 ) {
+		PG_FREE_IF_COPY(b,1);
+		PG_RETURN_POINTER(a);
+	}
+
+	res = join_tsqueries(a, b);
+
+	res->valnode->val = '|';
+
+	query = QTN2QT( res, PlainMemory );
+
+	QTNFree(res);
+	PG_FREE_IF_COPY(a,0);
+	PG_FREE_IF_COPY(b,1);
+
+	PG_RETURN_POINTER(query);
+}
+
+PG_FUNCTION_INFO_V1(tsquery_not);
+Datum           tsquery_not(PG_FUNCTION_ARGS);
+
+/*
+ * tsquery_not(a) --- implementation of the prefix !! tsquery operator.
+ *
+ * Wraps the query tree of a in a one-child '!' operator node and returns
+ * the result as a new QUERYTYPE.  An empty query (size == 0) is returned
+ * unchanged.
+ */
+Datum
+tsquery_not(PG_FUNCTION_ARGS) {
+	QUERYTYPE  *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
+	QTNode	*negation;
+	ITEM	*op;
+	QUERYTYPE  *result;
+
+	if ( a->size == 0 )
+		PG_RETURN_POINTER(a);
+
+	negation = (QTNode*)palloc0( sizeof(QTNode) );
+	negation->flags |= QTN_NEEDFREE;
+
+	op = (ITEM*)palloc0( sizeof(ITEM) );
+	op->type = OPR;
+	op->val = '!';
+	negation->valnode = op;
+
+	negation->child = (QTNode**)palloc0( sizeof(QTNode*) );
+	negation->child[0] = QT2QTN( GETQUERY(a), GETOPERAND(a) );
+	negation->nchild = 1;
+
+	result = QTN2QT( negation, PlainMemory );
+
+	QTNFree(negation);
+	PG_FREE_IF_COPY(a,0);
+
+	PG_RETURN_POINTER(result);
+}
+
 static int
 CompareTSQ( QUERYTYPE *a, QUERYTYPE *b ) {
 	if ( a->size != b->size ) {
--- a/contrib/tsearch2/sql/tsearch2.sql
+++ b/contrib/tsearch2/sql/tsearch2.sql
@ -173,6 +173,13 @@ select to_tsquery('default', 'asd&(and|fghj)');
 select to_tsquery('default', '(asd&and)|fghj');
 select to_tsquery('default', '(asd&!and)|fghj');
 select to_tsquery('default', '(the|and&(i&1))&fghj');
+
+select plainto_tsquery('default', 'the and z 1))& fghj');
+select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd');
+select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg');
+select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg');
+select plainto_tsquery('default', 'foo bar') && 'asd | fg';
+
 select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
 select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
 select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
--- a/contrib/tsearch2/ts_locale.c
+++ b/contrib/tsearch2/ts_locale.c
@ -0,0 +1,61 @@
+#include "ts_locale.h"
+
+#include "utils/builtins.h"
+#include "utils/pg_locale.h"
+#include "mb/pg_wchar.h"
+
+
+#if defined(TS_USE_WIDE) && defined(WIN32)
+
+/*
+ * wchar2char --- WIN32 replacement for wcstombs().
+ *
+ * On non-WIN32 builds ts_locale.h aliases wchar2char directly to
+ * wcstombs, so this function follows the same contract: "from" is a
+ * NUL-terminated wide string, "len" is the capacity of "to" in bytes, and
+ * the return value is the number of bytes written, not counting the
+ * terminator.
+ *
+ * For a UTF-8 database the conversion is done with WideCharToMultiByte();
+ * any other encoding falls through to wcstombs().  A failed UTF-8
+ * conversion is reported with ereport(ERROR).
+ *
+ * NOTE(review): "to" is written through although it is declared const;
+ * the qualifier should be dropped here and in ts_locale.h together.
+ */
+size_t
+wchar2char( const char *to, const wchar_t *from, size_t len ) {
+	if (GetDatabaseEncoding() == PG_UTF8) {
+		int	r;
+
+		if (len==0)
+			return 0;
+
+		/*
+		 * -1 makes the API process the input up to and including its
+		 * NUL terminator; len is the size of the output buffer.
+		 */
+		r = WideCharToMultiByte(CP_UTF8, 0, from, -1, (char *) to, (int) len,
+				NULL, NULL);
+
+		if ( r==0 )
+			ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				 	errmsg("UTF-16 to UTF-8 translation failed: %lu",
+						GetLastError())));
+
+		/* the API counts the terminator it wrote; wcstombs() does not */
+		return r - 1;
+	}
+
+	return wcstombs((char *) to, from, len);
+}
+
+/*
+ * char2wchar --- WIN32 replacement for mbstowcs().
+ *
+ * For a UTF-8 database the conversion is done with MultiByteToWideChar(),
+ * passing len both as the number of input bytes and as the capacity of
+ * "to" in wide characters; any other encoding falls through to
+ * mbstowcs().  Returns the number of wide characters produced.
+ *
+ * On conversion failure pg_verifymbstr() is called on the input first,
+ * then an error is raised.
+ *
+ * NOTE(review): "to" is written through although it is declared const;
+ * the qualifier should be dropped here and in ts_locale.h together.
+ */
+size_t
+char2wchar( const wchar_t *to, const char *from, size_t len ) {
+	if (GetDatabaseEncoding() == PG_UTF8) {
+		int	r;
+
+		if (len==0)
+			return 0;
+
+		r = MultiByteToWideChar(CP_UTF8, 0, from, (int) len,
+			(wchar_t *) to, (int) len);
+
+		if (!r) {
+			pg_verifymbstr(from, len, false);
+			ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				errmsg("invalid multibyte character for locale"),
+				errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
+		}
+
+		/* the output can never exceed the capacity passed to the API */
+		Assert((size_t) r <= len);
+
+		return r;
+	}
+
+	return mbstowcs((wchar_t *) to, from, len);
+}
+
+#endif
--- a/contrib/tsearch2/ts_locale.h
+++ b/contrib/tsearch2/ts_locale.h
@ -0,0 +1,38 @@
+#ifndef __TSLOCALE_H__
+#define __TSLOCALE_H__
+
+#include "postgres.h"
+
+#include <ctype.h>
+#include <limits.h>
+
+/*
+ * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
+ * declare them in <wchar.h>.
+ */
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif
+
+/*
+ * TS_USE_WIDE is defined only when the platform provides both wcstombs()
+ * and towlower().  When it is defined, char2wchar/wchar2char are available
+ * as the conversion routines between multibyte and wide-character strings.
+ */
+#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
+#define TS_USE_WIDE
+
+#ifdef WIN32
+
+/* WIN32: real functions, implemented in ts_locale.c */
+size_t wchar2char( const char *to, const wchar_t *from, size_t len );
+size_t char2wchar( const wchar_t *to, const char *from, size_t len );
+
+#else /* WIN32 */
+
+/* elsewhere the C library routines are used directly */
+#define char2wchar mbstowcs
+#define wchar2char wcstombs
+
+#endif /* WIN32 */
+ 
+#endif /* defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER) */ 
+
+#endif  /* __TSLOCALE_H__ */
--- a/contrib/tsearch2/tsearch.sql.in
+++ b/contrib/tsearch2/tsearch.sql.in
@ -427,6 +427,21 @@ RETURNS tsquery
 AS 'MODULE_PATHNAME','to_tsquery_current'
 LANGUAGE 'c' with (isstrict,iscachable);

+-- plainto_tsquery: build a tsquery from plain text.
+-- Variant taking the configuration id; bound to the C symbol of the
+-- same name.
+CREATE FUNCTION plainto_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+-- Variant taking the configuration name.
+CREATE FUNCTION plainto_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','plainto_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+-- Variant using the current configuration.
+-- NOTE(review): declared iscachable although the result depends on the
+-- current configuration; same convention as to_tsquery(text) -- confirm.
+CREATE FUNCTION plainto_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','plainto_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
 --operations
 CREATE FUNCTION exectsq(tsvector, tsquery)
 RETURNS bool
@ -929,6 +944,47 @@ CREATE OR REPLACE FUNCTION numnode(tsquery)
        language 'C'
        with (isstrict,iscachable);

+-- tsquery && tsquery: combine two queries with AND.
+CREATE OR REPLACE FUNCTION tsquery_and(tsquery,tsquery)
+        returns tsquery
+        as 'MODULE_PATHNAME', 'tsquery_and'
+        language 'C'
+        with (isstrict,iscachable);
+
+-- No RESTRICT/JOIN estimators: those apply only to operators that return
+-- boolean, and this one returns tsquery.
+CREATE OPERATOR && (
+        LEFTARG = tsquery,
+        RIGHTARG = tsquery,
+        PROCEDURE = tsquery_and,
+        COMMUTATOR = '&&'
+);
+
+-- tsquery || tsquery: combine two queries with OR.
+CREATE OR REPLACE FUNCTION tsquery_or(tsquery,tsquery)
+        returns tsquery
+        as 'MODULE_PATHNAME', 'tsquery_or'
+        language 'C'
+        with (isstrict,iscachable);
+
+-- No RESTRICT/JOIN estimators: those apply only to operators that return
+-- boolean, and this one returns tsquery.
+CREATE OPERATOR || (
+        LEFTARG = tsquery,
+        RIGHTARG = tsquery,
+        PROCEDURE = tsquery_or,
+        COMMUTATOR = '||'
+);
+
+-- !! tsquery: negate a query.
+CREATE OR REPLACE FUNCTION tsquery_not(tsquery)
+        RETURNS tsquery
+        AS 'MODULE_PATHNAME', 'tsquery_not'
+        LANGUAGE 'C'
+        WITH (isstrict,iscachable);
+
+-- Prefix operator: only a right-hand argument is declared.
+CREATE OPERATOR !! (
+        PROCEDURE = tsquery_not,
+        RIGHTARG = tsquery
+);
+
 --------------rewrite subsystem

 CREATE OR REPLACE FUNCTION rewrite(tsquery, text)
--- a/contrib/tsearch2/wordparser/Makefile
+++ b/contrib/tsearch2/wordparser/Makefile
@ -1,8 +1,8 @@
-# $PostgreSQL: pgsql/contrib/tsearch2/wordparser/Makefile,v 1.8 2005/10/18 01:30:49 tgl Exp $
+# $PostgreSQL: pgsql/contrib/tsearch2/wordparser/Makefile,v 1.9 2005/11/21 12:27:57 teodor Exp $

 SUBOBJS =  parser.o deflex.o

-EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) parser.c
+EXTRA_CLEAN = SUBSYS.o $(SUBOBJS)

 PG_CPPFLAGS = -I$(srcdir)/..

@ -20,13 +20,6 @@ override CFLAGS += $(CFLAGS_SL)

 all: SUBSYS.o

-parser.c: parser.l
-ifdef FLEX
-	$(FLEX) $(FLEXFLAGS) -8 -Ptsearch2_yy -o'$@' $<
-else
-	@$(missing) flex $< $@
-endif
-
 SUBSYS.o: $(SUBOBJS)
 	$(LD) $(LDREL) $(LDOUT) $@ $^

--- a/contrib/tsearch2/wordparser/deflex.c
+++ b/contrib/tsearch2/wordparser/deflex.c
@ -15,7 +15,7 @@ const char *lex_descr[] = {
 	"Latin part of hyphenated word",
 	"Space symbols",
 	"HTML Tag",
-	"HTTP head",
+	"Protocol head",
 	"Hyphenated word",
 	"Latin hyphenated word",
 	"Non-latin hyphenated word",
@ -42,7 +42,7 @@ const char *tok_alias[] = {
 	"lpart_hword",
 	"blank",
 	"tag",
-	"http",
+	"protocol",
 	"hword",
 	"lhword",
 	"nlhword",
--- a/contrib/tsearch2/wordparser/deflex.h
+++ b/contrib/tsearch2/wordparser/deflex.h
@ -17,7 +17,7 @@
 #define LATPARTHYPHENWORD	11
 #define SPACE		12
 #define TAG			13
-#define HTTP		14
+#define PROTOCOL		14
 #define HYPHENWORD	15
 #define LATHYPHENWORD	16
 #define CYRHYPHENWORD	17
--- a/contrib/tsearch2/wordparser/parser.c
+++ b/contrib/tsearch2/wordparser/parser.c
--- a/contrib/tsearch2/wordparser/parser.h
+++ b/contrib/tsearch2/wordparser/parser.h
@ -1,10 +1,147 @@
 #ifndef __PARSER_H__
 #define __PARSER_H__

-extern char *token;
-extern int	tokenlen;
-int			tsearch2_yylex(void);
-void		tsearch2_start_parse_str(char *, int);
-void		tsearch2_end_parse(void);
+#include <ctype.h>
+#include <limits.h>
+#include "ts_locale.h"
+
+/*
+ * Identifiers of the parser's states.  TPS_Base is 0 and TPS_Null is a
+ * fake value marking the end of the list, so it must remain the last
+ * member.
+ */
+typedef enum {
+	TPS_Base = 0,
+	TPS_InUWord,
+	TPS_InLatWord,
+	TPS_InCyrWord,
+	TPS_InUnsignedInt,
+	TPS_InSignedIntFirst,
+	TPS_InSignedInt,
+	TPS_InSpace,
+	TPS_InUDecimalFirst,
+	TPS_InUDecimal,
+	TPS_InDecimalFirst,
+	TPS_InDecimal,
+	TPS_InVersionFirst,
+	TPS_InVersion,
+	TPS_InMantissaFirst,
+	TPS_InMantissaSign,
+	TPS_InMantissa,
+	TPS_InHTMLEntityFirst,
+	TPS_InHTMLEntity,
+	TPS_InHTMLEntityNumFirst,
+	TPS_InHTMLEntityNum,
+	TPS_InHTMLEntityEnd,
+	TPS_InTagFirst,
+	TPS_InTagCloseFirst,
+	TPS_InTag,
+	TPS_InTagEscapeK,
+	TPS_InTagEscapeKK,
+	TPS_InTagBackSleshed,
+	TPS_InTagEnd,
+	TPS_InCommentFirst,
+	TPS_InCommentLast,
+	TPS_InComment,
+	TPS_InCloseCommentFirst,
+	TPS_InCloseCommentLast,
+	TPS_InCommentEnd,
+	TPS_InHostFirstDomen,
+	TPS_InHostDomenSecond,
+	TPS_InHostDomen,
+	TPS_InPortFirst,
+	TPS_InPort,
+	TPS_InHostFirstAN,
+	TPS_InHost,
+	TPS_InEmail,
+	TPS_InFileFirst,
+	TPS_InFile,
+	TPS_InFileNext,
+	TPS_InURIFirst,
+	TPS_InURIStart,
+	TPS_InURI,
+	TPS_InFURL,
+	TPS_InProtocolFirst,
+	TPS_InProtocolSecond,
+	TPS_InProtocolEnd,
+	TPS_InHyphenLatWordFirst,
+	TPS_InHyphenLatWord,
+	TPS_InHyphenCyrWordFirst,
+	TPS_InHyphenCyrWord,
+	TPS_InHyphenUWordFirst,
+	TPS_InHyphenUWord,
+	TPS_InHyphenValueFirst,
+	TPS_InHyphenValue,
+	TPS_InHyphenValueExact,
+	TPS_InParseHyphen,
+	TPS_InParseHyphenHyphen,
+	TPS_InHyphenCyrWordPart,
+	TPS_InHyphenLatWordPart,
+	TPS_InHyphenUWordPart,
+	TPS_InHyphenUnsignedInt,
+	TPS_InHDecimalPartFirst,
+	TPS_InHDecimalPart,
+	TPS_InHVersionPartFirst,
+	TPS_InHVersionPart,
+	TPS_Null  /* last state (fake value) */
+} TParserState;
+
+/* forward declaration */
+struct TParser;
+
+
+typedef int (*TParserCharTest)(struct TParser*);  /* any p_is* functions except p_iseq */
+typedef void (*TParserSpecial)(struct TParser*);  /* special handler for special cases... */
+
+/*
+ * One entry of a state's action list.
+ * NOTE(review): the field notes below are inferred from the field types and
+ * names only; confirm against the state tables in parser.c.
+ */
+typedef struct {
+        TParserCharTest isclass;	/* character test callback */
+        char            c;		/* presumably the literal character used by p_iseq */
+        uint16          flags;
+        TParserState    tostate;	/* presumably the state to switch to */
+        int             type;		/* presumably the lexeme type to report */
+        TParserSpecial  special;	/* special-case handler callback */
+} TParserStateActionItem;
+
+/* Associates a parser state with its action items. */
+typedef struct {
+        TParserState            state;
+        TParserStateActionItem  *action;
+} TParserStateAction;
+
+/*
+ * A parser position.  Positions are chained through "prev".
+ */
+typedef struct TParserPosition {
+	int		posbyte; /* position of parser in bytes */
+	int		poschar; /* position of parser in characters */
+	int		charlen; /* length of current char */
+	int 		lenbytelexeme;
+	int 		lencharlexeme;
+	TParserState	state;
+	struct TParserPosition	*prev;
+	int		flags;
+	TParserStateActionItem	*pushedAtAction;
+} TParserPosition;
+
+/*
+ * Parser instance: the input string in multibyte and wide-character
+ * form, the current parse state, and the output fields describing the
+ * lexeme.
+ */
+typedef struct TParser {
+	/* string and position information */
+	char 		*str;  /* multibyte string */
+	int		lenstr; /* length of mbstring */
+	wchar_t		*wstr;  /* wide character string */ 
+	int		lenwstr; /* length of wstring */
+
+	/* State of parse */
+	int		charmaxlen;
+	bool		usewide;
+	TParserPosition	*state;
+	bool		ignore;
+	bool		wanthost;
+
+	/* silly char */
+	char c;
+
+	/* out */
+	char	 	*lexeme;
+	int 		lenbytelexeme;
+	int 		lencharlexeme;
+	int 		type;
+	
+} TParser;
+
+
+TParser* TParserInit( char *, int );
+bool	TParserGet( TParser* );
+void	TParserClose( TParser* );

 #endif
--- a/contrib/tsearch2/wordparser/parser.l
+++ b/contrib/tsearch2/wordparser/parser.l
@ -1,346 +0,0 @@
-%{
-#include "postgres.h"
-
-#include "deflex.h"
-#include "parser.h"
-#include "common.h"
-
-/* Avoid exit() on fatal scanner errors */
-#undef fprintf
-#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
-
-char *token = NULL;  /* pointer to token */
-int tokenlen;
-static char *s     = NULL;  /* to return WHOLE hyphenated-word */
-
-YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */
-
-typedef struct {
-	int tlen;
-	int clen;
-	char *str;
-} TagStorage;
-
-static TagStorage ts={0,0,NULL};
-
-static void
-addTag(void)
-{
-	while( ts.clen+tsearch2_yyleng+1 > ts.tlen ) {
-		ts.tlen*=2;
-		ts.str=realloc(ts.str,ts.tlen);
-		if (!ts.str)
-                	ereport(ERROR,
-                               	(errcode(ERRCODE_OUT_OF_MEMORY),
-                               	 errmsg("out of memory")));
-        }
-        memcpy(ts.str+ts.clen,tsearch2_yytext,tsearch2_yyleng);
-        ts.clen+=tsearch2_yyleng;
-	ts.str[ts.clen]='\0';
-}
-
-static void
-startTag(void)
-{
-	if ( ts.str==NULL ) {
-		ts.tlen=tsearch2_yyleng+1;
-		ts.str=malloc(ts.tlen);
-		if (!ts.str)
-                	ereport(ERROR,
-                                (errcode(ERRCODE_OUT_OF_MEMORY),
-                                 errmsg("out of memory")));
-	}
-	ts.clen=0;
-	ts.str[0]='\0';
-	addTag();
-}
-
-%}
-
-%option 8bit
-%option never-interactive
-%option nodefault
-%option nounput
-%option noyywrap
-
-/* parser's state for parsing hyphenated-word */
-%x DELIM  
-/* parser's state for parsing URL*/
-%x URL  
-%x SERVER  
-
-/* parser's state for parsing TAGS */
-%x INTAG
-%x QINTAG
-%x INCOMMENT
-%x INSCRIPT
-
-/* cyrillic koi8 char */
-CYRALNUM	[0-9\200-\377]
-CYRALPHA	[\200-\377]
-ALPHA		[a-zA-Z\200-\377]
-ALNUM		[0-9a-zA-Z\200-\377]
-
-
-HOSTNAME	([-_[:alnum:]]+\.)+[[:alpha:]]+
-URI		[-_[:alnum:]/%,\.;=&?#]+
-
-%%
-
-"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; startTag(); }
-
-<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
-	BEGIN INITIAL; 
-	addTag();
-	token = ts.str;
-	tokenlen = ts.clen;
-	return TAG;
-}
-
-"<!--"	{ BEGIN INCOMMENT; startTag(); }
-
-<INCOMMENT>"-->"	{ 
-	BEGIN INITIAL;
-	addTag();
-	token = ts.str;
-	tokenlen = ts.clen;
-	return TAG;
-}
-
-
-"<"[\![:alpha:]]	{ BEGIN INTAG; startTag(); }
-
-"</"[[:alpha:]]	{ BEGIN INTAG; startTag(); }
-
-<INTAG>"\""	{ BEGIN QINTAG; addTag(); }
-
-<QINTAG>"\\\""	{ addTag(); }
-
-<QINTAG>"\""	{ BEGIN INTAG; addTag(); }
-
-<INTAG>">"	{ 
-	BEGIN INITIAL;
-	addTag();
-	token = ts.str;
-	tokenlen = ts.clen;
-	return TAG;
-}
-
-<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n { addTag(); }	
-
-\&(quot|amp|nbsp|lt|gt)\;   {
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return HTMLENTITY;
-}
-
-\&\#[0-9][0-9]?[0-9]?\; {
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return HTMLENTITY;
-}
- 
-[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return EMAIL; 
-}
-
-[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */ 	{ 
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return SCIENTIFIC; 
-}
-
-[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return VERSIONNUMBER;
-}
-
-[+-]?[0-9]+\.[0-9]+ {
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return DECIMAL;
-}
-
-[+-][0-9]+ { 
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return SIGNEDINT; 
-}
-
-<DELIM,INITIAL>[0-9]+ { 
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return UNSIGNEDINT; 
-}
-
-http"://"        { 
-	BEGIN URL; 
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return HTTP;
-}
-
-ftp"://"        { 
-	BEGIN URL; 
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return HTTP;
-}
-
-<URL,INITIAL>{HOSTNAME}[/:]{URI} { 
-	BEGIN SERVER;
-	if (s) { free(s); s=NULL; } 
-	s = strdup( tsearch2_yytext ); 
-	tokenlen = tsearch2_yyleng;
-	yyless( 0 ); 
-	token = s;
-	return FURL;
-}
-
-<SERVER,URL,INITIAL>{HOSTNAME} {
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return HOST;
-}
-
-<SERVER>[/:]{URI} 	{
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return URI;
-}
-
-[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return FILEPATH;
-}
-
-({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */	{
-	BEGIN DELIM;
-	if (s) { free(s); s=NULL; } 
-	s = strdup( tsearch2_yytext );
-	tokenlen = tsearch2_yyleng;
-	yyless( 0 );
-	token = s;
-	return CYRHYPHENWORD;
-}
-
-([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */	{
-	 BEGIN DELIM;
-	if (s) { free(s); s=NULL; } 
-	s = strdup( tsearch2_yytext );
-	tokenlen = tsearch2_yyleng;
-	yyless( 0 );
-	token = s;
-	return LATHYPHENWORD;
-}
-
-({ALNUM}+-)+{ALNUM}+ /* composite-word */	{
-	BEGIN DELIM;
-	if (s) { free(s); s=NULL; } 
-	s = strdup( tsearch2_yytext );
-	tokenlen = tsearch2_yyleng;
-	yyless( 0 );
-	token = s;
-	return HYPHENWORD;
-}
-
-<DELIM>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return VERSIONNUMBER;
-}
-
-<DELIM>\+?[0-9]+\.[0-9]+ {
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return DECIMAL;
-}
-
-<DELIM>{CYRALPHA}+  /* one word in composite-word */	{ 
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return CYRPARTHYPHENWORD; 
-}
-
-<DELIM>[[:alpha:]]+  /* one word in composite-word */	{ 
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return LATPARTHYPHENWORD; 
-}
-
-<DELIM>{ALNUM}+  /* one word in composite-word */	{ 
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return PARTHYPHENWORD; 
-}
-
-<DELIM>-  { 
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return SPACE;
-}
-
-<DELIM,SERVER,URL>.|\n /* return in basic state */	{
-	BEGIN INITIAL;
-	yyless( 0 );
-}
-
-{CYRALPHA}+ /* normal word */	{ 
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return CYRWORD; 
-}
-
-[[:alpha:]]+ /* normal word */	{ 
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return LATWORD; 
-}
-
-{ALNUM}+ /* normal word */	{ 
-	token = tsearch2_yytext; 
-	tokenlen = tsearch2_yyleng;
-	return UWORD; 
-}
-
-[ \r\n\t]+ {
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return SPACE;
-}
-
-. {
-	token = tsearch2_yytext;
-	tokenlen = tsearch2_yyleng;
-	return SPACE;
-} 
-
-%%
-
-/* clearing after parsing from string */
-void
-tsearch2_end_parse(void)
-{
-	if (s)
-	{
-		free(s);
-		s = NULL;
-	} 
-	tsearch2_yy_delete_buffer( buf );
-	buf = NULL;
-} 
-
-/* start parse from string */
-void
-tsearch2_start_parse_str(char* str, int limit)
-{
-	if (buf)
-		tsearch2_end_parse();
-	buf = tsearch2_yy_scan_bytes( str, limit );
-	tsearch2_yy_switch_to_buffer( buf );
-	BEGIN INITIAL;
-}
--- a/contrib/tsearch2/wparser_def.c
+++ b/contrib/tsearch2/wparser_def.c
@ -39,8 +39,7 @@ Datum		prsd_start(PG_FUNCTION_ARGS);
 Datum
 prsd_start(PG_FUNCTION_ARGS)
 {
-	tsearch2_start_parse_str((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1));
-	PG_RETURN_POINTER(NULL);
+	PG_RETURN_POINTER(TParserInit( (char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1)));
 }

 PG_FUNCTION_INFO_V1(prsd_getlexeme);
@ -48,14 +47,17 @@ Datum		prsd_getlexeme(PG_FUNCTION_ARGS);
 Datum
 prsd_getlexeme(PG_FUNCTION_ARGS)
 {
-	/* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+	TParser *p=(TParser*)PG_GETARG_POINTER(0); 
 	char	  **t = (char **) PG_GETARG_POINTER(1);
 	int		   *tlen = (int *) PG_GETARG_POINTER(2);
-	int			type = tsearch2_yylex();

-	*t = token;
-	*tlen = tokenlen;
-	PG_RETURN_INT32(type);
+	if ( !TParserGet(p) ) 
+		PG_RETURN_INT32(0);
+
+	*t = p->lexeme; 
+	*tlen = p->lenbytelexeme;
+
+	PG_RETURN_INT32(p->type);
 }

 PG_FUNCTION_INFO_V1(prsd_end);
@ -63,8 +65,8 @@ Datum		prsd_end(PG_FUNCTION_ARGS);
 Datum
 prsd_end(PG_FUNCTION_ARGS)
 {
-	/* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
-	tsearch2_end_parse();
+	TParser *p=(TParser*)PG_GETARG_POINTER(0);
+	TParserClose(p); 
 	PG_RETURN_VOID();
 }