@ -56,7 +56,7 @@ struct TSQueryParserStateData
/*
* subroutine to parse the modifiers ( weight and prefix flag currently )
* part , like ' : 1 AB ' of a query .
* part , like ' : AB * ' of a query .
*/
static char *
get_modifiers ( char * buf , int16 * weight , bool * prefix )
@ -100,6 +100,94 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
return buf ;
}
/*
* Parse phrase operator . The operator
* may take the following forms :
*
* a < X > b ( distance is no greater than X )
* a < - > b ( default distance = 1 )
*
* The buffer should begin with ' < ' char
*/
static char *
parse_phrase_operator ( char * buf , int16 * distance )
{
enum
{
PHRASE_OPEN = 0 ,
PHRASE_DIST ,
PHRASE_CLOSE ,
PHRASE_ERR ,
PHRASE_FINISH
} state = PHRASE_OPEN ;
char * ptr = buf ;
char * endptr ;
long l = 1 ;
while ( * ptr )
{
switch ( state )
{
case PHRASE_OPEN :
Assert ( t_iseq ( ptr , ' < ' ) ) ;
state = PHRASE_DIST ;
ptr + + ;
break ;
case PHRASE_DIST :
if ( t_iseq ( ptr , ' - ' ) )
{
state = PHRASE_CLOSE ;
ptr + + ;
break ;
}
else if ( ! t_isdigit ( ptr ) )
{
state = PHRASE_ERR ;
break ;
}
l = strtol ( ptr , & endptr , 10 ) ;
if ( ptr = = endptr )
state = PHRASE_ERR ;
else if ( errno = = ERANGE | | l > MAXENTRYPOS )
ereport ( ERROR ,
( errcode ( ERRCODE_INVALID_PARAMETER_VALUE ) ,
errmsg ( " distance in phrase operator should not be greater than %d " ,
MAXENTRYPOS ) ) ) ;
else
{
state = PHRASE_CLOSE ;
ptr = endptr ;
}
break ;
case PHRASE_CLOSE :
if ( t_iseq ( ptr , ' > ' ) )
{
state = PHRASE_FINISH ;
ptr + + ;
}
else
state = PHRASE_ERR ;
break ;
case PHRASE_FINISH :
* distance = ( int16 ) l ;
return ptr ;
case PHRASE_ERR :
default :
goto err ;
}
}
err :
* distance = - 1 ;
return buf ;
}
/*
* token types for parsing
*/
@ -116,8 +204,10 @@ typedef enum
/*
* get token from query string
*
* * operator is filled in with OP_ * when return values is PT_OPR
* * operator is filled in with OP_ * when return values is PT_OPR ,
* but * weight could contain a distance value in case of phrase operator .
* * strval , * lenval and * weight are filled in when return value is PT_VAL
*
*/
static ts_tokentype
gettoken_query ( TSQueryParserState state ,
@ -185,13 +275,23 @@ gettoken_query(TSQueryParserState state,
( state - > buf ) + + ;
return PT_OPR ;
}
if ( t_iseq ( state - > buf , ' | ' ) )
else if ( t_iseq ( state - > buf , ' | ' ) )
{
state - > state = WAITOPERAND ;
* operator = OP_OR ;
( state - > buf ) + + ;
return PT_OPR ;
}
else if ( t_iseq ( state - > buf , ' < ' ) )
{
state - > state = WAITOPERAND ;
* operator = OP_PHRASE ;
/* weight var is used as storage for distance */
state - > buf = parse_phrase_operator ( state - > buf , weight ) ;
if ( * weight < 0 )
return PT_ERR ;
return PT_OPR ;
}
else if ( t_iseq ( state - > buf , ' ) ' ) )
{
( state - > buf ) + + ;
@ -223,15 +323,16 @@ gettoken_query(TSQueryParserState state,
* Push an operator to state - > polstr
*/
void
pushOperator ( TSQueryParserState state , int8 oper )
pushOperator ( TSQueryParserState state , int8 oper , int16 distance )
{
QueryOperator * tmp ;
Assert ( oper = = OP_NOT | | oper = = OP_AND | | oper = = OP_OR ) ;
Assert ( oper = = OP_NOT | | oper = = OP_AND | | oper = = OP_OR | | oper = = OP_PHRASE ) ;
tmp = ( QueryOperator * ) palloc0 ( sizeof ( QueryOperator ) ) ;
tmp - > type = QI_OPR ;
tmp - > oper = oper ;
tmp - > distance = ( oper = = OP_PHRASE ) ? distance : 0 ;
/* left is filled in later with findoprnd */
state - > polstr = lcons ( tmp , state - > polstr ) ;
@ -330,14 +431,18 @@ makepol(TSQueryParserState state,
PushFunction pushval ,
Datum opaque )
{
int8 operator = 0 ;
ts_tokentype type ;
int lenval = 0 ;
char * strval = NULL ;
int8 opstack [ STACKDEPTH ] ;
int lenstack = 0 ;
int16 weight = 0 ;
bool prefix ;
int8 operator = 0 ;
ts_tokentype type ;
int lenval = 0 ;
char * strval = NULL ;
struct
{
int8 op ;
int16 distance ;
} opstack [ STACKDEPTH ] ;
int lenstack = 0 ;
int16 weight = 0 ;
bool prefix ;
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth ( ) ;
@ -348,39 +453,48 @@ makepol(TSQueryParserState state,
{
case PT_VAL :
pushval ( opaque , state , strval , lenval , weight , prefix ) ;
while ( lenstack & & ( opstack [ lenstack - 1 ] = = OP_AND | |
opstack [ lenstack - 1 ] = = OP_NOT ) )
while ( lenstack & & ( opstack [ lenstack - 1 ] . op = = OP_AND | |
opstack [ lenstack - 1 ] . op = = OP_PHRASE | |
opstack [ lenstack - 1 ] . op = = OP_NOT ) )
{
lenstack - - ;
pushOperator ( state , opstack [ lenstack ] ) ;
pushOperator ( state ,
opstack [ lenstack ] . op ,
opstack [ lenstack ] . distance ) ;
}
break ;
case PT_OPR :
if ( lenstack & & operator = = OP_OR )
pushOperator ( state , OP_OR ) ;
pushOperator ( state , OP_OR , 0 ) ;
else
{
if ( lenstack = = STACKDEPTH ) /* internal error */
elog ( ERROR , " tsquery stack too small " ) ;
opstack [ lenstack ] = operator ;
opstack [ lenstack ] . op = operator ;
opstack [ lenstack ] . distance = weight ;
lenstack + + ;
}
break ;
case PT_OPEN :
makepol ( state , pushval , opaque ) ;
while ( lenstack & & ( opstack [ lenstack - 1 ] = = OP_AND | |
opstack [ lenstack - 1 ] = = OP_NOT ) )
while ( lenstack & & ( opstack [ lenstack - 1 ] . op = = OP_AND | |
opstack [ lenstack - 1 ] . op = = OP_PHRASE | |
opstack [ lenstack - 1 ] . op = = OP_NOT ) )
{
lenstack - - ;
pushOperator ( state , opstack [ lenstack ] ) ;
pushOperator ( state ,
opstack [ lenstack ] . op ,
opstack [ lenstack ] . distance ) ;
}
break ;
case PT_CLOSE :
while ( lenstack )
{
lenstack - - ;
pushOperator ( state , opstack [ lenstack ] ) ;
pushOperator ( state ,
opstack [ lenstack ] . op ,
opstack [ lenstack ] . distance ) ;
} ;
return ;
case PT_ERR :
@ -394,12 +508,14 @@ makepol(TSQueryParserState state,
while ( lenstack )
{
lenstack - - ;
pushOperator ( state , opstack [ lenstack ] ) ;
pushOperator ( state ,
opstack [ lenstack ] . op ,
opstack [ lenstack ] . distance ) ;
}
}
static void
findoprnd_recurse ( QueryItem * ptr , uint32 * pos , int nnodes )
findoprnd_recurse ( QueryItem * ptr , uint32 * pos , int nnodes , bool * needcleanup )
{
/* since this function recurses, it could be driven to stack overflow. */
check_stack_depth ( ) ;
@ -407,10 +523,13 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
if ( * pos > = nnodes )
elog ( ERROR , " malformed tsquery: operand not found " ) ;
if ( ptr [ * pos ] . type = = QI_VAL | |
ptr [ * pos ] . type = = QI_VALSTOP ) /* need to handle VALSTOP here, they
* haven ' t been cleaned away yet . */
if ( ptr [ * pos ] . type = = QI_VAL )
{
( * pos ) + + ;
}
else if ( ptr [ * pos ] . type = = QI_VALSTOP )
{
* needcleanup = true ; /* we'll have to remove stop words */
( * pos ) + + ;
}
else
@ -419,21 +538,32 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
if ( ptr [ * pos ] . qoperator . oper = = OP_NOT )
{
ptr [ * pos ] . qoperator . left = 1 ;
ptr [ * pos ] . qoperator . left = 1 ; /* fixed offset */
( * pos ) + + ;
findoprnd_recurse ( ptr , pos , nnodes ) ;
/* process the only argument */
findoprnd_recurse ( ptr , pos , nnodes , needcleanup ) ;
}
else
{
QueryOperator * curitem = & ptr [ * pos ] . qoperator ;
int tmp = * pos ;
QueryOperator * curitem = & ptr [ * pos ] . qoperator ;
int tmp = * pos ; /* save current position */
Assert ( curitem - > oper = = OP_AND | |
curitem - > oper = = OP_OR | |
curitem - > oper = = OP_PHRASE ) ;
Assert ( curitem - > oper = = OP_AND | | curitem - > oper = = OP_OR ) ;
if ( curitem - > oper = = OP_PHRASE )
* needcleanup = true ; /* push OP_PHRASE down later */
( * pos ) + + ;
findoprnd_recurse ( ptr , pos , nnodes ) ;
curitem - > left = * pos - tmp ;
findoprnd_recurse ( ptr , pos , nnodes ) ;
/* process RIGHT argument */
findoprnd_recurse ( ptr , pos , nnodes , needcleanup ) ;
curitem - > left = * pos - tmp ; /* set LEFT arg's offset */
/* process LEFT argument */
findoprnd_recurse ( ptr , pos , nnodes , needcleanup ) ;
}
}
}
@ -444,12 +574,13 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
* QueryItems must be in polish ( prefix ) notation .
*/
static void
findoprnd ( QueryItem * ptr , int size )
findoprnd ( QueryItem * ptr , int size , bool * needcleanup )
{
uint32 pos ;
* needcleanup = false ;
pos = 0 ;
findoprnd_recurse ( ptr , & pos , size ) ;
findoprnd_recurse ( ptr , & pos , size , needcleanup ) ;
if ( pos ! = size )
elog ( ERROR , " malformed tsquery: extra nodes " ) ;
@ -466,9 +597,6 @@ findoprnd(QueryItem *ptr, int size)
*
* opaque is passed on to pushval as is , pushval can use it to store its
* private state .
*
* The returned query might contain QI_STOPVAL nodes . The caller is responsible
* for cleaning them up ( with clean_fakeval )
*/
TSQuery
parse_tsquery ( char * buf ,
@ -482,6 +610,7 @@ parse_tsquery(char *buf,
int commonlen ;
QueryItem * ptr ;
ListCell * cell ;
bool needcleanup ;
/* init state */
state . buffer = buf ;
@ -531,7 +660,7 @@ parse_tsquery(char *buf,
i = 0 ;
foreach ( cell , state . polstr )
{
QueryItem * item = ( QueryItem * ) lfirst ( cell ) ;
QueryItem * item = ( QueryItem * ) lfirst ( cell ) ;
switch ( item - > type )
{
@ -555,7 +684,14 @@ parse_tsquery(char *buf,
pfree ( state . op ) ;
/* Set left operand pointers for every operator. */
findoprnd ( ptr , query - > size ) ;
findoprnd ( ptr , query - > size , & needcleanup ) ;
/*
* QI_VALSTOP nodes should be cleaned and
* and OP_PHRASE should be pushed down
*/
if ( needcleanup )
return cleanup_fakeval_and_phrase ( query ) ;
return query ;
}
@ -600,12 +736,15 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
( inf ) - > cur = ( inf ) - > buf + len ; \
}
# define PRINT_PRIORITY(x) \
( ( QO_PRIORITY ( x ) = = OP_NOT ) ? OP_NOT_PHRASE : QO_PRIORITY ( x ) )
/*
* recursive walk on tree and print it in
* infix ( human - readable ) view
* recursively traverse the tree and
* print it in infix ( human - readable ) form
*/
static void
infix ( INFIX * in , bool first )
infix ( INFIX * in , int parentPriority )
{
/* since this function recurses, it could be driven to stack overflow. */
check_stack_depth ( ) ;
@ -674,24 +813,22 @@ infix(INFIX *in, bool first)
}
else if ( in - > curpol - > qoperator . oper = = OP_NOT )
{
bool isopr = false ;
int priority = PRINT_PRIORITY ( in - > curpol ) ;
RESIZEBUF ( in , 1 ) ;
* ( in - > cur ) = ' ! ' ;
in - > cur + + ;
* ( in - > cur ) = ' \0 ' ;
in - > curpol + + ;
if ( in - > curpol - > type = = QI_OPR )
if ( priority < parentPriority )
{
isopr = true ;
RESIZEBUF ( in , 2 ) ;
sprintf ( in - > cur , " ( " ) ;
in - > cur = strchr ( in - > cur , ' \0 ' ) ;
}
RESIZEBUF ( in , 1 ) ;
* ( in - > cur ) = ' ! ' ;
in - > cur + + ;
* ( in - > cur ) = ' \0 ' ;
in - > curpol + + ;
infix ( in , isopr ) ;
if ( isopr )
infix ( in , pr iority ) ;
if ( pr iority < pa rentPriority )
{
RESIZEBUF ( in , 2 ) ;
sprintf ( in - > cur , " ) " ) ;
@ -701,11 +838,18 @@ infix(INFIX *in, bool first)
else
{
int8 op = in - > curpol - > qoperator . oper ;
int priority = PRINT_PRIORITY ( in - > curpol ) ;
int16 distance = in - > curpol - > qoperator . distance ;
INFIX nrm ;
bool needParenthesis = false ;
in - > curpol + + ;
if ( op = = OP_OR & & ! first )
if ( priority < parentPriority | |
( op = = OP_PHRASE & &
( priority = = parentPriority | | /* phrases are not commutative! */
parentPriority = = OP_PRIORITY ( OP_AND ) ) ) )
{
needParenthesis = true ;
RESIZEBUF ( in , 2 ) ;
sprintf ( in - > cur , " ( " ) ;
in - > cur = strchr ( in - > cur , ' \0 ' ) ;
@ -717,14 +861,14 @@ infix(INFIX *in, bool first)
nrm . cur = nrm . buf = ( char * ) palloc ( sizeof ( char ) * nrm . buflen ) ;
/* get right operand */
infix ( & nrm , false ) ;
infix ( & nrm , priority ) ;
/* get & print left operand */
in - > curpol = nrm . curpol ;
infix ( in , false ) ;
infix ( in , priority ) ;
/* print operator & right operand */
RESIZEBUF ( in , 3 + ( nrm . cur - nrm . buf ) ) ;
RESIZEBUF ( in , 3 + ( 2 + 10 /* distance */ ) + ( nrm . cur - nrm . buf ) ) ;
switch ( op )
{
case OP_OR :
@ -733,6 +877,12 @@ infix(INFIX *in, bool first)
case OP_AND :
sprintf ( in - > cur , " & %s " , nrm . buf ) ;
break ;
case OP_PHRASE :
if ( distance ! = 1 )
sprintf ( in - > cur , " <%d> %s " , distance , nrm . buf ) ;
else
sprintf ( in - > cur , " <-> %s " , nrm . buf ) ;
break ;
default :
/* OP_NOT is handled in above if-branch */
elog ( ERROR , " unrecognized operator type: %d " , op ) ;
@ -740,7 +890,7 @@ infix(INFIX *in, bool first)
in - > cur = strchr ( in - > cur , ' \0 ' ) ;
pfree ( nrm . buf ) ;
if ( op = = OP_OR & & ! first )
if ( needParenthesis )
{
RESIZEBUF ( in , 2 ) ;
sprintf ( in - > cur , " ) " ) ;
@ -749,7 +899,6 @@ infix(INFIX *in, bool first)
}
}
Datum
tsqueryout ( PG_FUNCTION_ARGS )
{
@ -768,7 +917,7 @@ tsqueryout(PG_FUNCTION_ARGS)
nrm . cur = nrm . buf = ( char * ) palloc ( sizeof ( char ) * nrm . buflen ) ;
* ( nrm . cur ) = ' \0 ' ;
nrm . op = GETOPERAND ( query ) ;
infix ( & nrm , true ) ;
infix ( & nrm , - 1 /* lowest priority */ ) ;
PG_FREE_IF_COPY ( query , 0 ) ;
PG_RETURN_CSTRING ( nrm . buf ) ;
@ -789,7 +938,8 @@ tsqueryout(PG_FUNCTION_ARGS)
*
* For each operator :
* uint8 type , QI_OPR
* uint8 operator , one of OP_AND , OP_OR , OP_NOT .
* uint8 operator , one of OP_AND , OP_PHRASE OP_OR , OP_NOT .
* uint16 distance ( only for OP_PHRASE )
*/
Datum
tsquerysend ( PG_FUNCTION_ARGS )
@ -815,6 +965,9 @@ tsquerysend(PG_FUNCTION_ARGS)
break ;
case QI_OPR :
pq_sendint ( & buf , item - > qoperator . oper , sizeof ( item - > qoperator . oper ) ) ;
if ( item - > qoperator . oper = = OP_PHRASE )
pq_sendint ( & buf , item - > qoperator . distance ,
sizeof ( item - > qoperator . distance ) ) ;
break ;
default :
elog ( ERROR , " unrecognized tsquery node type: %d " , item - > type ) ;
@ -830,15 +983,16 @@ tsquerysend(PG_FUNCTION_ARGS)
Datum
tsqueryrecv ( PG_FUNCTION_ARGS )
{
StringInfo buf = ( StringInfo ) PG_GETARG_POINTER ( 0 ) ;
TSQuery query ;
int i ,
len ;
QueryItem * item ;
int datalen ;
char * ptr ;
uint32 size ;
const char * * operands ;
StringInfo buf = ( StringInfo ) PG_GETARG_POINTER ( 0 ) ;
TSQuery query ;
int i ,
len ;
QueryItem * item ;
int datalen ;
char * ptr ;
uint32 size ;
const char * * operands ;
bool needcleanup ;
size = pq_getmsgint ( buf , sizeof ( uint32 ) ) ;
if ( size > ( MaxAllocSize / sizeof ( QueryItem ) ) )
@ -907,13 +1061,15 @@ tsqueryrecv(PG_FUNCTION_ARGS)
int8 oper ;
oper = ( int8 ) pq_getmsgint ( buf , sizeof ( int8 ) ) ;
if ( oper ! = OP_NOT & & oper ! = OP_OR & & oper ! = OP_AND )
if ( oper ! = OP_NOT & & oper ! = OP_OR & & oper ! = OP_AND & & oper ! = OP_PHRASE )
elog ( ERROR , " invalid tsquery: unrecognized operator type %d " ,
( int ) oper ) ;
if ( i = = size - 1 )
elog ( ERROR , " invalid pointer to right operand " ) ;
item - > qoperator . oper = oper ;
if ( oper = = OP_PHRASE )
item - > qoperator . distance = ( int16 ) pq_getmsgint ( buf , sizeof ( int16 ) ) ;
}
else
elog ( ERROR , " unrecognized tsquery node type: %d " , item - > type ) ;
@ -930,7 +1086,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
* Fill in the left - pointers . Checks that the tree is well - formed as a
* side - effect .
*/
findoprnd ( item , size ) ;
findoprnd ( item , size , & needcleanup ) ;
/* Copy operands to output struct */
for ( i = 0 ; i < size ; i + + )
@ -949,7 +1105,10 @@ tsqueryrecv(PG_FUNCTION_ARGS)
SET_VARSIZE ( query , len + datalen ) ;
PG_RETURN_TSVECTOR ( query ) ;
if ( needcleanup )
PG_RETURN_TSQUERY ( cleanup_fakeval_and_phrase ( query ) ) ;
PG_RETURN_TSQUERY ( query ) ;
}
/*