@ -317,7 +317,7 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
if ( nulls [ i ] )
ereport ( ERROR ,
( errcode ( ERRCODE_INVALID_PARAMETER_VALUE ) ,
( errcode ( ERRCODE_NULL_VALUE_NOT_ALLOWED ) ,
errmsg ( " lexeme array may not contain nulls " ) ) ) ;
lex = VARDATA ( dlexemes [ i ] ) ;
@ -430,7 +430,7 @@ compareint(const void *va, const void *vb)
/*
* Internal routine to delete lexemes from TSVector by array of offsets .
*
* int * indices_to_delete - - array of lexeme offsets to delete
* int * indices_to_delete - - array of lexeme offsets to delete ( modified here ! )
* int indices_count - - size of that array
*
* Returns new TSVector without given lexemes along with their positions
@ -445,35 +445,51 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
* arrout ;
char * data = STRPTR ( tsv ) ,
* dataout ;
int i ,
j ,
k ,
curoff ;
int i , /* index in arrin */
j , /* index in arrout */
k , /* index in indices_to_delete */
curoff ; /* index in dataout area */
/*
* Here we overestimates tsout size , since we don ' t know exact size
* occupied by positions and weights . We will set exact size later after a
* pass through TSVector .
* Sort the filter array to simplify membership checks below . Also , get
* rid of any duplicate entries , so that we can assume that indices_count
* is exactly equal to the number of lexemes that will be removed .
*/
tsout = ( TSVector ) palloc0 ( VARSIZE ( tsv ) ) ;
arrout = ARRPTR ( tsout ) ;
tsout - > size = tsv - > size - indices_count ;
/* Sort our filter array to simplify membership check later. */
if ( indices_count > 1 )
{
int kp ;
qsort ( indices_to_delete , indices_count , sizeof ( int ) , compareint ) ;
kp = 0 ;
for ( k = 1 ; k < indices_count ; k + + )
{
if ( indices_to_delete [ k ] ! = indices_to_delete [ kp ] )
indices_to_delete [ + + kp ] = indices_to_delete [ k ] ;
}
indices_count = + + kp ;
}
/*
* Copy tsv to tsout skipping lexemes that enlisted in indices_to_delete .
* Here we overestimate tsout size , since we don ' t know how much space is
* used by the deleted lexeme ( s ) . We will set exact size below .
*/
curoff = 0 ;
tsout = ( TSVector ) palloc0 ( VARSIZE ( tsv ) ) ;
/* This count must be correct because STRPTR(tsout) relies on it. */
tsout - > size = tsv - > size - indices_count ;
/*
* Copy tsv to tsout , skipping lexemes listed in indices_to_delete .
*/
arrout = ARRPTR ( tsout ) ;
dataout = STRPTR ( tsout ) ;
curoff = 0 ;
for ( i = j = k = 0 ; i < tsv - > size ; i + + )
{
/*
* Here we should check whether current i is present in
* indices_to_delete or not . Since indices_to_delete is already sorted
* we can advance it index only when we have match .
* If current i is present in indices_to_delete , skip this lexeme .
* Since indices_to_delete is already sorted , we only need to check
* the current ( k ' th ) entry .
*/
if ( k < indices_count & & i = = indices_to_delete [ k ] )
{
@ -481,7 +497,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
continue ;
}
/* Copy lexeme, it' s positions and weights */
/* Copy lexeme and it s positions and weights */
memcpy ( dataout + curoff , data + arrin [ i ] . pos , arrin [ i ] . len ) ;
arrout [ j ] . haspos = arrin [ i ] . haspos ;
arrout [ j ] . len = arrin [ i ] . len ;
@ -489,8 +505,8 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
curoff + = arrin [ i ] . len ;
if ( arrin [ i ] . haspos )
{
int len = POSDATALEN ( tsv , arrin + i ) * sizeof ( WordEntryPos ) +
sizeof ( uint16 ) ;
int len = POSDATALEN ( tsv , arrin + i ) * sizeof ( WordEntryPos )
+ sizeof ( uint16 ) ;
curoff = SHORTALIGN ( curoff ) ;
memcpy ( dataout + curoff ,
@ -503,10 +519,9 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
}
/*
* After the pass through TSVector k should equals exactly to
* indices_count . If it isn ' t then the caller provided us with indices
* outside of [ 0 , tsv - > size ) range and estimation of tsout ' s size is
* wrong .
* k should now be exactly equal to indices_count . If it isn ' t then the
* caller provided us with indices outside of [ 0 , tsv - > size ) range and
* estimation of tsout ' s size is wrong .
*/
Assert ( k = = indices_count ) ;
@ -560,7 +575,7 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
/*
* In typical use case array of lexemes to delete is relatively small . So
* here we optimizing things for that scenario : iterate through lexarr
* here we optimize things for that scenario : iterate through lexarr
* performing binary search of each lexeme from lexarr in tsvector .
*/
skip_indices = palloc0 ( nlex * sizeof ( int ) ) ;
@ -572,10 +587,10 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
if ( nulls [ i ] )
ereport ( ERROR ,
( errcode ( ERRCODE_INVALID_PARAMETER_VALUE ) ,
( errcode ( ERRCODE_NULL_VALUE_NOT_ALLOWED ) ,
errmsg ( " lexeme array may not contain nulls " ) ) ) ;
lex = VARDATA ( dlexemes [ i ] ) ;
lex = VARDATA_ANY ( dlexemes [ i ] ) ;
lex_len = VARSIZE_ANY_EXHDR ( dlexemes [ i ] ) ;
lex_pos = tsvector_bsearch ( tsin , lex , lex_len ) ;
@ -738,7 +753,7 @@ array_to_tsvector(PG_FUNCTION_ARGS)
{
if ( nulls [ i ] )
ereport ( ERROR ,
( errcode ( ERRCODE_INVALID_PARAMETER_VALUE ) ,
( errcode ( ERRCODE_NULL_VALUE_NOT_ALLOWED ) ,
errmsg ( " lexeme array may not contain nulls " ) ) ) ;
datalen + = VARSIZE_ANY_EXHDR ( dlexemes [ i ] ) ;
@ -797,7 +812,7 @@ tsvector_filter(PG_FUNCTION_ARGS)
if ( nulls [ i ] )
ereport ( ERROR ,
( errcode ( ERRCODE_INVALID_PARAMETER_VALUE ) ,
( errcode ( ERRCODE_NULL_VALUE_NOT_ALLOWED ) ,
errmsg ( " weight array may not contain nulls " ) ) ) ;
char_weight = DatumGetChar ( dweights [ i ] ) ;