|
|
@ -15,9 +15,10 @@ Datum |
|
|
|
tsstat_in(PG_FUNCTION_ARGS) |
|
|
|
tsstat_in(PG_FUNCTION_ARGS) |
|
|
|
{ |
|
|
|
{ |
|
|
|
tsstat *stat = palloc(STATHDRSIZE); |
|
|
|
tsstat *stat = palloc(STATHDRSIZE); |
|
|
|
|
|
|
|
|
|
|
|
stat->len = STATHDRSIZE; |
|
|
|
stat->len = STATHDRSIZE; |
|
|
|
stat->size = 0; |
|
|
|
stat->size = 0; |
|
|
|
|
|
|
|
stat->weight = 0; |
|
|
|
PG_RETURN_POINTER(stat); |
|
|
|
PG_RETURN_POINTER(stat); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
@ -32,6 +33,20 @@ tsstat_out(PG_FUNCTION_ARGS) |
|
|
|
PG_RETURN_NULL(); |
|
|
|
PG_RETURN_NULL(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
|
|
|
check_weight(tsvector *txt, WordEntry *wptr, int8 weight) { |
|
|
|
|
|
|
|
int len = POSDATALEN(txt, wptr); |
|
|
|
|
|
|
|
int num=0; |
|
|
|
|
|
|
|
WordEntryPos *ptr = POSDATAPTR(txt, wptr); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
while (len--) { |
|
|
|
|
|
|
|
if (weight & (1 << ptr->weight)) |
|
|
|
|
|
|
|
num++; |
|
|
|
|
|
|
|
ptr++; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
return num; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static WordEntry ** |
|
|
|
static WordEntry ** |
|
|
|
SEI_realloc(WordEntry ** in, uint32 *len) |
|
|
|
SEI_realloc(WordEntry ** in, uint32 *len) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -83,6 +98,7 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len) |
|
|
|
totallen = CALCSTATSIZE(nentry, slen); |
|
|
|
totallen = CALCSTATSIZE(nentry, slen); |
|
|
|
newstat = palloc(totallen); |
|
|
|
newstat = palloc(totallen); |
|
|
|
newstat->len = totallen; |
|
|
|
newstat->len = totallen; |
|
|
|
|
|
|
|
newstat->weight = stat->weight; |
|
|
|
newstat->size = nentry; |
|
|
|
newstat->size = nentry; |
|
|
|
|
|
|
|
|
|
|
|
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat)); |
|
|
|
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat)); |
|
|
@ -107,8 +123,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len) |
|
|
|
} |
|
|
|
} |
|
|
|
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat)); |
|
|
|
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat)); |
|
|
|
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat))); |
|
|
|
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat))); |
|
|
|
nptr->nentry = POSDATALEN(txt, *ptr); |
|
|
|
if ( (*ptr)->haspos ) { |
|
|
|
if (nptr->nentry == 0) |
|
|
|
nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); |
|
|
|
|
|
|
|
} else
|
|
|
|
nptr->nentry = 1; |
|
|
|
nptr->nentry = 1; |
|
|
|
nptr->ndoc = 1; |
|
|
|
nptr->ndoc = 1; |
|
|
|
nptr->len = (*ptr)->len; |
|
|
|
nptr->len = (*ptr)->len; |
|
|
@ -127,8 +144,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len) |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
else |
|
|
|
{ |
|
|
|
{ |
|
|
|
nptr->nentry = POSDATALEN(txt, *ptr); |
|
|
|
if ( (*ptr)->haspos ) { |
|
|
|
if (nptr->nentry == 0) |
|
|
|
nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); |
|
|
|
|
|
|
|
} else
|
|
|
|
nptr->nentry = 1; |
|
|
|
nptr->nentry = 1; |
|
|
|
nptr->ndoc = 1; |
|
|
|
nptr->ndoc = 1; |
|
|
|
nptr->len = (*ptr)->len; |
|
|
|
nptr->len = (*ptr)->len; |
|
|
@ -144,8 +162,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len) |
|
|
|
|
|
|
|
|
|
|
|
while (ptr - entry < len) |
|
|
|
while (ptr - entry < len) |
|
|
|
{ |
|
|
|
{ |
|
|
|
nptr->nentry = POSDATALEN(txt, *ptr); |
|
|
|
if ( (*ptr)->haspos ) { |
|
|
|
if (nptr->nentry == 0) |
|
|
|
nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); |
|
|
|
|
|
|
|
} else
|
|
|
|
nptr->nentry = 1; |
|
|
|
nptr->nentry = 1; |
|
|
|
nptr->ndoc = 1; |
|
|
|
nptr->ndoc = 1; |
|
|
|
nptr->len = (*ptr)->len; |
|
|
|
nptr->len = (*ptr)->len; |
|
|
@ -173,12 +192,14 @@ ts_accum(PG_FUNCTION_ARGS) |
|
|
|
cur = 0; |
|
|
|
cur = 0; |
|
|
|
StatEntry *sptr; |
|
|
|
StatEntry *sptr; |
|
|
|
WordEntry *wptr; |
|
|
|
WordEntry *wptr; |
|
|
|
|
|
|
|
int n=0; |
|
|
|
|
|
|
|
|
|
|
|
if (stat == NULL || PG_ARGISNULL(0)) |
|
|
|
if (stat == NULL || PG_ARGISNULL(0)) |
|
|
|
{ /* Init in first */ |
|
|
|
{ /* Init in first */ |
|
|
|
stat = palloc(STATHDRSIZE); |
|
|
|
stat = palloc(STATHDRSIZE); |
|
|
|
stat->len = STATHDRSIZE; |
|
|
|
stat->len = STATHDRSIZE; |
|
|
|
stat->size = 0; |
|
|
|
stat->size = 0; |
|
|
|
|
|
|
|
stat->weight = 0; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* simple check of correctness */ |
|
|
|
/* simple check of correctness */ |
|
|
@ -201,32 +222,37 @@ ts_accum(PG_FUNCTION_ARGS) |
|
|
|
sptr++; |
|
|
|
sptr++; |
|
|
|
else if (cmp == 0) |
|
|
|
else if (cmp == 0) |
|
|
|
{ |
|
|
|
{ |
|
|
|
int n = POSDATALEN(txt, wptr); |
|
|
|
if ( stat->weight == 0 ) { |
|
|
|
|
|
|
|
sptr->ndoc++; |
|
|
|
if (n == 0) |
|
|
|
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1; |
|
|
|
n = 1; |
|
|
|
} else if ( wptr->haspos && (n=check_weight(txt, wptr, stat->weight))!=0 ) { |
|
|
|
sptr->ndoc++; |
|
|
|
sptr->ndoc++; |
|
|
|
sptr->nentry += n; |
|
|
|
sptr->nentry += n; |
|
|
|
|
|
|
|
} |
|
|
|
sptr++; |
|
|
|
sptr++; |
|
|
|
wptr++; |
|
|
|
wptr++; |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
else |
|
|
|
{ |
|
|
|
{ |
|
|
|
if (cur == len) |
|
|
|
if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
|
|
|
|
newentry = SEI_realloc(newentry, &len); |
|
|
|
if (cur == len) |
|
|
|
newentry[cur] = wptr; |
|
|
|
newentry = SEI_realloc(newentry, &len); |
|
|
|
|
|
|
|
newentry[cur] = wptr; |
|
|
|
|
|
|
|
cur++; |
|
|
|
|
|
|
|
} |
|
|
|
wptr++; |
|
|
|
wptr++; |
|
|
|
cur++; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
while (wptr - ARRPTR(txt) < txt->size) |
|
|
|
while (wptr - ARRPTR(txt) < txt->size) |
|
|
|
{ |
|
|
|
{ |
|
|
|
if (cur == len) |
|
|
|
if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
|
|
|
|
newentry = SEI_realloc(newentry, &len); |
|
|
|
if (cur == len) |
|
|
|
newentry[cur] = wptr; |
|
|
|
newentry = SEI_realloc(newentry, &len); |
|
|
|
|
|
|
|
newentry[cur] = wptr; |
|
|
|
|
|
|
|
cur++; |
|
|
|
|
|
|
|
} |
|
|
|
wptr++; |
|
|
|
wptr++; |
|
|
|
cur++; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
else |
|
|
@ -243,12 +269,13 @@ ts_accum(PG_FUNCTION_ARGS) |
|
|
|
cmp = compareStatWord(sptr, wptr, stat, txt); |
|
|
|
cmp = compareStatWord(sptr, wptr, stat, txt); |
|
|
|
if (cmp == 0) |
|
|
|
if (cmp == 0) |
|
|
|
{ |
|
|
|
{ |
|
|
|
int n = POSDATALEN(txt, wptr); |
|
|
|
if ( stat->weight == 0 ) { |
|
|
|
|
|
|
|
sptr->ndoc++; |
|
|
|
if (n == 0) |
|
|
|
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1; |
|
|
|
n = 1; |
|
|
|
} else if ( wptr->haspos && (n=check_weight(txt, wptr, stat->weight))!=0 ) { |
|
|
|
sptr->ndoc++; |
|
|
|
sptr->ndoc++; |
|
|
|
sptr->nentry += n; |
|
|
|
sptr->nentry += n; |
|
|
|
|
|
|
|
} |
|
|
|
break; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
else if (cmp < 0) |
|
|
|
else if (cmp < 0) |
|
|
@ -259,10 +286,12 @@ ts_accum(PG_FUNCTION_ARGS) |
|
|
|
|
|
|
|
|
|
|
|
if (StopLow >= StopHigh) |
|
|
|
if (StopLow >= StopHigh) |
|
|
|
{ /* not found */ |
|
|
|
{ /* not found */ |
|
|
|
if (cur == len) |
|
|
|
if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
|
|
|
|
newentry = SEI_realloc(newentry, &len); |
|
|
|
if (cur == len) |
|
|
|
newentry[cur] = wptr; |
|
|
|
newentry = SEI_realloc(newentry, &len); |
|
|
|
cur++; |
|
|
|
newentry[cur] = wptr; |
|
|
|
|
|
|
|
cur++; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
wptr++; |
|
|
|
wptr++; |
|
|
|
} |
|
|
|
} |
|
|
@ -389,7 +418,7 @@ get_ti_Oid(void) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static tsstat * |
|
|
|
static tsstat * |
|
|
|
ts_stat_sql(text *txt) |
|
|
|
ts_stat_sql(text *txt, text *ws) |
|
|
|
{ |
|
|
|
{ |
|
|
|
char *query = text2char(txt); |
|
|
|
char *query = text2char(txt); |
|
|
|
int i; |
|
|
|
int i; |
|
|
@ -423,6 +452,31 @@ ts_stat_sql(text *txt) |
|
|
|
stat = palloc(STATHDRSIZE); |
|
|
|
stat = palloc(STATHDRSIZE); |
|
|
|
stat->len = STATHDRSIZE; |
|
|
|
stat->len = STATHDRSIZE; |
|
|
|
stat->size = 0; |
|
|
|
stat->size = 0; |
|
|
|
|
|
|
|
stat->weight = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ( ws ) { |
|
|
|
|
|
|
|
char *buf; |
|
|
|
|
|
|
|
buf = VARDATA(ws); |
|
|
|
|
|
|
|
while( buf - VARDATA(ws) < VARSIZE(buf) - VARHDRSZ ) { |
|
|
|
|
|
|
|
switch (tolower(*buf)) { |
|
|
|
|
|
|
|
case 'a': |
|
|
|
|
|
|
|
stat->weight |= 1 << 3; |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
case 'b': |
|
|
|
|
|
|
|
stat->weight |= 1 << 2; |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
case 'c': |
|
|
|
|
|
|
|
stat->weight |= 1 << 1; |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
case 'd': |
|
|
|
|
|
|
|
stat->weight |= 1; |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
default: |
|
|
|
|
|
|
|
stat->weight |= 0; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
buf++; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
while (SPI_processed > 0) |
|
|
|
while (SPI_processed > 0) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -467,11 +521,13 @@ ts_stat(PG_FUNCTION_ARGS) |
|
|
|
{ |
|
|
|
{ |
|
|
|
tsstat *stat; |
|
|
|
tsstat *stat; |
|
|
|
text *txt = PG_GETARG_TEXT_P(0); |
|
|
|
text *txt = PG_GETARG_TEXT_P(0); |
|
|
|
|
|
|
|
text *ws = (PG_NARGS() > 1) ? PG_GETARG_TEXT_P(1) : NULL; |
|
|
|
|
|
|
|
|
|
|
|
funcctx = SRF_FIRSTCALL_INIT(); |
|
|
|
funcctx = SRF_FIRSTCALL_INIT(); |
|
|
|
SPI_connect(); |
|
|
|
SPI_connect(); |
|
|
|
stat = ts_stat_sql(txt); |
|
|
|
stat = ts_stat_sql(txt,ws); |
|
|
|
PG_FREE_IF_COPY(txt, 0); |
|
|
|
PG_FREE_IF_COPY(txt, 0); |
|
|
|
|
|
|
|
if (PG_NARGS() > 1 ) PG_FREE_IF_COPY(ws, 1); |
|
|
|
ts_setup_firstcall(funcctx, stat); |
|
|
|
ts_setup_firstcall(funcctx, stat); |
|
|
|
SPI_finish(); |
|
|
|
SPI_finish(); |
|
|
|
} |
|
|
|
} |
|
|
|