|
|
|
|
@ -78,6 +78,7 @@ prsd_end(PG_FUNCTION_ARGS) |
|
|
|
|
|
|
|
|
|
/*
 * IDIGNORE(x): nonzero when x is one of the codes 13, 14, 12 or 23.
 * In the visible code it is only used as one of the terms of NOENDTOKEN().
 * NOTE(review): the numbers are presumably token-type ids of the parser;
 * their meanings are not defined in this part of the file -- confirm.
 * The argument is expanded several times, so pass an expression without
 * side effects.
 */
#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 ) |
|
|
|
|
/*
 * HLIDIGNORE(x): nonzero when x is one of the codes 5, 13, 15, 16 or 17.
 * prsd_headline() sets words[i].replace = 1 for words whose type matches,
 * and NONWORDTOKEN() is built on top of this test.
 * NOTE(review): the meaning of each numeric code is not visible here --
 * confirm against the parser's token-type table.
 * The argument is expanded several times, so pass an expression without
 * side effects.
 */
#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) |
|
|
|
|
/*
 * HTMLHLIDIGNORE(x): nonzero when x is one of the codes 5, 15, 16 or 17,
 * i.e. the same set as HLIDIGNORE() except that code 13 is not included.
 * prsd_headline() uses it in place of HLIDIGNORE() when its "highlight"
 * flag (set from the HighlightAll option) is nonzero.
 * NOTE(review): code 13 is presumably the markup/tag token type, which
 * would explain keeping it in this mode -- confirm.
 * The argument is expanded several times, so pass an expression without
 * side effects.
 */
#define HTMLHLIDIGNORE(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) |
|
|
|
|
/*
 * NONWORDTOKEN(x): nonzero when x is code 12 or any code accepted by
 * HLIDIGNORE().  prsd_headline() increments (or decrements) its word
 * counter "curlen" only for tokens for which this test is false.
 * The argument is expanded several times, so pass an expression without
 * side effects.
 */
#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) ) |
|
|
|
|
/*
 * NOENDTOKEN(x): nonzero when x satisfies NONWORDTOKEN() or IDIGNORE(), or
 * is one of the codes 7, 8, 20, 21 or 22.  prsd_headline() combines it with
 * the "len <= shortword" check to decide that a token should not be kept as
 * the last token of a candidate fragment.
 * Codes 12 and 13 are tested twice (via NONWORDTOKEN() and via IDIGNORE());
 * the overlap is redundant but harmless.
 * The argument is expanded several times, so pass an expression without
 * side effects.
 */
#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) ) |
|
|
|
|
|
|
|
|
|
@ -196,6 +197,7 @@ prsd_headline(PG_FUNCTION_ARGS) |
|
|
|
|
curlen; |
|
|
|
|
|
|
|
|
|
int i; |
|
|
|
|
int highlight=0; |
|
|
|
|
|
|
|
|
|
/* config */ |
|
|
|
|
prs->startsel = NULL; |
|
|
|
|
@ -220,6 +222,15 @@ prsd_headline(PG_FUNCTION_ARGS) |
|
|
|
|
prs->startsel = pstrdup(mptr->value); |
|
|
|
|
else if (pg_strcasecmp(mptr->key, "StopSel") == 0) |
|
|
|
|
prs->stopsel = pstrdup(mptr->value); |
|
|
|
|
else if (pg_strcasecmp(mptr->key, "HighlightAll") == 0) |
|
|
|
|
highlight = ( |
|
|
|
|
pg_strcasecmp(mptr->value, "1")==0 ||
|
|
|
|
|
pg_strcasecmp(mptr->value, "on")==0 ||
|
|
|
|
|
pg_strcasecmp(mptr->value, "true")==0 ||
|
|
|
|
|
pg_strcasecmp(mptr->value, "t")==0 ||
|
|
|
|
|
pg_strcasecmp(mptr->value, "y")==0 ||
|
|
|
|
|
pg_strcasecmp(mptr->value, "yes")==0 ) ? |
|
|
|
|
1 : 0; |
|
|
|
|
|
|
|
|
|
pfree(mptr->key); |
|
|
|
|
pfree(mptr->value); |
|
|
|
|
@ -228,124 +239,133 @@ prsd_headline(PG_FUNCTION_ARGS) |
|
|
|
|
} |
|
|
|
|
pfree(map); |
|
|
|
|
|
|
|
|
|
if (min_words >= max_words) |
|
|
|
|
ereport(ERROR, |
|
|
|
|
if (highlight==0) { |
|
|
|
|
if (min_words >= max_words) |
|
|
|
|
ereport(ERROR, |
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
|
|
|
|
errmsg("MinWords should be less than MaxWords"))); |
|
|
|
|
if (min_words <= 0) |
|
|
|
|
ereport(ERROR, |
|
|
|
|
if (min_words <= 0) |
|
|
|
|
ereport(ERROR, |
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
|
|
|
|
errmsg("MinWords should be positive"))); |
|
|
|
|
if (shortword < 0) |
|
|
|
|
ereport(ERROR, |
|
|
|
|
if (shortword < 0) |
|
|
|
|
ereport(ERROR, |
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
|
|
|
|
errmsg("ShortWord should be >= 0"))); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
while (hlCover(prs, query, &p, &q)) |
|
|
|
|
{ |
|
|
|
|
/* find cover len in words */ |
|
|
|
|
curlen = 0; |
|
|
|
|
poslen = 0; |
|
|
|
|
for (i = p; i <= q && curlen < max_words; i++) |
|
|
|
|
{ |
|
|
|
|
if (!NONWORDTOKEN(prs->words[i].type)) |
|
|
|
|
curlen++; |
|
|
|
|
if (prs->words[i].item && !prs->words[i].repeated) |
|
|
|
|
poslen++; |
|
|
|
|
pose = i; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)) |
|
|
|
|
if (highlight==0) { |
|
|
|
|
while (hlCover(prs, query, &p, &q)) |
|
|
|
|
{ |
|
|
|
|
/* best already finded, so try one more cover */ |
|
|
|
|
p++; |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
posb=p; |
|
|
|
|
if (curlen < max_words) |
|
|
|
|
{ /* find good end */ |
|
|
|
|
for (i = i - 1; i < prs->curwords && curlen < max_words; i++) |
|
|
|
|
/* find cover len in words */ |
|
|
|
|
curlen = 0; |
|
|
|
|
poslen = 0; |
|
|
|
|
for (i = p; i <= q && curlen < max_words; i++) |
|
|
|
|
{ |
|
|
|
|
if (i != q) |
|
|
|
|
if (!NONWORDTOKEN(prs->words[i].type)) |
|
|
|
|
curlen++; |
|
|
|
|
if (prs->words[i].item && !prs->words[i].repeated) |
|
|
|
|
poslen++; |
|
|
|
|
pose = i; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)) |
|
|
|
|
{ |
|
|
|
|
/* best already finded, so try one more cover */ |
|
|
|
|
p++; |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
posb=p; |
|
|
|
|
if (curlen < max_words) |
|
|
|
|
{ /* find good end */ |
|
|
|
|
for (i = i - 1; i < prs->curwords && curlen < max_words; i++) |
|
|
|
|
{ |
|
|
|
|
if (!NONWORDTOKEN(prs->words[i].type)) |
|
|
|
|
curlen++; |
|
|
|
|
if (prs->words[i].item && !prs->words[i].repeated) |
|
|
|
|
poslen++; |
|
|
|
|
if (i != q) |
|
|
|
|
{ |
|
|
|
|
if (!NONWORDTOKEN(prs->words[i].type)) |
|
|
|
|
curlen++; |
|
|
|
|
if (prs->words[i].item && !prs->words[i].repeated) |
|
|
|
|
poslen++; |
|
|
|
|
} |
|
|
|
|
pose = i; |
|
|
|
|
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) |
|
|
|
|
continue; |
|
|
|
|
if (curlen >= min_words) |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
if ( curlen < min_words && i>=prs->curwords ) { /* got end of text and our cover is shoter than min_words */ |
|
|
|
|
for(i=p; i>= 0; i--) { |
|
|
|
|
if (!NONWORDTOKEN(prs->words[i].type)) |
|
|
|
|
curlen++; |
|
|
|
|
if (prs->words[i].item && !prs->words[i].repeated) |
|
|
|
|
poslen++; |
|
|
|
|
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) |
|
|
|
|
continue; |
|
|
|
|
if (curlen >= min_words) |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
posb=(i>=0) ? i : 0; |
|
|
|
|
} |
|
|
|
|
pose = i; |
|
|
|
|
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) |
|
|
|
|
continue; |
|
|
|
|
if (curlen >= min_words) |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
if ( curlen < min_words && i>=prs->curwords ) { /* got end of text and our cover is shoter than min_words */ |
|
|
|
|
for(i=p; i>= 0; i--) { |
|
|
|
|
else |
|
|
|
|
{ /* shorter cover :((( */ |
|
|
|
|
for (; curlen > min_words; i--) |
|
|
|
|
{ |
|
|
|
|
if (!NONWORDTOKEN(prs->words[i].type)) |
|
|
|
|
curlen++; |
|
|
|
|
curlen--; |
|
|
|
|
if (prs->words[i].item && !prs->words[i].repeated) |
|
|
|
|
poslen++; |
|
|
|
|
poslen--; |
|
|
|
|
pose = i; |
|
|
|
|
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) |
|
|
|
|
continue; |
|
|
|
|
if (curlen >= min_words) |
|
|
|
|
break; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
posb=(i>=0) ? i : 0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ /* shorter cover :((( */ |
|
|
|
|
for (; curlen > min_words; i--) |
|
|
|
|
|
|
|
|
|
if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || |
|
|
|
|
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) && |
|
|
|
|
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))) |
|
|
|
|
{ |
|
|
|
|
if (!NONWORDTOKEN(prs->words[i].type)) |
|
|
|
|
curlen--; |
|
|
|
|
if (prs->words[i].item && !prs->words[i].repeated) |
|
|
|
|
poslen--; |
|
|
|
|
pose = i; |
|
|
|
|
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) |
|
|
|
|
continue; |
|
|
|
|
break; |
|
|
|
|
bestb = posb; |
|
|
|
|
beste = pose; |
|
|
|
|
bestlen = poslen; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
p++; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || |
|
|
|
|
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) && |
|
|
|
|
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))) |
|
|
|
|
if (bestlen < 0) |
|
|
|
|
{ |
|
|
|
|
bestb = posb; |
|
|
|
|
curlen = 0; |
|
|
|
|
for (i = 0; i < prs->curwords && curlen < min_words; i++) |
|
|
|
|
{ |
|
|
|
|
if (!NONWORDTOKEN(prs->words[i].type)) |
|
|
|
|
curlen++; |
|
|
|
|
pose = i; |
|
|
|
|
} |
|
|
|
|
bestb = 0; |
|
|
|
|
beste = pose; |
|
|
|
|
bestlen = poslen; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
p++; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (bestlen < 0) |
|
|
|
|
{ |
|
|
|
|
curlen = 0; |
|
|
|
|
poslen = 0; |
|
|
|
|
for (i = 0; i < prs->curwords && curlen < min_words; i++) |
|
|
|
|
{ |
|
|
|
|
if (!NONWORDTOKEN(prs->words[i].type)) |
|
|
|
|
curlen++; |
|
|
|
|
pose = i; |
|
|
|
|
} |
|
|
|
|
bestb = 0; |
|
|
|
|
beste = pose; |
|
|
|
|
} else { |
|
|
|
|
bestb=0; |
|
|
|
|
beste=prs->curwords-1; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for (i = bestb; i <= beste; i++) |
|
|
|
|
{ |
|
|
|
|
if (prs->words[i].item) |
|
|
|
|
prs->words[i].selected = 1; |
|
|
|
|
if (prs->words[i].repeated) |
|
|
|
|
prs->words[i].skip = 1; |
|
|
|
|
if (HLIDIGNORE(prs->words[i].type)) |
|
|
|
|
prs->words[i].replace = 1; |
|
|
|
|
if ( highlight==0 ) {
|
|
|
|
|
if (HLIDIGNORE(prs->words[i].type)) |
|
|
|
|
prs->words[i].replace = 1; |
|
|
|
|
} else { |
|
|
|
|
if (HTMLHLIDIGNORE(prs->words[i].type)) |
|
|
|
|
prs->words[i].replace = 1; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
prs->words[i].in = 1; |
|
|
|
|
prs->words[i].in = (prs->words[i].repeated) ? 0 : 1; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (!prs->startsel) |
|
|
|
|
|