mirror of https://github.com/postgres/postgres
hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.REL8_3_STABLE
parent
fb631dba2a
commit
5fcb079858
@ -0,0 +1,19 @@ |
||||
# $PostgreSQL: pgsql/contrib/dict_int/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
MODULE_big = dict_int
|
||||
OBJS = dict_int.o
|
||||
DATA_built = dict_int.sql
|
||||
DATA = uninstall_dict_int.sql
|
||||
DOCS = README.dict_int
|
||||
REGRESS = dict_int
|
||||
|
||||
ifdef USE_PGXS |
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS) |
||||
else |
||||
subdir = contrib/dict_int
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global |
||||
include $(top_srcdir)/contrib/contrib-global.mk |
||||
endif |
@ -0,0 +1,41 @@ |
||||
Dictionary for integers |
||||
======================= |
||||
|
||||
The motivation for this example dictionary is to control the indexing of |
||||
integers (signed and unsigned), and, consequently, to minimize the number of |
||||
unique words which greatly affect the performance of searching. |
||||
|
||||
* Configuration |
||||
|
||||
The dictionary accepts two options: |
||||
|
||||
- The MAXLEN parameter specifies the maximum length (number of digits) |
||||
allowed in an integer word. The default value is 6. |
||||
|
||||
- The REJECTLONG parameter specifies if an overlength integer should be |
||||
truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns |
||||
the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the |
||||
dictionary treats an overlength integer as a stop word, so that it will |
||||
not be indexed. |
||||
|
||||
* Usage |
||||
|
||||
1. Compile and install |
||||
|
||||
2. Load dictionary |
||||
|
||||
psql mydb < dict_int.sql |
||||
|
||||
3. Test it |
||||
|
||||
mydb# select ts_lexize('intdict', '12345678'); |
||||
ts_lexize |
||||
----------- |
||||
{123456} |
||||
|
||||
4. Change its options as you wish |
||||
|
||||
mydb# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true); |
||||
ALTER TEXT SEARCH DICTIONARY |
||||
|
||||
That's all. |
@ -0,0 +1,99 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* dict_int.c |
||||
* Text search dictionary for integers |
||||
* |
||||
* Copyright (c) 2007, PostgreSQL Global Development Group |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/contrib/dict_int/dict_int.c,v 1.1 2007/10/15 21:36:50 tgl Exp $ |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#include "postgres.h" |
||||
|
||||
#include "commands/defrem.h" |
||||
#include "fmgr.h" |
||||
#include "tsearch/ts_public.h" |
||||
|
||||
PG_MODULE_MAGIC; |
||||
|
||||
|
||||
/*
 * Per-dictionary settings for the integer dictionary.  An instance is
 * allocated and filled in by dintdict_init() and read by dintdict_lexize().
 */
typedef struct {
	int			maxlen;			/* MAXLEN option: longest input length passed
								 * through unchanged; dintdict_init() starts
								 * it at 6 */
	bool		rejectlong;		/* REJECTLONG option: if true, overlength
								 * input is rejected instead of truncated;
								 * dintdict_init() starts it at false */
} DictInt;
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(dintdict_init); |
||||
Datum dintdict_init(PG_FUNCTION_ARGS); |
||||
|
||||
PG_FUNCTION_INFO_V1(dintdict_lexize); |
||||
Datum dintdict_lexize(PG_FUNCTION_ARGS); |
||||
|
||||
Datum |
||||
dintdict_init(PG_FUNCTION_ARGS) |
||||
{ |
||||
List *dictoptions = (List *) PG_GETARG_POINTER(0); |
||||
DictInt *d; |
||||
ListCell *l; |
||||
|
||||
d = (DictInt *) palloc0(sizeof(DictInt)); |
||||
d->maxlen = 6; |
||||
d->rejectlong = false; |
||||
|
||||
foreach(l, dictoptions) |
||||
{ |
||||
DefElem *defel = (DefElem *) lfirst(l); |
||||
|
||||
if (pg_strcasecmp(defel->defname, "MAXLEN") == 0) |
||||
{ |
||||
d->maxlen = atoi(defGetString(defel)); |
||||
} |
||||
else if (pg_strcasecmp(defel->defname, "REJECTLONG") == 0) |
||||
{ |
||||
d->rejectlong = defGetBoolean(defel); |
||||
} |
||||
else |
||||
{ |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
||||
errmsg("unrecognized intdict parameter: \"%s\"", |
||||
defel->defname))); |
||||
} |
||||
} |
||||
|
||||
PG_RETURN_POINTER(d); |
||||
} |
||||
|
||||
Datum |
||||
dintdict_lexize(PG_FUNCTION_ARGS) |
||||
{ |
||||
DictInt *d = (DictInt*)PG_GETARG_POINTER(0); |
||||
char *in = (char*)PG_GETARG_POINTER(1); |
||||
char *txt = pnstrdup(in, PG_GETARG_INT32(2)); |
||||
TSLexeme *res=palloc(sizeof(TSLexeme)*2); |
||||
|
||||
res[1].lexeme = NULL; |
||||
if (PG_GETARG_INT32(2) > d->maxlen) |
||||
{ |
||||
if ( d->rejectlong ) |
||||
{ |
||||
/* reject by returning void array */ |
||||
pfree(txt); |
||||
res[0].lexeme = NULL; |
||||
} |
||||
else |
||||
{ |
||||
/* trim integer */ |
||||
txt[d->maxlen] = '\0'; |
||||
res[0].lexeme = txt; |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
res[0].lexeme = txt; |
||||
} |
||||
|
||||
PG_RETURN_POINTER(res); |
||||
} |
@ -0,0 +1,29 @@ |
||||
-- $PostgreSQL: pgsql/contrib/dict_int/dict_int.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $ |
||||
|
||||
-- Adjust this setting to control where the objects get created. |
||||
SET search_path = public; |
||||
|
||||
BEGIN; |
||||
|
||||
CREATE FUNCTION dintdict_init(internal) |
||||
RETURNS internal |
||||
AS 'MODULE_PATHNAME' |
||||
LANGUAGE C STRICT; |
||||
|
||||
CREATE FUNCTION dintdict_lexize(internal, internal, internal, internal) |
||||
RETURNS internal |
||||
AS 'MODULE_PATHNAME' |
||||
LANGUAGE C STRICT; |
||||
|
||||
CREATE TEXT SEARCH TEMPLATE intdict_template ( |
||||
LEXIZE = dintdict_lexize, |
||||
INIT = dintdict_init |
||||
); |
||||
|
||||
CREATE TEXT SEARCH DICTIONARY intdict ( |
||||
TEMPLATE = intdict_template |
||||
); |
||||
|
||||
COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'dictionary for integers'; |
||||
|
||||
END; |
@ -0,0 +1,308 @@ |
||||
-- |
||||
-- first, define the datatype. Turn off echoing so that expected file |
||||
-- does not depend on contents of this file. |
||||
-- |
||||
SET client_min_messages = warning; |
||||
\set ECHO none |
||||
RESET client_min_messages; |
||||
--lexize |
||||
select ts_lexize('intdict', '511673'); |
||||
ts_lexize |
||||
----------- |
||||
{511673} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '129'); |
||||
ts_lexize |
||||
----------- |
||||
{129} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '40865854'); |
||||
ts_lexize |
||||
----------- |
||||
{408658} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '952'); |
||||
ts_lexize |
||||
----------- |
||||
{952} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '654980341'); |
||||
ts_lexize |
||||
----------- |
||||
{654980} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '09810106'); |
||||
ts_lexize |
||||
----------- |
||||
{098101} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '14262713'); |
||||
ts_lexize |
||||
----------- |
||||
{142627} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '6532082986'); |
||||
ts_lexize |
||||
----------- |
||||
{653208} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '0150061'); |
||||
ts_lexize |
||||
----------- |
||||
{015006} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '7778'); |
||||
ts_lexize |
||||
----------- |
||||
{7778} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '9547'); |
||||
ts_lexize |
||||
----------- |
||||
{9547} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '753395478'); |
||||
ts_lexize |
||||
----------- |
||||
{753395} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '647652'); |
||||
ts_lexize |
||||
----------- |
||||
{647652} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '6988655574'); |
||||
ts_lexize |
||||
----------- |
||||
{698865} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '1279'); |
||||
ts_lexize |
||||
----------- |
||||
{1279} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '1266645909'); |
||||
ts_lexize |
||||
----------- |
||||
{126664} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '7594193969'); |
||||
ts_lexize |
||||
----------- |
||||
{759419} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '16928207'); |
||||
ts_lexize |
||||
----------- |
||||
{169282} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '196850350328'); |
||||
ts_lexize |
||||
----------- |
||||
{196850} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '22026985592'); |
||||
ts_lexize |
||||
----------- |
||||
{220269} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '2063765'); |
||||
ts_lexize |
||||
----------- |
||||
{206376} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '242387310'); |
||||
ts_lexize |
||||
----------- |
||||
{242387} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '93595'); |
||||
ts_lexize |
||||
----------- |
||||
{93595} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '9374'); |
||||
ts_lexize |
||||
----------- |
||||
{9374} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '996969'); |
||||
ts_lexize |
||||
----------- |
||||
{996969} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '353595982'); |
||||
ts_lexize |
||||
----------- |
||||
{353595} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '925860'); |
||||
ts_lexize |
||||
----------- |
||||
{925860} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '11848378337'); |
||||
ts_lexize |
||||
----------- |
||||
{118483} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '333'); |
||||
ts_lexize |
||||
----------- |
||||
{333} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '799287416765'); |
||||
ts_lexize |
||||
----------- |
||||
{799287} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '745939'); |
||||
ts_lexize |
||||
----------- |
||||
{745939} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '67601305734'); |
||||
ts_lexize |
||||
----------- |
||||
{676013} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '3361113'); |
||||
ts_lexize |
||||
----------- |
||||
{336111} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '9033778607'); |
||||
ts_lexize |
||||
----------- |
||||
{903377} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '7507648'); |
||||
ts_lexize |
||||
----------- |
||||
{750764} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '1166'); |
||||
ts_lexize |
||||
----------- |
||||
{1166} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '9360498'); |
||||
ts_lexize |
||||
----------- |
||||
{936049} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '917795'); |
||||
ts_lexize |
||||
----------- |
||||
{917795} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '9387894'); |
||||
ts_lexize |
||||
----------- |
||||
{938789} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '42764329'); |
||||
ts_lexize |
||||
----------- |
||||
{427643} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '564062'); |
||||
ts_lexize |
||||
----------- |
||||
{564062} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '5413377'); |
||||
ts_lexize |
||||
----------- |
||||
{541337} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '060965'); |
||||
ts_lexize |
||||
----------- |
||||
{060965} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '08273593'); |
||||
ts_lexize |
||||
----------- |
||||
{082735} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '593556010144'); |
||||
ts_lexize |
||||
----------- |
||||
{593556} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '17988843352'); |
||||
ts_lexize |
||||
----------- |
||||
{179888} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '252281774'); |
||||
ts_lexize |
||||
----------- |
||||
{252281} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '313425'); |
||||
ts_lexize |
||||
----------- |
||||
{313425} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '641439323669'); |
||||
ts_lexize |
||||
----------- |
||||
{641439} |
||||
(1 row) |
||||
|
||||
select ts_lexize('intdict', '314532610153'); |
||||
ts_lexize |
||||
----------- |
||||
{314532} |
||||
(1 row) |
||||
|
@ -0,0 +1,61 @@ |
||||
-- |
||||
-- first, define the datatype. Turn off echoing so that expected file |
||||
-- does not depend on contents of this file. |
||||
-- |
||||
SET client_min_messages = warning; |
||||
\set ECHO none |
||||
\i dict_int.sql |
||||
\set ECHO all |
||||
RESET client_min_messages; |
||||
|
||||
--lexize |
||||
select ts_lexize('intdict', '511673'); |
||||
select ts_lexize('intdict', '129'); |
||||
select ts_lexize('intdict', '40865854'); |
||||
select ts_lexize('intdict', '952'); |
||||
select ts_lexize('intdict', '654980341'); |
||||
select ts_lexize('intdict', '09810106'); |
||||
select ts_lexize('intdict', '14262713'); |
||||
select ts_lexize('intdict', '6532082986'); |
||||
select ts_lexize('intdict', '0150061'); |
||||
select ts_lexize('intdict', '7778'); |
||||
select ts_lexize('intdict', '9547'); |
||||
select ts_lexize('intdict', '753395478'); |
||||
select ts_lexize('intdict', '647652'); |
||||
select ts_lexize('intdict', '6988655574'); |
||||
select ts_lexize('intdict', '1279'); |
||||
select ts_lexize('intdict', '1266645909'); |
||||
select ts_lexize('intdict', '7594193969'); |
||||
select ts_lexize('intdict', '16928207'); |
||||
select ts_lexize('intdict', '196850350328'); |
||||
select ts_lexize('intdict', '22026985592'); |
||||
select ts_lexize('intdict', '2063765'); |
||||
select ts_lexize('intdict', '242387310'); |
||||
select ts_lexize('intdict', '93595'); |
||||
select ts_lexize('intdict', '9374'); |
||||
select ts_lexize('intdict', '996969'); |
||||
select ts_lexize('intdict', '353595982'); |
||||
select ts_lexize('intdict', '925860'); |
||||
select ts_lexize('intdict', '11848378337'); |
||||
select ts_lexize('intdict', '333'); |
||||
select ts_lexize('intdict', '799287416765'); |
||||
select ts_lexize('intdict', '745939'); |
||||
select ts_lexize('intdict', '67601305734'); |
||||
select ts_lexize('intdict', '3361113'); |
||||
select ts_lexize('intdict', '9033778607'); |
||||
select ts_lexize('intdict', '7507648'); |
||||
select ts_lexize('intdict', '1166'); |
||||
select ts_lexize('intdict', '9360498'); |
||||
select ts_lexize('intdict', '917795'); |
||||
select ts_lexize('intdict', '9387894'); |
||||
select ts_lexize('intdict', '42764329'); |
||||
select ts_lexize('intdict', '564062'); |
||||
select ts_lexize('intdict', '5413377'); |
||||
select ts_lexize('intdict', '060965'); |
||||
select ts_lexize('intdict', '08273593'); |
||||
select ts_lexize('intdict', '593556010144'); |
||||
select ts_lexize('intdict', '17988843352'); |
||||
select ts_lexize('intdict', '252281774'); |
||||
select ts_lexize('intdict', '313425'); |
||||
select ts_lexize('intdict', '641439323669'); |
||||
select ts_lexize('intdict', '314532610153'); |
@ -0,0 +1,9 @@ |
||||
SET search_path = public; |
||||
|
||||
DROP TEXT SEARCH DICTIONARY intdict; |
||||
|
||||
DROP TEXT SEARCH TEMPLATE intdict_template; |
||||
|
||||
DROP FUNCTION dintdict_init(internal); |
||||
|
||||
DROP FUNCTION dintdict_lexize(internal,internal,internal,internal); |
@ -0,0 +1,38 @@ |
||||
# $PostgreSQL: pgsql/contrib/dict_xsyn/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
MODULE_big = dict_xsyn
|
||||
OBJS = dict_xsyn.o
|
||||
DATA_built = dict_xsyn.sql
|
||||
DATA = uninstall_dict_xsyn.sql
|
||||
DOCS = README.dict_xsyn
|
||||
REGRESS = dict_xsyn
|
||||
|
||||
DICTDIR = tsearch_data
|
||||
DICTFILES = xsyn_sample.rules
|
||||
|
||||
ifdef USE_PGXS |
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS) |
||||
else |
||||
subdir = contrib/dict_xsyn
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global |
||||
include $(top_srcdir)/contrib/contrib-global.mk |
||||
endif |
||||
|
||||
install: install-data |
||||
|
||||
.PHONY: install-data |
||||
install-data: $(DICTFILES) |
||||
for i in $(DICTFILES); \
|
||||
do $(INSTALL_DATA) $(srcdir)/$$i '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i; \
|
||||
done
|
||||
|
||||
uninstall: uninstall-data |
||||
|
||||
.PHONY: uninstall-data |
||||
uninstall-data: |
||||
for i in $(DICTFILES); \
|
||||
do rm -rf '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i ; \
|
||||
done
|
@ -0,0 +1,52 @@ |
||||
Extended Synonym dictionary |
||||
=========================== |
||||
|
||||
This is a simple synonym dictionary. It replaces words with groups of their |
||||
synonyms, and so makes it possible to search for a word using any of its |
||||
synonyms. |
||||
|
||||
* Configuration |
||||
|
||||
It accepts the following options: |
||||
|
||||
- KEEPORIG controls whether the original word is included, or only its |
||||
synonyms. Default is 'true'. |
||||
|
||||
- RULES is the base name of the file containing the list of synonyms. |
||||
This file must be in $(prefix)/share/tsearch_data/, and its name must |
||||
end in ".rules" (which is not included in the RULES parameter). |
||||
|
||||
The rules file has the following format: |
||||
|
||||
- Each line represents a group of synonyms for a single word, which is |
||||
given first on the line. Synonyms are separated by whitespace: |
||||
|
||||
word syn1 syn2 syn3 |
||||
|
||||
- Sharp ('#') sign is a comment delimiter. It may appear at any position |
||||
inside the line. The rest of the line will be skipped. |
||||
|
||||
Look at xsyn_sample.rules, which is installed in $(prefix)/share/tsearch_data/, |
||||
for an example. |
||||
|
||||
* Usage |
||||
|
||||
1. Compile and install |
||||
|
||||
2. Load dictionary |
||||
|
||||
psql mydb < dict_xsyn.sql |
||||
|
||||
3. Test it |
||||
|
||||
mydb=# SELECT ts_lexize('xsyn','word'); |
||||
ts_lexize |
||||
---------------- |
||||
{word,syn1,syn2,syn3} |
||||
|
||||
4. Change the dictionary options as you wish |
||||
|
||||
mydb# ALTER TEXT SEARCH DICTIONARY xsyn (KEEPORIG=false); |
||||
ALTER TEXT SEARCH DICTIONARY |
||||
|
||||
That's all. |
@ -0,0 +1,235 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* dict_xsyn.c |
||||
* Extended synonym dictionary |
||||
* |
||||
* Copyright (c) 2007, PostgreSQL Global Development Group |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.1 2007/10/15 21:36:50 tgl Exp $ |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#include "postgres.h" |
||||
|
||||
#include <ctype.h> |
||||
|
||||
#include "commands/defrem.h" |
||||
#include "fmgr.h" |
||||
#include "storage/fd.h" |
||||
#include "tsearch/ts_locale.h" |
||||
#include "tsearch/ts_utils.h" |
||||
|
||||
PG_MODULE_MAGIC; |
||||
|
||||
/*
 * One entry of the synonym table: a key word plus the lowercased rules-file
 * line it came from.  read_dictionary() builds these and sorts them by key;
 * dxsyn_lexize() looks them up with bsearch().
 */
typedef struct
{
	char	   *key;			/* Word */
	char	   *value;			/* Unparsed list of synonyms, including the word itself */
} Syn;
||||
|
||||
/*
 * Private state of one xsyn dictionary, created by dxsyn_init().
 */
typedef struct
{
	int			len;			/* number of entries in syn once
								 * read_dictionary() has finished (it is used
								 * as the allocated size while loading) */
	Syn		   *syn;			/* entries sorted by key, or NULL if no rules
								 * file has been loaded */

	bool		keeporig;		/* KEEPORIG option: include the looked-up
								 * word itself in the result */
} DictSyn;
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(dxsyn_init); |
||||
Datum dxsyn_init(PG_FUNCTION_ARGS); |
||||
|
||||
PG_FUNCTION_INFO_V1(dxsyn_lexize); |
||||
Datum dxsyn_lexize(PG_FUNCTION_ARGS); |
||||
|
||||
static char * |
||||
find_word(char *in, char **end) |
||||
{ |
||||
char *start; |
||||
|
||||
*end = NULL; |
||||
while (*in && t_isspace(in)) |
||||
in += pg_mblen(in); |
||||
|
||||
if (!*in || *in == '#') |
||||
return NULL; |
||||
start = in; |
||||
|
||||
while (*in && !t_isspace(in)) |
||||
in += pg_mblen(in); |
||||
|
||||
*end = in; |
||||
|
||||
return start; |
||||
} |
||||
|
||||
static int |
||||
compare_syn(const void *a, const void *b) |
||||
{ |
||||
return strcmp(((Syn *) a)->key, ((Syn *) b)->key); |
||||
} |
||||
|
||||
static void |
||||
read_dictionary(DictSyn *d, char *filename) |
||||
{ |
||||
char *real_filename = get_tsearch_config_filename(filename, "rules"); |
||||
FILE *fin; |
||||
char *line; |
||||
int cur = 0; |
||||
|
||||
if ((fin = AllocateFile(real_filename, "r")) == NULL) |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR), |
||||
errmsg("could not open synonym file \"%s\": %m", |
||||
real_filename))); |
||||
|
||||
while ((line = t_readline(fin)) != NULL) |
||||
{ |
||||
char *value; |
||||
char *key; |
||||
char *end = NULL; |
||||
|
||||
if (*line == '\0') |
||||
continue; |
||||
|
||||
value = lowerstr(line); |
||||
pfree(line); |
||||
|
||||
key = find_word(value, &end); |
||||
if (!key) |
||||
{ |
||||
pfree(value); |
||||
continue; |
||||
} |
||||
|
||||
if (cur == d->len) |
||||
{ |
||||
d->len = (d->len > 0) ? 2 * d->len : 16; |
||||
if (d->syn) |
||||
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len); |
||||
else |
||||
d->syn = (Syn *) palloc(sizeof(Syn) * d->len); |
||||
} |
||||
|
||||
d->syn[cur].key = pnstrdup(key, end - key); |
||||
d->syn[cur].value = value; |
||||
|
||||
cur++; |
||||
} |
||||
|
||||
FreeFile(fin); |
||||
|
||||
d->len = cur; |
||||
if (cur > 1) |
||||
qsort(d->syn, d->len, sizeof(Syn), compare_syn); |
||||
|
||||
pfree(real_filename); |
||||
} |
||||
|
||||
Datum |
||||
dxsyn_init(PG_FUNCTION_ARGS) |
||||
{ |
||||
List *dictoptions = (List *) PG_GETARG_POINTER(0); |
||||
DictSyn *d; |
||||
ListCell *l; |
||||
|
||||
d = (DictSyn *) palloc0(sizeof(DictSyn)); |
||||
d->len = 0; |
||||
d->syn = NULL; |
||||
d->keeporig = true; |
||||
|
||||
foreach(l, dictoptions) |
||||
{ |
||||
DefElem *defel = (DefElem *) lfirst(l); |
||||
|
||||
if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0) |
||||
{ |
||||
d->keeporig = defGetBoolean(defel); |
||||
} |
||||
else if (pg_strcasecmp(defel->defname, "RULES") == 0) |
||||
{ |
||||
read_dictionary(d, defGetString(defel)); |
||||
} |
||||
else |
||||
{ |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
||||
errmsg("unrecognized xsyn parameter: \"%s\"", |
||||
defel->defname))); |
||||
} |
||||
} |
||||
|
||||
PG_RETURN_POINTER(d); |
||||
} |
||||
|
||||
Datum |
||||
dxsyn_lexize(PG_FUNCTION_ARGS) |
||||
{ |
||||
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0); |
||||
char *in = (char *) PG_GETARG_POINTER(1); |
||||
int length = PG_GETARG_INT32(2); |
||||
Syn word; |
||||
Syn *found; |
||||
TSLexeme *res = NULL; |
||||
|
||||
if (!length || d->len == 0) |
||||
PG_RETURN_POINTER(NULL); |
||||
|
||||
/* Create search pattern */ |
||||
{ |
||||
char *temp = pnstrdup(in, length); |
||||
|
||||
word.key = lowerstr(temp); |
||||
pfree(temp); |
||||
word.value = NULL; |
||||
} |
||||
|
||||
/* Look for matching syn */ |
||||
found = (Syn *)bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn); |
||||
pfree(word.key); |
||||
|
||||
if (!found) |
||||
PG_RETURN_POINTER(NULL); |
||||
|
||||
/* Parse string of synonyms and return array of words */ |
||||
{ |
||||
char *value = pstrdup(found->value); |
||||
int value_length = strlen(value); |
||||
char *pos = value; |
||||
int nsyns = 0; |
||||
bool is_first = true; |
||||
|
||||
res = palloc(0); |
||||
|
||||
while(pos < value + value_length) |
||||
{ |
||||
char *end; |
||||
char *syn = find_word(pos, &end); |
||||
|
||||
if (!syn) |
||||
break; |
||||
*end = '\0'; |
||||
|
||||
res = repalloc(res, sizeof(TSLexeme)*(nsyns + 2)); |
||||
res[nsyns].lexeme = NULL; |
||||
|
||||
/* first word is added to result only if KEEPORIG flag is set */ |
||||
if(d->keeporig || !is_first) |
||||
{ |
||||
res[nsyns].lexeme = pstrdup(syn); |
||||
res[nsyns + 1].lexeme = NULL; |
||||
|
||||
nsyns++; |
||||
} |
||||
|
||||
is_first = false; |
||||
|
||||
pos = end + 1; |
||||
} |
||||
|
||||
pfree(value); |
||||
} |
||||
|
||||
PG_RETURN_POINTER(res); |
||||
} |
@ -0,0 +1,29 @@ |
||||
-- $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $ |
||||
|
||||
-- Adjust this setting to control where the objects get created. |
||||
SET search_path = public; |
||||
|
||||
BEGIN; |
||||
|
||||
CREATE FUNCTION dxsyn_init(internal) |
||||
RETURNS internal |
||||
AS 'MODULE_PATHNAME' |
||||
LANGUAGE C STRICT; |
||||
|
||||
CREATE FUNCTION dxsyn_lexize(internal, internal, internal, internal) |
||||
RETURNS internal |
||||
AS 'MODULE_PATHNAME' |
||||
LANGUAGE C STRICT; |
||||
|
||||
CREATE TEXT SEARCH TEMPLATE xsyn_template ( |
||||
LEXIZE = dxsyn_lexize, |
||||
INIT = dxsyn_init |
||||
); |
||||
|
||||
CREATE TEXT SEARCH DICTIONARY xsyn ( |
||||
TEMPLATE = xsyn_template |
||||
); |
||||
|
||||
COMMENT ON TEXT SEARCH DICTIONARY xsyn IS 'eXtended synonym dictionary'; |
||||
|
||||
END; |
@ -0,0 +1,22 @@ |
||||
-- |
||||
-- first, define the datatype. Turn off echoing so that expected file |
||||
-- does not depend on contents of this file. |
||||
-- |
||||
SET client_min_messages = warning; |
||||
\set ECHO none |
||||
RESET client_min_messages; |
||||
--configuration |
||||
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false); |
||||
--lexize |
||||
SELECT ts_lexize('xsyn', 'supernova'); |
||||
ts_lexize |
||||
---------------- |
||||
{sn,sne,1987a} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('xsyn', 'grb'); |
||||
ts_lexize |
||||
----------- |
||||
|
||||
(1 row) |
||||
|
@ -0,0 +1,16 @@ |
||||
-- |
||||
-- first, define the datatype. Turn off echoing so that expected file |
||||
-- does not depend on contents of this file. |
||||
-- |
||||
SET client_min_messages = warning; |
||||
\set ECHO none |
||||
\i dict_xsyn.sql |
||||
\set ECHO all |
||||
RESET client_min_messages; |
||||
|
||||
--configuration |
||||
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false); |
||||
|
||||
--lexize |
||||
SELECT ts_lexize('xsyn', 'supernova'); |
||||
SELECT ts_lexize('xsyn', 'grb'); |
@ -0,0 +1,9 @@ |
||||
SET search_path = public; |
||||
|
||||
DROP TEXT SEARCH DICTIONARY xsyn; |
||||
|
||||
DROP TEXT SEARCH TEMPLATE xsyn_template; |
||||
|
||||
DROP FUNCTION dxsyn_init(internal); |
||||
|
||||
DROP FUNCTION dxsyn_lexize(internal,internal,internal,internal); |
@ -0,0 +1,6 @@ |
||||
# Sample rules file for eXtended Synonym (xsyn) dictionary |
||||
# format is as follows: |
||||
# |
||||
# word synonym1 synonym2 ... |
||||
# |
||||
supernova sn sne 1987a |
@ -0,0 +1,19 @@ |
||||
# $PostgreSQL: pgsql/contrib/test_parser/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
MODULE_big = test_parser
|
||||
OBJS = test_parser.o
|
||||
DATA_built = test_parser.sql
|
||||
DATA = uninstall_test_parser.sql
|
||||
DOCS = README.test_parser
|
||||
REGRESS = test_parser
|
||||
|
||||
ifdef USE_PGXS |
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS) |
||||
else |
||||
subdir = contrib/test_parser
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global |
||||
include $(top_srcdir)/contrib/contrib-global.mk |
||||
endif |
@ -0,0 +1,52 @@ |
||||
Example parser |
||||
============== |
||||
|
||||
This is an example of a custom parser for full text search. |
||||
|
||||
It recognizes space-delimited words and returns only two token types: |
||||
|
||||
- 3, word, Word |
||||
|
||||
- 12, blank, Space symbols |
||||
|
||||
The token numbers have been chosen to keep compatibility with the default |
||||
ts_headline() function, since we do not want to implement our own version. |
||||
|
||||
* Configuration |
||||
|
||||
The parser has no user-configurable parameters. |
||||
|
||||
* Usage |
||||
|
||||
1. Compile and install |
||||
|
||||
2. Load parser |
||||
|
||||
psql mydb < test_parser.sql |
||||
|
||||
3. Test it |
||||
|
||||
mydb# SELECT * FROM ts_parse('testparser','That''s my first own parser'); |
||||
tokid | token |
||||
-------+-------- |
||||
3 | That's |
||||
12 | |
||||
3 | my |
||||
12 | |
||||
3 | first |
||||
12 | |
||||
3 | own |
||||
12 | |
||||
3 | parser |
||||
|
||||
mydb# SELECT to_tsvector('testcfg','That''s my first own parser'); |
||||
to_tsvector |
||||
------------------------------------------------- |
||||
'my':2 'own':4 'first':3 'parser':5 'that''s':1 |
||||
|
||||
mydb# SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', to_tsquery('testcfg', 'stars')); |
||||
ts_headline |
||||
----------------------------------------------------------------- |
||||
Supernovae <b>stars</b> are the brightest phenomena in galaxies |
||||
|
||||
That's all. |
@ -0,0 +1,50 @@ |
||||
-- |
||||
-- first, define the parser. Turn off echoing so that expected file |
||||
-- does not depend on contents of this file. |
||||
-- |
||||
SET client_min_messages = warning; |
||||
\set ECHO none |
||||
RESET client_min_messages; |
||||
-- make test configuration using parser |
||||
CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser); |
||||
ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple; |
||||
-- ts_parse |
||||
SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/'); |
||||
tokid | token |
||||
-------+----------------------- |
||||
3 | That's |
||||
12 | |
||||
3 | simple |
||||
12 | |
||||
3 | parser |
||||
12 | |
||||
3 | can't |
||||
12 | |
||||
3 | parse |
||||
12 | |
||||
3 | urls |
||||
12 | |
||||
3 | like |
||||
12 | |
||||
3 | http://some.url/here/ |
||||
(15 rows) |
||||
|
||||
SELECT to_tsvector('testcfg','That''s my first own parser'); |
||||
to_tsvector |
||||
------------------------------------------------- |
||||
'my':2 'own':4 'first':3 'parser':5 'that''s':1 |
||||
(1 row) |
||||
|
||||
SELECT to_tsquery('testcfg', 'star'); |
||||
to_tsquery |
||||
------------ |
||||
'star' |
||||
(1 row) |
||||
|
||||
SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', |
||||
to_tsquery('testcfg', 'stars')); |
||||
ts_headline |
||||
----------------------------------------------------------------- |
||||
Supernovae <b>stars</b> are the brightest phenomena in galaxies |
||||
(1 row) |
||||
|
@ -0,0 +1,26 @@ |
||||
-- |
||||
-- first, define the parser. Turn off echoing so that expected file |
||||
-- does not depend on contents of this file. |
||||
-- |
||||
SET client_min_messages = warning; |
||||
\set ECHO none |
||||
\i test_parser.sql |
||||
\set ECHO all |
||||
RESET client_min_messages; |
||||
|
||||
-- make test configuration using parser |
||||
|
||||
CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser); |
||||
|
||||
ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple; |
||||
|
||||
-- ts_parse |
||||
|
||||
SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/'); |
||||
|
||||
SELECT to_tsvector('testcfg','That''s my first own parser'); |
||||
|
||||
SELECT to_tsquery('testcfg', 'star'); |
||||
|
||||
SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', |
||||
to_tsquery('testcfg', 'stars')); |
@ -0,0 +1,130 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* test_parser.c |
||||
* Simple example of a text search parser |
||||
* |
||||
* Copyright (c) 2007, PostgreSQL Global Development Group |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/contrib/test_parser/test_parser.c,v 1.1 2007/10/15 21:36:50 tgl Exp $ |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#include "postgres.h" |
||||
|
||||
#include "fmgr.h" |
||||
|
||||
PG_MODULE_MAGIC; |
||||
|
||||
|
||||
/*
|
||||
* types |
||||
*/ |
||||
|
||||
/*
 * self-defined type
 *
 * Parsing state for one input text: allocated by testprs_start(), advanced
 * by testprs_getlexeme(), and freed by testprs_end().
 */
typedef struct {
	char *		buffer;		/* text to parse */
	int			len;		/* length of the text in buffer */
	int			pos;		/* position of the parser */
} ParserState;
||||
|
||||
/*
 * copy-paste from wparser.h of tsearch2
 *
 * Describes one token type; testprs_lextype() returns an array of these
 * ending with an entry whose lexid is 0.
 */
typedef struct {
	int			lexid;		/* token type number */
	char	   *alias;		/* short name of the token type */
	char	   *descr;		/* human-readable description */
} LexDescr;
||||
|
||||
/*
|
||||
* prototypes |
||||
*/ |
||||
PG_FUNCTION_INFO_V1(testprs_start); |
||||
Datum testprs_start(PG_FUNCTION_ARGS); |
||||
|
||||
PG_FUNCTION_INFO_V1(testprs_getlexeme); |
||||
Datum testprs_getlexeme(PG_FUNCTION_ARGS); |
||||
|
||||
PG_FUNCTION_INFO_V1(testprs_end); |
||||
Datum testprs_end(PG_FUNCTION_ARGS); |
||||
|
||||
PG_FUNCTION_INFO_V1(testprs_lextype); |
||||
Datum testprs_lextype(PG_FUNCTION_ARGS); |
||||
|
||||
/*
|
||||
* functions |
||||
*/ |
||||
Datum testprs_start(PG_FUNCTION_ARGS) |
||||
{ |
||||
ParserState *pst = (ParserState *) palloc0(sizeof(ParserState)); |
||||
pst->buffer = (char *) PG_GETARG_POINTER(0); |
||||
pst->len = PG_GETARG_INT32(1); |
||||
pst->pos = 0; |
||||
|
||||
PG_RETURN_POINTER(pst); |
||||
} |
||||
|
||||
Datum testprs_getlexeme(PG_FUNCTION_ARGS) |
||||
{ |
||||
ParserState *pst = (ParserState *) PG_GETARG_POINTER(0); |
||||
char **t = (char **) PG_GETARG_POINTER(1); |
||||
int *tlen = (int *) PG_GETARG_POINTER(2); |
||||
int type; |
||||
|
||||
*tlen = pst->pos; |
||||
*t = pst->buffer + pst->pos; |
||||
|
||||
if ((pst->buffer)[pst->pos] == ' ') |
||||
{ |
||||
/* blank type */ |
||||
type = 12; |
||||
/* go to the next non-white-space character */ |
||||
while ((pst->buffer)[pst->pos] == ' ' && |
||||
pst->pos < pst->len) |
||||
(pst->pos)++; |
||||
} else { |
||||
/* word type */ |
||||
type = 3; |
||||
/* go to the next white-space character */ |
||||
while ((pst->buffer)[pst->pos] != ' ' && |
||||
pst->pos < pst->len) |
||||
(pst->pos)++; |
||||
} |
||||
|
||||
*tlen = pst->pos - *tlen; |
||||
|
||||
/* we are finished if (*tlen == 0) */ |
||||
if (*tlen == 0) |
||||
type=0; |
||||
|
||||
PG_RETURN_INT32(type); |
||||
} |
||||
|
||||
Datum testprs_end(PG_FUNCTION_ARGS) |
||||
{ |
||||
ParserState *pst = (ParserState *) PG_GETARG_POINTER(0); |
||||
pfree(pst); |
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
Datum testprs_lextype(PG_FUNCTION_ARGS) |
||||
{ |
||||
/*
|
||||
* Remarks: |
||||
* - we have to return the blanks for headline reason |
||||
* - we use the same lexids like Teodor in the default |
||||
* word parser; in this way we can reuse the headline |
||||
* function of the default word parser. |
||||
*/ |
||||
LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2+1)); |
||||
|
||||
/* there are only two types in this parser */ |
||||
descr[0].lexid = 3; |
||||
descr[0].alias = pstrdup("word"); |
||||
descr[0].descr = pstrdup("Word"); |
||||
descr[1].lexid = 12; |
||||
descr[1].alias = pstrdup("blank"); |
||||
descr[1].descr = pstrdup("Space symbols"); |
||||
descr[2].lexid = 0; |
||||
|
||||
PG_RETURN_POINTER(descr); |
||||
} |
@ -0,0 +1,36 @@ |
||||
-- $PostgreSQL: pgsql/contrib/test_parser/test_parser.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $ |
||||
|
||||
-- Adjust this setting to control where the objects get created. |
||||
SET search_path = public; |
||||
|
||||
BEGIN; |
||||
|
||||
CREATE FUNCTION testprs_start(internal, int4) |
||||
RETURNS internal |
||||
AS 'MODULE_PATHNAME' |
||||
LANGUAGE C STRICT; |
||||
|
||||
CREATE FUNCTION testprs_getlexeme(internal, internal, internal) |
||||
RETURNS internal |
||||
AS 'MODULE_PATHNAME' |
||||
LANGUAGE C STRICT; |
||||
|
||||
CREATE FUNCTION testprs_end(internal) |
||||
RETURNS void |
||||
AS 'MODULE_PATHNAME' |
||||
LANGUAGE C STRICT; |
||||
|
||||
CREATE FUNCTION testprs_lextype(internal) |
||||
RETURNS internal |
||||
AS 'MODULE_PATHNAME' |
||||
LANGUAGE C STRICT; |
||||
|
||||
CREATE TEXT SEARCH PARSER testparser ( |
||||
START = testprs_start, |
||||
GETTOKEN = testprs_getlexeme, |
||||
END = testprs_end, |
||||
HEADLINE = pg_catalog.prsd_headline, |
||||
LEXTYPES = testprs_lextype |
||||
); |
||||
|
||||
END; |
@ -0,0 +1,11 @@ |
||||
SET search_path = public; |
||||
|
||||
DROP TEXT SEARCH PARSER testparser; |
||||
|
||||
DROP FUNCTION testprs_start(internal, int4); |
||||
|
||||
DROP FUNCTION testprs_getlexeme(internal, internal, internal); |
||||
|
||||
DROP FUNCTION testprs_end(internal); |
||||
|
||||
DROP FUNCTION testprs_lextype(internal); |
Loading…
Reference in new issue