mirror of https://github.com/postgres/postgres
parent
a88a48011c
commit
92e05bc6a5
@ -0,0 +1,24 @@ |
||||
# $PostgreSQL: pgsql/contrib/unaccent/Makefile,v 1.1 2009/08/18 10:34:39 teodor Exp $

# Build description for the unaccent contrib module: one shared library,
# its install/uninstall scripts, the default rules file and one
# regression test.
MODULE_big = unaccent
OBJS = unaccent.o

DATA_built = unaccent.sql
DATA = uninstall_unaccent.sql
DATA_TSEARCH = unaccent.rules
REGRESS = unaccent

ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
# subdir must name this module's own directory (it previously pointed at
# contrib/pg_trgm, a copy-and-paste leftover).
subdir = contrib/unaccent
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

# Redefine REGRESS_OPTS because the regression test needs a UTF8 database.
REGRESS_OPTS = --dbname=$(CONTRIB_TESTDB) --multibyte=UTF8 --no-locale
|
||||
@ -0,0 +1,58 @@ |
||||
SET client_min_messages = warning; |
||||
\set ECHO none |
||||
RESET client_min_messages; |
||||
SET client_encoding TO 'KOI8'; |
||||
SELECT unaccent('foobar'); |
||||
unaccent |
||||
---------- |
||||
foobar |
||||
(1 row) |
||||
|
||||
SELECT unaccent('ёлка'); |
||||
unaccent |
||||
---------- |
||||
елка |
||||
(1 row) |
||||
|
||||
SELECT unaccent('ЁЖИК'); |
||||
unaccent |
||||
---------- |
||||
ЕЖИК |
||||
(1 row) |
||||
|
||||
SELECT unaccent('unaccent', 'foobar'); |
||||
unaccent |
||||
---------- |
||||
foobar |
||||
(1 row) |
||||
|
||||
SELECT unaccent('unaccent', 'ёлка'); |
||||
unaccent |
||||
---------- |
||||
елка |
||||
(1 row) |
||||
|
||||
SELECT unaccent('unaccent', 'ЁЖИК'); |
||||
unaccent |
||||
---------- |
||||
ЕЖИК |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('unaccent', 'foobar'); |
||||
ts_lexize |
||||
----------- |
||||
|
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('unaccent', 'ёлка'); |
||||
ts_lexize |
||||
----------- |
||||
{елка} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('unaccent', 'ЁЖИК'); |
||||
ts_lexize |
||||
----------- |
||||
{ЕЖИК} |
||||
(1 row) |
||||
|
||||
@ -0,0 +1,19 @@ |
||||
SET client_min_messages = warning; |
||||
\set ECHO none |
||||
\i unaccent.sql |
||||
\set ECHO all |
||||
RESET client_min_messages; |
||||
|
||||
SET client_encoding TO 'KOI8'; |
||||
|
||||
SELECT unaccent('foobar'); |
||||
SELECT unaccent('ёлка'); |
||||
SELECT unaccent('ЁЖИК'); |
||||
|
||||
SELECT unaccent('unaccent', 'foobar'); |
||||
SELECT unaccent('unaccent', 'ёлка'); |
||||
SELECT unaccent('unaccent', 'ЁЖИК'); |
||||
|
||||
SELECT ts_lexize('unaccent', 'foobar'); |
||||
SELECT ts_lexize('unaccent', 'ёлка'); |
||||
SELECT ts_lexize('unaccent', 'ЁЖИК'); |
||||
@ -0,0 +1,318 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* unaccent.c |
||||
* Text search unaccent dictionary |
||||
* |
||||
* Copyright (c) 2009, PostgreSQL Global Development Group |
||||
* |
||||
* IDENTIFICATION |
||||
* $PostgreSQL: pgsql/contrib/unaccent/unaccent.c,v 1.1 2009/08/18 10:34:39 teodor Exp $ |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
|
||||
#include "fmgr.h" |
||||
#include "catalog/namespace.h" |
||||
#include "commands/defrem.h" |
||||
#include "mb/pg_wchar.h" |
||||
#include "tsearch/ts_cache.h" |
||||
#include "tsearch/ts_locale.h" |
||||
#include "tsearch/ts_public.h" |
||||
#include "utils/builtins.h" |
||||
|
||||
PG_MODULE_MAGIC; |
||||
|
||||
/*
 * The unaccent dictionary finds the character to replace by walking an
 * uncompressed, byte-indexed tree.  Despite the "Suffix" in the names
 * this is a prefix tree: a multibyte character is followed from its
 * first byte, one tree level per byte.  Each level is an array of 256
 * SuffixChar entries; the n-th entry corresponds to byte value n.
 */
typedef struct SuffixChar
{
	/* next tree level (array of 256 entries), or NULL if none exists */
	struct SuffixChar *nextChar;
	/* replacement bytes, NOT NUL-terminated; NULL if no rule ends here */
	char	   *replaceTo;
	/* number of bytes stored in replaceTo */
	int			replacelen;
} SuffixChar;
||||
|
||||
/*
|
||||
* placeChar - put str into tree's structure, byte by byte. |
||||
*/ |
||||
static SuffixChar* |
||||
placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen) |
||||
{ |
||||
SuffixChar *curnode; |
||||
|
||||
if ( !node ) |
||||
{ |
||||
node = palloc(sizeof(SuffixChar) * 256); |
||||
memset(node, 0, sizeof(SuffixChar) * 256); |
||||
} |
||||
|
||||
curnode = node + *str; |
||||
|
||||
if ( lenstr == 1 ) |
||||
{ |
||||
if ( curnode->replaceTo ) |
||||
elog(WARNING, "duplicate TO argument, use first one"); |
||||
else |
||||
{ |
||||
curnode->replacelen = replacelen; |
||||
curnode->replaceTo = palloc( replacelen ); |
||||
memcpy(curnode->replaceTo, replaceTo, replacelen); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
curnode->nextChar = placeChar( curnode->nextChar, str+1, lenstr-1, replaceTo, replacelen); |
||||
} |
||||
|
||||
return node; |
||||
} |
||||
|
||||
/*
|
||||
* initSuffixTree - create suffix tree from file. Function converts |
||||
* UTF8-encoded file into current encoding. |
||||
*/ |
||||
static SuffixChar* |
||||
initSuffixTree(char *filename)
|
||||
{ |
||||
SuffixChar *rootSuffixTree = NULL; |
||||
MemoryContext ccxt = CurrentMemoryContext; |
||||
tsearch_readline_state trst; |
||||
bool skip; |
||||
|
||||
filename = get_tsearch_config_filename(filename, "rules"); |
||||
if (!tsearch_readline_begin(&trst, filename)) |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR), |
||||
errmsg("could not open unaccent file \"%s\": %m", |
||||
filename))); |
||||
|
||||
do
|
||||
{ |
||||
char src[4096]; |
||||
char trg[4096]; |
||||
int srclen; |
||||
int trglen; |
||||
char *line = NULL; |
||||
|
||||
skip = true; |
||||
|
||||
PG_TRY(); |
||||
{ |
||||
/*
|
||||
* pg_do_encoding_conversion() (called by tsearch_readline()) |
||||
* will emit exception if it finds untranslatable characters in current locale. |
||||
* We just skip such characters. |
||||
*/ |
||||
while ((line = tsearch_readline(&trst)) != NULL) |
||||
{ |
||||
if ( sscanf(line, "%s\t%s\n", src, trg)!=2 ) |
||||
continue; |
||||
|
||||
srclen = strlen(src); |
||||
trglen = strlen(trg); |
||||
|
||||
rootSuffixTree = placeChar(rootSuffixTree,
|
||||
(unsigned char*)src, srclen,
|
||||
trg, trglen); |
||||
skip = false; |
||||
pfree(line); |
||||
} |
||||
} |
||||
PG_CATCH(); |
||||
{ |
||||
ErrorData *errdata; |
||||
MemoryContext ecxt; |
||||
|
||||
ecxt = MemoryContextSwitchTo(ccxt); |
||||
errdata = CopyErrorData(); |
||||
if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER) |
||||
{ |
||||
FlushErrorState(); |
||||
} |
||||
else |
||||
{ |
||||
MemoryContextSwitchTo(ecxt); |
||||
PG_RE_THROW(); |
||||
} |
||||
} |
||||
PG_END_TRY(); |
||||
} |
||||
while(skip); |
||||
|
||||
tsearch_readline_end(&trst); |
||||
|
||||
return rootSuffixTree; |
||||
} |
||||
|
||||
/*
|
||||
* findReplaceTo - find multibyte character in tree |
||||
*/ |
||||
static SuffixChar *
|
||||
findReplaceTo( SuffixChar *node, unsigned char *src, int srclen ) |
||||
{ |
||||
while( node )
|
||||
{ |
||||
node = node + *src; |
||||
if ( srclen == 1 ) |
||||
return node; |
||||
|
||||
src++; |
||||
srclen--; |
||||
node = node->nextChar; |
||||
} |
||||
|
||||
return NULL; |
||||
} |
||||
|
||||
PG_FUNCTION_INFO_V1(unaccent_init); |
||||
Datum unaccent_init(PG_FUNCTION_ARGS); |
||||
Datum |
||||
unaccent_init(PG_FUNCTION_ARGS) |
||||
{ |
||||
List *dictoptions = (List *) PG_GETARG_POINTER(0); |
||||
SuffixChar *rootSuffixTree; |
||||
bool fileloaded = false; |
||||
ListCell *l; |
||||
|
||||
foreach(l, dictoptions) |
||||
{ |
||||
DefElem *defel = (DefElem *) lfirst(l); |
||||
|
||||
if (pg_strcasecmp("Rules", defel->defname) == 0) |
||||
{ |
||||
if (fileloaded) |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
||||
errmsg("multiple Rules parameters"))); |
||||
rootSuffixTree = initSuffixTree(defGetString(defel)); |
||||
fileloaded = true; |
||||
} |
||||
else |
||||
{ |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
||||
errmsg("unrecognized Unaccent parameter: \"%s\"", |
||||
defel->defname))); |
||||
} |
||||
} |
||||
|
||||
if (!fileloaded) |
||||
{ |
||||
ereport(ERROR, |
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
||||
errmsg("missing Rules parameter"))); |
||||
} |
||||
|
||||
PG_RETURN_POINTER(rootSuffixTree); |
||||
} |
||||
|
||||
PG_FUNCTION_INFO_V1(unaccent_lexize); |
||||
Datum unaccent_lexize(PG_FUNCTION_ARGS); |
||||
Datum |
||||
unaccent_lexize(PG_FUNCTION_ARGS) |
||||
{ |
||||
SuffixChar *rootSuffixTree = (SuffixChar*)PG_GETARG_POINTER(0); |
||||
char *srcchar = (char *) PG_GETARG_POINTER(1); |
||||
int32 len = PG_GETARG_INT32(2); |
||||
char *srcstart, *trgchar; |
||||
int charlen; |
||||
TSLexeme *res = NULL; |
||||
SuffixChar *node; |
||||
|
||||
srcstart = srcchar; |
||||
while( srcchar - srcstart < len ) |
||||
{ |
||||
charlen = pg_mblen(srcchar); |
||||
|
||||
node = findReplaceTo( rootSuffixTree, (unsigned char *) srcchar, charlen ); |
||||
if ( node && node->replaceTo ) |
||||
{ |
||||
if ( !res ) |
||||
{ |
||||
/* allocate res only it it's needed */ |
||||
res = palloc0(sizeof(TSLexeme) * 2); |
||||
res->lexeme = trgchar = palloc( len * pg_database_encoding_max_length() + 1 /* \0 */ ); |
||||
res->flags = TSL_FILTER; |
||||
if ( srcchar != srcstart ) |
||||
{ |
||||
memcpy(trgchar, srcstart, srcchar - srcstart); |
||||
trgchar += (srcchar - srcstart); |
||||
} |
||||
} |
||||
memcpy( trgchar, node->replaceTo, node->replacelen ); |
||||
trgchar += node->replacelen;
|
||||
} |
||||
else if ( res ) |
||||
{ |
||||
memcpy( trgchar, srcchar, charlen ); |
||||
trgchar += charlen; |
||||
} |
||||
|
||||
srcchar += charlen; |
||||
} |
||||
|
||||
if ( res ) |
||||
*trgchar = '\0'; |
||||
|
||||
PG_RETURN_POINTER(res); |
||||
} |
||||
|
||||
/*
|
||||
* Function-like wrapper for dictionary |
||||
*/ |
||||
PG_FUNCTION_INFO_V1(unaccent_dict); |
||||
Datum unaccent_dict(PG_FUNCTION_ARGS); |
||||
Datum |
||||
unaccent_dict(PG_FUNCTION_ARGS) |
||||
{ |
||||
text *str; |
||||
int strArg; |
||||
Oid dictOid; |
||||
TSDictionaryCacheEntry *dict; |
||||
TSLexeme *res; |
||||
|
||||
if (PG_NARGS() == 1) |
||||
{ |
||||
dictOid = TSDictionaryGetDictid(stringToQualifiedNameList("unaccent"), false); |
||||
strArg = 0; |
||||
} |
||||
else |
||||
{ |
||||
dictOid = PG_GETARG_OID(0); |
||||
strArg = 1; |
||||
} |
||||
str = PG_GETARG_TEXT_P(strArg); |
||||
|
||||
dict = lookup_ts_dictionary_cache(dictOid); |
||||
|
||||
res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize), |
||||
PointerGetDatum(dict->dictData), |
||||
PointerGetDatum(VARDATA(str)), |
||||
Int32GetDatum(VARSIZE(str) - VARHDRSZ), |
||||
PointerGetDatum(NULL))); |
||||
|
||||
PG_FREE_IF_COPY(str, strArg); |
||||
|
||||
if ( res == NULL ) |
||||
{ |
||||
PG_RETURN_TEXT_P(PG_GETARG_TEXT_P_COPY(strArg)); |
||||
} |
||||
else if ( res->lexeme == NULL ) |
||||
{ |
||||
pfree(res); |
||||
PG_RETURN_TEXT_P(PG_GETARG_TEXT_P_COPY(strArg)); |
||||
} |
||||
else |
||||
{ |
||||
text *txt = cstring_to_text(res->lexeme); |
||||
|
||||
pfree(res->lexeme); |
||||
pfree(res); |
||||
|
||||
PG_RETURN_TEXT_P(txt); |
||||
} |
||||
} |
||||
@ -0,0 +1,187 @@ |
||||
À A |
||||
Á A |
||||
 A |
||||
à A |
||||
Ä A |
||||
Å A |
||||
Æ A |
||||
à a |
||||
á a |
||||
â a |
||||
ã a |
||||
ä a |
||||
å a |
||||
æ a |
||||
Ā A |
||||
ā a |
||||
Ă A |
||||
ă a |
||||
Ą A |
||||
ą a |
||||
Ç C |
||||
ç c |
||||
Ć C |
||||
ć c |
||||
Ĉ C |
||||
ĉ c |
||||
Ċ C |
||||
ċ c |
||||
Č C |
||||
č c |
||||
Ď D |
||||
ď d |
||||
Đ D |
||||
đ d |
||||
È E |
||||
É E |
||||
Ê E |
||||
Ë E |
||||
è e |
||||
é e |
||||
ê e |
||||
ë e |
||||
Ē E |
||||
ē e |
||||
Ĕ E |
||||
ĕ e |
||||
Ė E |
||||
ė e |
||||
Ę E |
||||
ę e |
||||
Ě E |
||||
ě e |
||||
Ĝ G |
||||
ĝ g |
||||
Ğ G |
||||
ğ g |
||||
Ġ G |
||||
ġ g |
||||
Ģ G |
||||
ģ g |
||||
Ĥ H |
||||
ĥ h |
||||
Ħ H |
||||
ħ h |
||||
Ĩ I |
||||
Ì I |
||||
Í I |
||||
Î I |
||||
Ï I |
||||
ì i |
||||
í i |
||||
î i |
||||
ï i |
||||
ĩ i |
||||
Ī I |
||||
ī i |
||||
Ĭ I |
||||
ĭ i |
||||
Į I |
||||
į i |
||||
İ I |
||||
ı i |
||||
IJ I |
||||
ij i |
||||
Ĵ J |
||||
ĵ j |
||||
Ķ K |
||||
ķ k |
||||
ĸ k |
||||
Ĺ L |
||||
ĺ l |
||||
Ļ L |
||||
ļ l |
||||
Ľ L |
||||
ľ l |
||||
Ŀ L |
||||
ŀ l |
||||
Ł L |
||||
ł l |
||||
Ñ N |
||||
ñ n |
||||
Ń N |
||||
ń n |
||||
Ņ N |
||||
ņ n |
||||
Ň N |
||||
ň n |
||||
ʼn n |
||||
Ŋ N |
||||
ŋ n |
||||
Ò O |
||||
Ó O |
||||
Ô O |
||||
Õ O |
||||
Ö O |
||||
ò o |
||||
ó o |
||||
ô o |
||||
õ o |
||||
ö o |
||||
Ō O |
||||
ō o |
||||
Ŏ O |
||||
ŏ o |
||||
Ő O |
||||
ő o |
||||
Œ E |
||||
œ e |
||||
Ø O |
||||
ø o |
||||
Ŕ R |
||||
ŕ r |
||||
Ŗ R |
||||
ŗ r |
||||
Ř R |
||||
ř r |
||||
ß S |
||||
Ś S |
||||
ś s |
||||
Ŝ S |
||||
ŝ s |
||||
Ş S |
||||
ş s |
||||
Š S |
||||
š s |
||||
Ţ T |
||||
ţ t |
||||
Ť T |
||||
ť t |
||||
Ŧ T |
||||
ŧ t |
||||
Ù U |
||||
Ú U |
||||
Û U |
||||
Ü U |
||||
ù u |
||||
ú u |
||||
û u |
||||
ü u |
||||
Ũ U |
||||
ũ u |
||||
Ū U |
||||
ū u |
||||
Ŭ U |
||||
ŭ u |
||||
Ů U |
||||
ů u |
||||
Ű U |
||||
ű u |
||||
Ų U |
||||
ų u |
||||
Ŵ W |
||||
ŵ w |
||||
Ý Y |
||||
ý y |
||||
ÿ y |
||||
Ŷ Y |
||||
ŷ y |
||||
Ÿ Y |
||||
Ź Z |
||||
ź z |
||||
Ż Z |
||||
ż z |
||||
Ž Z |
||||
ž z |
||||
ё е |
||||
Ё Е |
||||
@ -0,0 +1,33 @@ |
||||
/* $PostgreSQL: pgsql/contrib/unaccent/unaccent.sql.in,v 1.1 2009/08/18 10:34:39 teodor Exp $ */ |
||||
|
||||
CREATE OR REPLACE FUNCTION unaccent(regdictionary, text) |
||||
RETURNS text |
||||
AS 'MODULE_PATHNAME', 'unaccent_dict' |
||||
LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; |
||||
|
||||
CREATE OR REPLACE FUNCTION unaccent(text) |
||||
RETURNS text |
||||
AS 'MODULE_PATHNAME', 'unaccent_dict' |
||||
LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; |
||||
|
||||
CREATE OR REPLACE FUNCTION unaccent_init(internal) |
||||
RETURNS internal |
||||
AS 'MODULE_PATHNAME', 'unaccent_init' |
||||
LANGUAGE C; |
||||
|
||||
CREATE OR REPLACE FUNCTION unaccent_lexize(internal,internal,internal,internal) |
||||
RETURNS internal |
||||
AS 'MODULE_PATHNAME', 'unaccent_lexize' |
||||
LANGUAGE C; |
||||
|
||||
CREATE TEXT SEARCH TEMPLATE unaccent ( |
||||
INIT = unaccent_init, |
||||
LEXIZE = unaccent_lexize |
||||
); |
||||
|
||||
|
||||
CREATE TEXT SEARCH DICTIONARY unaccent ( |
||||
TEMPLATE = unaccent, |
||||
RULES = 'unaccent' |
||||
); |
||||
|
||||
@ -0,0 +1,9 @@ |
||||
/* $PostgreSQL: pgsql/contrib/unaccent/uninstall_unaccent.sql,v 1.1 2009/08/18 10:34:39 teodor Exp $ */ |
||||
|
||||
DROP FUNCTION IF EXISTS unaccent(regdictionary, text) CASCADE; |
||||
DROP FUNCTION IF EXISTS unaccent(text) CASCADE; |
||||
DROP TEXT SEARCH DICTIONARY IF EXISTS unaccent CASCADE; |
||||
DROP TEXT SEARCH TEMPLATE IF EXISTS unaccent CASCADE; |
||||
DROP FUNCTION IF EXISTS unaccent_init(internal) CASCADE; |
||||
DROP FUNCTION IF EXISTS unaccent_lexize(internal,internal,internal,internal) CASCADE; |
||||
|
||||
@ -0,0 +1,150 @@ |
||||
<sect1 id="unaccent"> |
||||
<title>unaccent</title> |
||||
|
||||
<indexterm zone="unaccent"> |
||||
<primary>unaccent</primary> |
||||
</indexterm> |
||||
|
||||
<para> |
||||
<filename>unaccent</> removes accents (diacritic signs) from a lexeme. |
||||
It's a filtering dictionary, which means its output is |
||||
always passed to the next dictionary (if any), contrary to the standard |
||||
behaviour. Currently, it supports the most important accents from European |
||||
languages. |
||||
</para> |
||||
|
||||
<para> |
||||
Limitation: The current implementation of the <filename>unaccent</> |
||||
dictionary cannot be used as a normalizing dictionary for the |
||||
<filename>thesaurus</filename> dictionary. |
||||
</para> |
||||
|
||||
<sect2> |
||||
<title>Configuration</title> |
||||
|
||||
<para> |
||||
An <literal>unaccent</> dictionary accepts the following options: |
||||
</para> |
||||
<itemizedlist> |
||||
<listitem> |
||||
<para> |
||||
<literal>RULES</> is the base name of the file containing the list of |
||||
translation rules. This file must be stored in |
||||
<filename>$SHAREDIR/tsearch_data/</> (where <literal>$SHAREDIR</> means |
||||
the <productname>PostgreSQL</> installation's shared-data directory). |
||||
Its name must end in <literal>.rules</> (which is not to be included in |
||||
the <literal>RULES</> parameter). |
||||
</para> |
||||
</listitem> |
||||
</itemizedlist> |
||||
<para> |
||||
The rules file has the following format: |
||||
</para> |
||||
<itemizedlist> |
||||
<listitem> |
||||
<para> |
||||
Each line represents a pair: character_with_accent character_without_accent |
||||
<programlisting> |
||||
À A |
||||
Á A |
||||
 A |
||||
à A |
||||
Ä A |
||||
Å A |
||||
Æ A |
||||
</programlisting> |
||||
</para> |
||||
</listitem> |
||||
</itemizedlist> |
||||
|
||||
<para> |
||||
Look at <filename>unaccent.rules</>, which is installed in |
||||
<filename>$SHAREDIR/tsearch_data/</>, for an example. |
||||
</para> |
||||
</sect2> |
||||
|
||||
<sect2> |
||||
<title>Usage</title> |
||||
|
||||
<para> |
||||
Running the installation script creates a text search template |
||||
<literal>unaccent</> and a dictionary <literal>unaccent</> |
||||
based on it, with default parameters. You can alter the |
||||
parameters, for example |
||||
|
||||
<programlisting> |
||||
=# ALTER TEXT SEARCH DICTIONARY unaccent (RULES='my_rules'); |
||||
</programlisting> |
||||
|
||||
or create new dictionaries based on the template. |
||||
</para> |
||||
|
||||
<para> |
||||
To test the dictionary, you can try |
||||
|
||||
<programlisting> |
||||
=# select ts_lexize('unaccent','Hôtel'); |
||||
ts_lexize |
||||
----------- |
||||
{Hotel} |
||||
(1 row) |
||||
</programlisting> |
||||
</para> |
||||
|
||||
<para> |
||||
Filtering dictionaries are useful for the correct operation of the |
||||
<function>ts_headline</function> function. |
||||
<programlisting> |
||||
=# CREATE TEXT SEARCH CONFIGURATION fr ( COPY = french ); |
||||
=# ALTER TEXT SEARCH CONFIGURATION fr |
||||
ALTER MAPPING FOR hword, hword_part, word |
||||
WITH unaccent, french_stem; |
||||
=# select to_tsvector('fr','Hôtels de la Mer'); |
||||
to_tsvector |
||||
------------------- |
||||
'hotel':1 'mer':4 |
||||
(1 row) |
||||
|
||||
=# select to_tsvector('fr','Hôtel de la Mer') @@ to_tsquery('fr','Hotels'); |
||||
?column? |
||||
---------- |
||||
t |
||||
(1 row) |
||||
=# select ts_headline('fr','Hôtel de la Mer',to_tsquery('fr','Hotels')); |
||||
ts_headline |
||||
------------------------ |
||||
<b>Hôtel</b> de la Mer |
||||
(1 row) |
||||
|
||||
</programlisting> |
||||
</para> |
||||
</sect2> |
||||
|
||||
<sect2> |
||||
<title>Function</title> |
||||
|
||||
<para> |
||||
<function>unaccent</> function removes accents (diacritic signs) from |
||||
argument string. Basically, it's a wrapper around |
||||
<filename>unaccent</> dictionary. |
||||
</para> |
||||
|
||||
<indexterm> |
||||
<primary>unaccent</primary> |
||||
</indexterm> |
||||
|
||||
<synopsis> |
||||
unaccent(<optional><replaceable class="PARAMETER">dictionary</replaceable>, |
||||
</optional> <replaceable class="PARAMETER">string</replaceable>) |
||||
returns <type>text</type> |
||||
</synopsis> |
||||
|
||||
<para> |
||||
<programlisting> |
||||
SELECT unaccent('unaccent','Hôtel'); |
||||
SELECT unaccent('Hôtel'); |
||||
</programlisting> |
||||
</para> |
||||
</sect2> |
||||
|
||||
</sect1> |
||||
Loading…
Reference in new issue