mirror of https://github.com/postgres/postgres
Rename synonym.syn.sample and thesaurus.ths.sample to synonym_sample.syn and thesaurus_sample.ths, respectively, so that they can be used in the regression tests. The ispell dictionary tests use synthetic, simple dictionary files. REL8_3_STABLE
parent
c4b2b2960a
commit
64def09592
@ -0,0 +1,24 @@ |
||||
COMPOUNDFLAG Z |
||||
ONLYINCOMPOUND L |
||||
|
||||
PFX B Y 1 |
||||
PFX B 0 re . |
||||
|
||||
PFX U N 1 |
||||
PFX U 0 un . |
||||
|
||||
SFX J Y 1 |
||||
SFX J 0 INGS [^E] |
||||
|
||||
SFX G Y 1 |
||||
SFX G 0 ING [^E] |
||||
|
||||
SFX S Y 1 |
||||
SFX S 0 S [^SXZHY] |
||||
|
||||
SFX A Y 1 |
||||
SFX A Y IES [^AEIOU]Y |
||||
|
||||
SFX \ N 1 |
||||
SFX \ 0 Y/L [^Y] |
||||
|
@ -0,0 +1,26 @@ |
||||
compoundwords controlled Z |
||||
|
||||
prefixes |
||||
|
||||
flag *B: |
||||
. > RE # As in enter > reenter |
||||
|
||||
flag U: |
||||
. > UN # As in natural > unnatural |
||||
|
||||
suffixes |
||||
|
||||
flag *J: |
||||
[^E] > INGS # As in cross > crossings |
||||
|
||||
flag *G: |
||||
[^E] > ING # As in cross > crossing |
||||
|
||||
flag *S: |
||||
[^SXZHY] > S # As in bat > bats |
||||
|
||||
flag *A: |
||||
[^AEIOU]Y > -Y,IES # As in imply > implies |
||||
|
||||
flag ~\\: |
||||
[^Y] > Y #~ advarsel > advarsely- |
@ -0,0 +1,8 @@ |
||||
book/GJUS |
||||
booking/SB |
||||
footballklubber |
||||
foot/ZS |
||||
football/Z |
||||
ball/SZ\ |
||||
klubber/Z |
||||
sky/A |
@ -1,3 +0,0 @@ |
||||
skies sky |
||||
booking book |
||||
bookings book |
@ -0,0 +1,4 @@ |
||||
postgres pgsql |
||||
postgresql pgsql |
||||
postgre pgsql |
||||
gogle googl |
@ -0,0 +1,320 @@ |
||||
--Test text search dictionaries and configurations |
||||
-- Test ISpell dictionary with ispell affix file |
||||
CREATE TEXT SEARCH DICTIONARY ispell ( |
||||
Template=ispell, |
||||
DictFile=ispell_sample, |
||||
AffFile=ispell_sample |
||||
); |
||||
SELECT ts_lexize('ispell', 'skies'); |
||||
ts_lexize |
||||
----------- |
||||
{sky} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'bookings'); |
||||
ts_lexize |
||||
---------------- |
||||
{booking,book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'booking'); |
||||
ts_lexize |
||||
---------------- |
||||
{booking,book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'foot'); |
||||
ts_lexize |
||||
----------- |
||||
{foot} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'foots'); |
||||
ts_lexize |
||||
----------- |
||||
{foot} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'rebookings'); |
||||
ts_lexize |
||||
---------------- |
||||
{booking,book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'rebooking'); |
||||
ts_lexize |
||||
---------------- |
||||
{booking,book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'rebook'); |
||||
ts_lexize |
||||
----------- |
||||
|
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'unbookings'); |
||||
ts_lexize |
||||
----------- |
||||
{book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'unbooking'); |
||||
ts_lexize |
||||
----------- |
||||
{book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'unbook'); |
||||
ts_lexize |
||||
----------- |
||||
{book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'footklubber'); |
||||
ts_lexize |
||||
---------------- |
||||
{foot,klubber} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'footballklubber'); |
||||
ts_lexize |
||||
------------------------------------------------------ |
||||
{footballklubber,foot,ball,klubber,football,klubber} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'ballyklubber'); |
||||
ts_lexize |
||||
---------------- |
||||
{ball,klubber} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('ispell', 'footballyklubber'); |
||||
ts_lexize |
||||
--------------------- |
||||
{foot,ball,klubber} |
||||
(1 row) |
||||
|
||||
-- Test ISpell dictionary with hunspell affix file |
||||
CREATE TEXT SEARCH DICTIONARY hunspell ( |
||||
Template=ispell, |
||||
DictFile=ispell_sample, |
||||
AffFile=hunspell_sample |
||||
); |
||||
SELECT ts_lexize('hunspell', 'skies'); |
||||
ts_lexize |
||||
----------- |
||||
{sky} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'bookings'); |
||||
ts_lexize |
||||
---------------- |
||||
{booking,book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'booking'); |
||||
ts_lexize |
||||
---------------- |
||||
{booking,book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'foot'); |
||||
ts_lexize |
||||
----------- |
||||
{foot} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'foots'); |
||||
ts_lexize |
||||
----------- |
||||
{foot} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'rebookings'); |
||||
ts_lexize |
||||
---------------- |
||||
{booking,book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'rebooking'); |
||||
ts_lexize |
||||
---------------- |
||||
{booking,book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'rebook'); |
||||
ts_lexize |
||||
----------- |
||||
|
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'unbookings'); |
||||
ts_lexize |
||||
----------- |
||||
{book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'unbooking'); |
||||
ts_lexize |
||||
----------- |
||||
{book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'unbook'); |
||||
ts_lexize |
||||
----------- |
||||
{book} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'footklubber'); |
||||
ts_lexize |
||||
---------------- |
||||
{foot,klubber} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'footballklubber'); |
||||
ts_lexize |
||||
------------------------------------------------------ |
||||
{footballklubber,foot,ball,klubber,football,klubber} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'ballyklubber'); |
||||
ts_lexize |
||||
---------------- |
||||
{ball,klubber} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('hunspell', 'footballyklubber'); |
||||
ts_lexize |
||||
--------------------- |
||||
{foot,ball,klubber} |
||||
(1 row) |
||||
|
||||
-- Synonim dictionary |
||||
CREATE TEXT SEARCH DICTIONARY synonym ( |
||||
Template=synonym, |
||||
Synonyms=synonym_sample |
||||
); |
||||
SELECT ts_lexize('synonym', 'PoStGrEs'); |
||||
ts_lexize |
||||
----------- |
||||
{pgsql} |
||||
(1 row) |
||||
|
||||
SELECT ts_lexize('synonym', 'Gogle'); |
||||
ts_lexize |
||||
----------- |
||||
{googl} |
||||
(1 row) |
||||
|
||||
-- Create and simple test thesaurus dictionary |
||||
-- More test in configuration checks because of ts_lexize |
||||
-- can not give more tat one word as it may wish thesaurus. |
||||
CREATE TEXT SEARCH DICTIONARY thesaurus ( |
||||
Template=thesaurus, |
||||
DictFile=thesaurus_sample, |
||||
Dictionary=english_stem |
||||
); |
||||
NOTICE: thesaurus word-sample "the" is recognized as stop-word, assign any stop-word (rule 8) |
||||
SELECT ts_lexize('thesaurus', 'one'); |
||||
NOTICE: thesaurus word-sample "the" is recognized as stop-word, assign any stop-word (rule 8) |
||||
ts_lexize |
||||
----------- |
||||
{1} |
||||
(1 row) |
||||
|
||||
-- Test ispell dictionary in configuration |
||||
CREATE TEXT SEARCH CONFIGURATION ispell_tst ( |
||||
COPY=english |
||||
); |
||||
ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR |
||||
hword, lhword, lpart_hword, lword, nlhword, nlpart_hword, nlword, part_hword, word |
||||
WITH ispell, english_stem; |
||||
SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); |
||||
to_tsvector |
||||
---------------------------------------------------------------------------------------------------- |
||||
'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7 |
||||
(1 row) |
||||
|
||||
SELECT to_tsquery('ispell_tst', 'footballklubber'); |
||||
to_tsquery |
||||
------------------------------------------------------------------------------ |
||||
( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' |
||||
(1 row) |
||||
|
||||
SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky'); |
||||
to_tsquery |
||||
------------------------------------------------------------------------ |
||||
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky' |
||||
(1 row) |
||||
|
||||
-- Test ispell dictionary with hunspell affix in configuration |
||||
CREATE TEXT SEARCH CONFIGURATION hunspell_tst ( |
||||
COPY=ispell_tst |
||||
); |
||||
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING |
||||
REPLACE ispell WITH hunspell; |
||||
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); |
||||
to_tsvector |
||||
---------------------------------------------------------------------------------------------------- |
||||
'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7 |
||||
(1 row) |
||||
|
||||
SELECT to_tsquery('hunspell_tst', 'footballklubber'); |
||||
to_tsquery |
||||
------------------------------------------------------------------------------ |
||||
( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' |
||||
(1 row) |
||||
|
||||
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); |
||||
to_tsquery |
||||
------------------------------------------------------------------------ |
||||
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky' |
||||
(1 row) |
||||
|
||||
-- Test synonym dictionary in configuration |
||||
CREATE TEXT SEARCH CONFIGURATION synonym_tst ( |
||||
COPY=english |
||||
); |
||||
ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR |
||||
lword, lpart_hword, lhword |
||||
WITH synonym, english_stem; |
||||
SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre'); |
||||
to_tsvector |
||||
--------------------------------------------------- |
||||
'call':4 'often':3 'pgsql':1,6,8,12 'pronounc':10 |
||||
(1 row) |
||||
|
||||
SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google'); |
||||
to_tsvector |
||||
---------------------------------------------------------- |
||||
'googl':7,10 'write':6 'common':2 'mistak':3 'instead':8 |
||||
(1 row) |
||||
|
||||
-- test thesaurus in configuration |
||||
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector |
||||
CREATE TEXT SEARCH CONFIGURATION thesaurus_tst ( |
||||
COPY=synonym_tst |
||||
); |
||||
ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR |
||||
lword, lpart_hword, lhword |
||||
WITH synonym, thesaurus, english_stem; |
||||
SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one'); |
||||
to_tsvector |
||||
---------------------------------- |
||||
'1':1,5 '12':3 '123':4 'pgsql':2 |
||||
(1 row) |
||||
|
||||
SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)'); |
||||
to_tsvector |
||||
------------------------------------------------------------- |
||||
'sn':1,9,11 'new':4 'call':8 'star':5 'usual':7 'abbrev':10 |
||||
(1 row) |
||||
|
||||
SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets'); |
||||
to_tsvector |
||||
------------------------------------------------------- |
||||
'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8 |
||||
(1 row) |
||||
|
@ -0,0 +1,121 @@ |
||||
--Test text search dictionaries and configurations |
||||
|
||||
-- Test ISpell dictionary with ispell affix file |
||||
CREATE TEXT SEARCH DICTIONARY ispell ( |
||||
Template=ispell, |
||||
DictFile=ispell_sample, |
||||
AffFile=ispell_sample |
||||
); |
||||
|
||||
SELECT ts_lexize('ispell', 'skies'); |
||||
SELECT ts_lexize('ispell', 'bookings'); |
||||
SELECT ts_lexize('ispell', 'booking'); |
||||
SELECT ts_lexize('ispell', 'foot'); |
||||
SELECT ts_lexize('ispell', 'foots'); |
||||
SELECT ts_lexize('ispell', 'rebookings'); |
||||
SELECT ts_lexize('ispell', 'rebooking'); |
||||
SELECT ts_lexize('ispell', 'rebook'); |
||||
SELECT ts_lexize('ispell', 'unbookings'); |
||||
SELECT ts_lexize('ispell', 'unbooking'); |
||||
SELECT ts_lexize('ispell', 'unbook'); |
||||
|
||||
SELECT ts_lexize('ispell', 'footklubber'); |
||||
SELECT ts_lexize('ispell', 'footballklubber'); |
||||
SELECT ts_lexize('ispell', 'ballyklubber'); |
||||
SELECT ts_lexize('ispell', 'footballyklubber'); |
||||
|
||||
-- Test ISpell dictionary with hunspell affix file |
||||
CREATE TEXT SEARCH DICTIONARY hunspell ( |
||||
Template=ispell, |
||||
DictFile=ispell_sample, |
||||
AffFile=hunspell_sample |
||||
); |
||||
|
||||
SELECT ts_lexize('hunspell', 'skies'); |
||||
SELECT ts_lexize('hunspell', 'bookings'); |
||||
SELECT ts_lexize('hunspell', 'booking'); |
||||
SELECT ts_lexize('hunspell', 'foot'); |
||||
SELECT ts_lexize('hunspell', 'foots'); |
||||
SELECT ts_lexize('hunspell', 'rebookings'); |
||||
SELECT ts_lexize('hunspell', 'rebooking'); |
||||
SELECT ts_lexize('hunspell', 'rebook'); |
||||
SELECT ts_lexize('hunspell', 'unbookings'); |
||||
SELECT ts_lexize('hunspell', 'unbooking'); |
||||
SELECT ts_lexize('hunspell', 'unbook'); |
||||
|
||||
SELECT ts_lexize('hunspell', 'footklubber'); |
||||
SELECT ts_lexize('hunspell', 'footballklubber'); |
||||
SELECT ts_lexize('hunspell', 'ballyklubber'); |
||||
SELECT ts_lexize('hunspell', 'footballyklubber'); |
||||
|
||||
-- Synonim dictionary |
||||
CREATE TEXT SEARCH DICTIONARY synonym ( |
||||
Template=synonym, |
||||
Synonyms=synonym_sample |
||||
); |
||||
|
||||
SELECT ts_lexize('synonym', 'PoStGrEs'); |
||||
SELECT ts_lexize('synonym', 'Gogle'); |
||||
|
||||
-- Create and simple test thesaurus dictionary |
||||
-- More test in configuration checks because of ts_lexize |
||||
-- can not give more tat one word as it may wish thesaurus. |
||||
CREATE TEXT SEARCH DICTIONARY thesaurus ( |
||||
Template=thesaurus, |
||||
DictFile=thesaurus_sample, |
||||
Dictionary=english_stem |
||||
); |
||||
|
||||
SELECT ts_lexize('thesaurus', 'one'); |
||||
|
||||
-- Test ispell dictionary in configuration |
||||
CREATE TEXT SEARCH CONFIGURATION ispell_tst ( |
||||
COPY=english |
||||
); |
||||
|
||||
ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR |
||||
hword, lhword, lpart_hword, lword, nlhword, nlpart_hword, nlword, part_hword, word |
||||
WITH ispell, english_stem; |
||||
|
||||
SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); |
||||
SELECT to_tsquery('ispell_tst', 'footballklubber'); |
||||
SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky'); |
||||
|
||||
-- Test ispell dictionary with hunspell affix in configuration |
||||
CREATE TEXT SEARCH CONFIGURATION hunspell_tst ( |
||||
COPY=ispell_tst |
||||
); |
||||
|
||||
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING |
||||
REPLACE ispell WITH hunspell; |
||||
|
||||
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); |
||||
SELECT to_tsquery('hunspell_tst', 'footballklubber'); |
||||
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); |
||||
|
||||
-- Test synonym dictionary in configuration |
||||
CREATE TEXT SEARCH CONFIGURATION synonym_tst ( |
||||
COPY=english |
||||
); |
||||
|
||||
ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR |
||||
lword, lpart_hword, lhword |
||||
WITH synonym, english_stem; |
||||
|
||||
SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre'); |
||||
SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google'); |
||||
|
||||
-- test thesaurus in configuration |
||||
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector |
||||
CREATE TEXT SEARCH CONFIGURATION thesaurus_tst ( |
||||
COPY=synonym_tst |
||||
); |
||||
|
||||
ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR |
||||
lword, lpart_hword, lhword |
||||
WITH synonym, thesaurus, english_stem; |
||||
|
||||
SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one'); |
||||
SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)'); |
||||
SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets'); |
||||
|
Loading…
Reference in new issue