mirror of https://github.com/postgres/postgres
strict_word_similarity is similar to the existing word_similarity function, but it takes word boundaries into account when computing similarity. Author: Alexander Korotkov. Review by: David Steele, Liudmila Mantrova, me. Discussion: https://www.postgresql.org/message-id/flat/CY4PR17MB13207ED8310F847CF117EED0D85A0@CY4PR17MB1320.namprd17.prod.outlook.com pull/31/merge
parent
f20b328534
commit
be8a7a6866
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,68 @@ |
|||||||
|
/* contrib/pg_trgm/pg_trgm--1.3--1.4.sql */ |
||||||
|
|
||||||
|
-- complain if script is sourced in psql, rather than via ALTER EXTENSION |
||||||
|
\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.4'" to load this file. \quit |
||||||
|
|
||||||
|
-- Similarity of two text values, returned as float4.  Per the commit
-- description it resembles word_similarity but takes word boundaries into
-- account.  Implemented in C in the extension's shared library.
CREATE FUNCTION strict_word_similarity(text,text) |
||||||
|
RETURNS float4 |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; |
||||||
|
|
||||||
|
-- Boolean support function behind the <<% operator.
CREATE FUNCTION strict_word_similarity_op(text,text) |
||||||
|
RETURNS bool |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT STABLE PARALLEL SAFE; -- stable because depends on pg_trgm.strict_word_similarity_threshold |
||||||
|
|
||||||
|
-- Boolean support function behind the %>> operator (the commutator of <<%).
CREATE FUNCTION strict_word_similarity_commutator_op(text,text) |
||||||
|
RETURNS bool |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT STABLE PARALLEL SAFE; -- stable because depends on pg_trgm.strict_word_similarity_threshold |
||||||
|
|
||||||
|
-- text <<% text, evaluated by strict_word_similarity_op.  Its commutator is
-- %>>; contsel and contjoinsel serve as the restriction and join selectivity
-- estimators.
CREATE OPERATOR <<% ( |
||||||
|
LEFTARG = text, |
||||||
|
RIGHTARG = text, |
||||||
|
PROCEDURE = strict_word_similarity_op, |
||||||
|
COMMUTATOR = '%>>', |
||||||
|
RESTRICT = contsel, |
||||||
|
JOIN = contjoinsel |
||||||
|
); |
||||||
|
|
||||||
|
-- text %>> text, evaluated by strict_word_similarity_commutator_op.  Its
-- commutator is <<%; contsel and contjoinsel serve as the restriction and
-- join selectivity estimators.
CREATE OPERATOR %>> ( |
||||||
|
LEFTARG = text, |
||||||
|
RIGHTARG = text, |
||||||
|
PROCEDURE = strict_word_similarity_commutator_op, |
||||||
|
COMMUTATOR = '<<%', |
||||||
|
RESTRICT = contsel, |
||||||
|
JOIN = contjoinsel |
||||||
|
); |
||||||
|
|
||||||
|
-- float4 support function behind the <<<-> operator.
-- NOTE(review): presumably a distance derived from strict_word_similarity;
-- confirm against the C implementation.
CREATE FUNCTION strict_word_similarity_dist_op(text,text) |
||||||
|
RETURNS float4 |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; |
||||||
|
|
||||||
|
-- float4 support function behind the <->>> operator (the commutator of
-- <<<->).
-- NOTE(review): presumably the same distance with the arguments swapped;
-- confirm against the C implementation.
CREATE FUNCTION strict_word_similarity_dist_commutator_op(text,text) |
||||||
|
RETURNS float4 |
||||||
|
AS 'MODULE_PATHNAME' |
||||||
|
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; |
||||||
|
|
||||||
|
-- text <<<-> text, evaluated by strict_word_similarity_dist_op.  Its
-- commutator is <->>>.  No selectivity estimators are attached.
CREATE OPERATOR <<<-> ( |
||||||
|
LEFTARG = text, |
||||||
|
RIGHTARG = text, |
||||||
|
PROCEDURE = strict_word_similarity_dist_op, |
||||||
|
COMMUTATOR = '<->>>' |
||||||
|
); |
||||||
|
|
||||||
|
-- text <->>> text, evaluated by strict_word_similarity_dist_commutator_op.
-- Its commutator is <<<->.  No selectivity estimators are attached.
CREATE OPERATOR <->>> ( |
||||||
|
LEFTARG = text, |
||||||
|
RIGHTARG = text, |
||||||
|
PROCEDURE = strict_word_similarity_dist_commutator_op, |
||||||
|
COMMUTATOR = '<<<->' |
||||||
|
); |
||||||
|
|
||||||
|
-- Extend the GiST trigram operator family: %>> becomes search strategy 9 and
-- <->>> becomes ordering strategy 10, with results sorted via float_ops.
-- Only these two operators are registered here, not their commutators.
ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD |
||||||
|
OPERATOR 9 %>> (text, text), |
||||||
|
OPERATOR 10 <->>> (text, text) FOR ORDER BY pg_catalog.float_ops; |
||||||
|
|
||||||
|
-- Extend the GIN trigram operator family: %>> becomes search strategy 9.
-- No ordering operator is added for GIN in this script.
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD |
||||||
|
OPERATOR 9 %>> (text, text); |
@ -1,5 +1,5 @@ |
|||||||
# pg_trgm extension |
# pg_trgm extension |
||||||
comment = 'text similarity measurement and index searching based on trigrams' |
comment = 'text similarity measurement and index searching based on trigrams' |
||||||
default_version = '1.3' |
default_version = '1.4' |
||||||
module_pathname = '$libdir/pg_trgm' |
module_pathname = '$libdir/pg_trgm' |
||||||
relocatable = true |
relocatable = true |
||||||
|
@ -0,0 +1,42 @@ |
|||||||
|
DROP INDEX trgm_idx2; |
||||||
|
|
||||||
|
\copy test_trgm3 from 'data/trgm2.data' |
||||||
|
|
||||||
|
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; |
||||||
|
select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7; |
||||||
|
|
||||||
|
create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops); |
||||||
|
set enable_seqscan=off; |
||||||
|
|
||||||
|
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; |
||||||
|
|
||||||
|
explain (costs off) |
||||||
|
select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7; |
||||||
|
select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7; |
||||||
|
|
||||||
|
drop index trgm_idx2; |
||||||
|
create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops); |
||||||
|
set enable_seqscan=off; |
||||||
|
|
||||||
|
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; |
||||||
|
|
||||||
|
set "pg_trgm.strict_word_similarity_threshold" to 0.4; |
||||||
|
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; |
||||||
|
|
||||||
|
set "pg_trgm.strict_word_similarity_threshold" to 0.2; |
||||||
|
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; |
||||||
|
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; |
Loading…
Reference in new issue