mirror of https://github.com/postgres/postgres
strict_word_similarity is similar to the existing word_similarity function, but it takes word boundaries into account when computing similarity. Author: Alexander Korotkov. Review by: David Steele, Liudmila Mantrova, me. Discussion: https://www.postgresql.org/message-id/flat/CY4PR17MB13207ED8310F847CF117EED0D85A0@CY4PR17MB1320.namprd17.prod.outlook.com pull/31/merge
parent
f20b328534
commit
be8a7a6866
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,68 @@ |
||||
/* contrib/pg_trgm/pg_trgm--1.3--1.4.sql */ |
||||
|
||||
-- complain if script is sourced in psql, rather than via ALTER EXTENSION |
||||
\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.4'" to load this file. \quit |
||||
|
||||
-- strict_word_similarity(text, text): similar to word_similarity, but takes
-- word boundaries into account when computing the similarity.  Returns float4.
CREATE FUNCTION strict_word_similarity(text, text)
    RETURNS float4
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';
||||
|
||||
-- Boolean function used as the PROCEDURE of the <<% operator.
-- Declared STABLE rather than IMMUTABLE because its result depends on a
-- run-time setting: pg_trgm.strict_word_similarity_threshold (not the plain
-- pg_trgm.word_similarity_threshold).
CREATE FUNCTION strict_word_similarity_op(text,text)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT STABLE PARALLEL SAFE;  -- stable because depends on pg_trgm.strict_word_similarity_threshold
||||
|
||||
-- Boolean function used as the PROCEDURE of the %>> operator (the commutator
-- of <<%).
-- Declared STABLE rather than IMMUTABLE because its result depends on a
-- run-time setting: pg_trgm.strict_word_similarity_threshold (not the plain
-- pg_trgm.word_similarity_threshold).
CREATE FUNCTION strict_word_similarity_commutator_op(text,text)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT STABLE PARALLEL SAFE;  -- stable because depends on pg_trgm.strict_word_similarity_threshold
||||
|
||||
-- <<% (text, text): boolean operator implemented by strict_word_similarity_op.
-- %>> is declared as its commutator; selectivity is estimated with the
-- contsel / contjoinsel estimators.
CREATE OPERATOR <<% (
    PROCEDURE  = strict_word_similarity_op,
    LEFTARG    = text,
    RIGHTARG   = text,
    COMMUTATOR = '%>>',
    RESTRICT   = contsel,
    JOIN       = contjoinsel
);
||||
|
||||
-- %>> (text, text): boolean operator implemented by
-- strict_word_similarity_commutator_op.  <<% is declared as its commutator;
-- selectivity is estimated with the contsel / contjoinsel estimators.
CREATE OPERATOR %>> (
    PROCEDURE  = strict_word_similarity_commutator_op,
    LEFTARG    = text,
    RIGHTARG   = text,
    COMMUTATOR = '<<%',
    RESTRICT   = contsel,
    JOIN       = contjoinsel
);
||||
|
||||
-- float4-returning function used as the PROCEDURE of the <<<-> operator.
CREATE FUNCTION strict_word_similarity_dist_op(text, text)
    RETURNS float4
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';
||||
|
||||
-- float4-returning function used as the PROCEDURE of the <->>> operator
-- (the commutator of <<<->).
CREATE FUNCTION strict_word_similarity_dist_commutator_op(text, text)
    RETURNS float4
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';
||||
|
||||
-- <<<-> (text, text): operator implemented by strict_word_similarity_dist_op.
-- <->>> is declared as its commutator.  No selectivity estimators are attached.
CREATE OPERATOR <<<-> (
    PROCEDURE  = strict_word_similarity_dist_op,
    LEFTARG    = text,
    RIGHTARG   = text,
    COMMUTATOR = '<->>>'
);
||||
|
||||
-- <->>> (text, text): operator implemented by
-- strict_word_similarity_dist_commutator_op.  <<<-> is declared as its
-- commutator.  No selectivity estimators are attached.
CREATE OPERATOR <->>> (
    PROCEDURE  = strict_word_similarity_dist_commutator_op,
    LEFTARG    = text,
    RIGHTARG   = text,
    COMMUTATOR = '<<<->'
);
||||
|
||||
-- Add the new operators to the GiST trigram operator family:
--   strategy 9  : %>>   as a search operator (FOR SEARCH is the default)
--   strategy 10 : <->>> as an ordering operator, sorted via pg_catalog.float_ops
ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
    OPERATOR 9  %>> (text, text) FOR SEARCH,
    OPERATOR 10 <->>> (text, text) FOR ORDER BY pg_catalog.float_ops;
||||
|
||||
-- Add %>> to the GIN trigram operator family as search strategy 9
-- (FOR SEARCH is the default).  This statement adds no ordering operator.
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
    OPERATOR 9 %>> (text, text) FOR SEARCH;
@ -1,5 +1,5 @@ |
||||
# pg_trgm extension |
||||
comment = 'text similarity measurement and index searching based on trigrams' |
||||
default_version = '1.3' |
||||
default_version = '1.4' |
||||
module_pathname = '$libdir/pg_trgm' |
||||
relocatable = true |
||||
|
@ -0,0 +1,42 @@ |
||||
DROP INDEX trgm_idx2; |
||||
|
||||
\copy test_trgm3 from 'data/trgm2.data' |
||||
|
||||
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; |
||||
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; |
||||
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; |
||||
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; |
||||
select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7; |
||||
|
||||
create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops); |
||||
set enable_seqscan=off; |
||||
|
||||
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; |
||||
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; |
||||
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; |
||||
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; |
||||
|
||||
explain (costs off) |
||||
select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7; |
||||
select t <->>> 'Alaikallupoddakulam', t from test_trgm2 order by t <->>> 'Alaikallupoddakulam' limit 7; |
||||
|
||||
drop index trgm_idx2; |
||||
create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops); |
||||
set enable_seqscan=off; |
||||
|
||||
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; |
||||
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; |
||||
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; |
||||
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; |
||||
|
||||
set "pg_trgm.strict_word_similarity_threshold" to 0.4; |
||||
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; |
||||
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; |
||||
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; |
||||
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; |
||||
|
||||
set "pg_trgm.strict_word_similarity_threshold" to 0.2; |
||||
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; |
||||
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; |
||||
select t,strict_word_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; |
||||
select t,strict_word_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; |
Loading…
Reference in new issue