diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 2ba687c3256..ab35a9feacd 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -3607,16 +3607,19 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, * The condition's equality operator must be a member of the * index opfamily, else it is not asserting the right kind of * equality behavior for this index. We check this first - * since it's probably cheaper than match_index_to_operand(). + * since it's probably the cheapest test. */ if (!list_member_oid(rinfo->mergeopfamilies, ind->opfamily[c])) continue; /* - * XXX at some point we may need to check collations here too. - * For the moment we assume all collations reduce to the same - * notion of equality. + * The index's collation must agree with the clause's input + * collation on equality, else the index's uniqueness does not + * imply uniqueness under the clause's equality semantics. */ + if (!collations_agree_on_equality(ind->indexcollations[c], + exprInputCollation((Node *) rinfo->clause))) + continue; /* OK, see if the condition operand matches the index key */ if (rinfo->outer_is_left) @@ -3654,10 +3657,13 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, continue; /* - * XXX at some point we may need to check collations here too. - * For the moment we assume all collations reduce to the same - * notion of equality. + * The index's collation must agree with the operand's + * collation on equality, else the index's uniqueness does not + * imply uniqueness under the operator's equality semantics. */ + if (!collations_agree_on_equality(ind->indexcollations[c], + exprCollation(expr))) + continue; matched = true; /* column is unique */ break; diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 5564b53f257..83d3850728c 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -780,6 +780,44 @@ comparison_ops_are_compatible(Oid opno1, Oid opno2) return result; } +/* + * collations_agree_on_equality + * Return true if the two collations have equivalent notions of equality, + * so that a uniqueness or equality proof established under one side + * carries over to a comparison performed under the other side. + * + * Note: this is equality compatibility only. Do NOT use this to reason + * about ordering. + * + * An InvalidOid on either side denotes the absence of a collation -- that + * side's operation is not collation-sensitive (e.g. a non-collatable column + * type). Absence of a collation cannot conflict with the other side's + * collation, so we treat such pairs as agreeing on equality. This generalizes + * the asymmetric treatment in IndexCollMatchesExprColl(). + * + * Otherwise the collations have equivalent equality if they match, or if both + * are deterministic: by definition a deterministic collation treats two + * strings as equal iff they are byte-wise equal (see CREATE COLLATION), so any + * two deterministic collations share the same equality relation. A mismatch + * involving a nondeterministic collation, however, may mean the two equality + * relations disagree, and the proof is unsound. + */ +bool +collations_agree_on_equality(Oid coll1, Oid coll2) +{ + if (!OidIsValid(coll1) || !OidIsValid(coll2)) + return true; + + if (coll1 == coll2) + return true; + + if (!get_collation_isdeterministic(coll1) || + !get_collation_isdeterministic(coll2)) + return false; + + return true; +} + /* ---------- AMPROC CACHES ---------- */ diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 77871aaefc3..ac1ddb81307 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -86,6 +86,7 @@ extern bool get_op_hash_functions(Oid opno, extern List *get_op_btree_interpretation(Oid opno); extern bool equality_ops_are_compatible(Oid opno1, Oid opno2); extern bool comparison_ops_are_compatible(Oid opno1, Oid opno2); +extern bool collations_agree_on_equality(Oid coll1, Oid coll2); extern Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum); extern char *get_attname(Oid relid, AttrNumber attnum, bool missing_ok); diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 02a946c2ef6..c7d7122bd2b 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1389,6 +1389,129 @@ SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b'); {A,B,C,D,E,F,G,H,I} (1 row) +-- +-- A unique index under one collation does not prove uniqueness under +-- another, so the planner must not use such a proof for any optimization. +-- +-- Ensure that we do not use inner-unique join execution +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM test1cs t1, test3cs t2 +WHERE t1.x = t2.x COLLATE case_insensitive +ORDER BY 1, 2; + QUERY PLAN +---------------------------------------------------------------------- + Sort + Output: t1.x, t2.x + Sort Key: t1.x COLLATE case_sensitive, t2.x COLLATE case_sensitive + -> Hash Join + Output: t1.x, t2.x + Hash Cond: ((t2.x)::text = (t1.x)::text) + -> Seq Scan on collate_tests.test3cs t2 + Output: t2.x + -> Hash + Output: t1.x + -> Seq Scan on collate_tests.test1cs t1 + Output: t1.x +(12 rows) + +SELECT * FROM test1cs t1, test3cs t2 +WHERE t1.x = t2.x COLLATE case_insensitive +ORDER BY 1, 2; + x | x +-----+----- + abc | abc + abc | ABC + ABC | abc + ABC | ABC + def | def + ghi | ghi +(6 rows) + +-- Ensure that left-join is not removed +EXPLAIN (COSTS OFF) +SELECT t1.* FROM test3cs t1 + LEFT JOIN test3cs t2 ON t1.x = t2.x COLLATE case_insensitive +ORDER BY 1; + QUERY PLAN +------------------------------------------ + Sort + Sort Key: t1.x COLLATE case_sensitive + -> Hash Left Join + Hash Cond: (t1.x = (t2.x)::text) + -> Seq Scan on test3cs t1 + -> Hash + -> Seq Scan on test3cs t2 +(7 rows) + +SELECT t1.* FROM test3cs t1 + LEFT JOIN test3cs t2 ON t1.x = t2.x COLLATE case_insensitive +ORDER BY 1; + x +----- + abc + abc + ABC + ABC + def + ghi +(6 rows) + +-- Ensure that self-join is not removed +EXPLAIN (COSTS OFF) +SELECT * FROM test3cs t1, test3cs t2 +WHERE t1.x = t2.x COLLATE case_insensitive +ORDER BY 1, 2; + QUERY PLAN +---------------------------------------------------------------------- + Sort + Sort Key: t1.x COLLATE case_sensitive, t2.x COLLATE case_sensitive + -> Hash Join + Hash Cond: ((t1.x)::text = (t2.x)::text) + -> Seq Scan on test3cs t1 + -> Hash + -> Seq Scan on test3cs t2 +(7 rows) + +SELECT * FROM test3cs t1, test3cs t2 +WHERE t1.x = t2.x COLLATE case_insensitive +ORDER BY 1, 2; + x | x +-----+----- + abc | abc + abc | ABC + ABC | abc + ABC | ABC + def | def + ghi | ghi +(6 rows) + +-- Ensure that semijoin is not reduced to innerjoin +EXPLAIN (COSTS OFF) +SELECT * FROM test3cs t1 + WHERE EXISTS (SELECT 1 FROM test3cs t2 WHERE t1.x = t2.x COLLATE case_insensitive) +ORDER BY 1; + QUERY PLAN +-------------------------------------------------- + Sort + Sort Key: t1.x COLLATE case_sensitive + -> Hash Semi Join + Hash Cond: ((t1.x)::text = (t2.x)::text) + -> Seq Scan on test3cs t1 + -> Hash + -> Seq Scan on test3cs t2 +(7 rows) + +SELECT * FROM test3cs t1 + WHERE EXISTS (SELECT 1 FROM test3cs t2 WHERE t1.x = t2.x COLLATE case_insensitive) +ORDER BY 1; + x +----- + abc + ABC + def + ghi +(4 rows) + CREATE TABLE test1ci (x text COLLATE case_insensitive); CREATE TABLE test2ci (x text COLLATE case_insensitive); CREATE TABLE test3ci (x text COLLATE case_insensitive); diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index b093e1d1670..a0f140eb090 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -526,6 +526,51 @@ CREATE UNIQUE INDEX ON test3cs (x); -- ok SELECT string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc'); SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b'); +-- +-- A unique index under one collation does not prove uniqueness under +-- another, so the planner must not use such a proof for any optimization. +-- + +-- Ensure that we do not use inner-unique join execution +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM test1cs t1, test3cs t2 +WHERE t1.x = t2.x COLLATE case_insensitive +ORDER BY 1, 2; + +SELECT * FROM test1cs t1, test3cs t2 +WHERE t1.x = t2.x COLLATE case_insensitive +ORDER BY 1, 2; + +-- Ensure that left-join is not removed +EXPLAIN (COSTS OFF) +SELECT t1.* FROM test3cs t1 + LEFT JOIN test3cs t2 ON t1.x = t2.x COLLATE case_insensitive +ORDER BY 1; + +SELECT t1.* FROM test3cs t1 + LEFT JOIN test3cs t2 ON t1.x = t2.x COLLATE case_insensitive +ORDER BY 1; + +-- Ensure that self-join is not removed +EXPLAIN (COSTS OFF) +SELECT * FROM test3cs t1, test3cs t2 +WHERE t1.x = t2.x COLLATE case_insensitive +ORDER BY 1, 2; + +SELECT * FROM test3cs t1, test3cs t2 +WHERE t1.x = t2.x COLLATE case_insensitive +ORDER BY 1, 2; + +-- Ensure that semijoin is not reduced to innerjoin +EXPLAIN (COSTS OFF) +SELECT * FROM test3cs t1 + WHERE EXISTS (SELECT 1 FROM test3cs t2 WHERE t1.x = t2.x COLLATE case_insensitive) +ORDER BY 1; + +SELECT * FROM test3cs t1 + WHERE EXISTS (SELECT 1 FROM test3cs t2 WHERE t1.x = t2.x COLLATE case_insensitive) +ORDER BY 1; + CREATE TABLE test1ci (x text COLLATE case_insensitive); CREATE TABLE test2ci (x text COLLATE case_insensitive); CREATE TABLE test3ci (x text COLLATE case_insensitive);