Add standard collation UNICODE

This adds a new predefined collation named UNICODE, which sorts using the
Unicode Collation Algorithm with the Default Unicode Collation Element
Table, per the SQL standard.

The collation can only be used if PostgreSQL is built with ICU support.

Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://www.postgresql.org/message-id/flat/1293e382-2093-a2bf-a397-c04e8f83d3c2@enterprisedb.com
pull/134/head
Peter Eisentraut 3 years ago
parent 6ad5793a49
commit 0d21d4b9bc
  1. 31
      doc/src/sgml/charset.sgml
  2. 10
      src/bin/initdb/initdb.c
  3. 2
      src/include/catalog/catversion.h
  4. 9
      src/test/regress/expected/collate.icu.utf8.out
  5. 1
      src/test/regress/sql/collate.icu.utf8.sql

@ -659,9 +659,34 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
</para> </para>
<para> <para>
- Additionally, the SQL standard collation name <literal>ucs_basic</literal>
- is available for encoding <literal>UTF8</literal>. It is equivalent
- to <literal>C</literal> and sorts by Unicode code point.
+ Additionally, two SQL standard collation names are available:
+
+ <variablelist>
<varlistentry>
<term><literal>unicode</literal></term>
<listitem>
<para>
This collation sorts using the Unicode Collation Algorithm with the
Default Unicode Collation Element Table. It is available in all
encodings. ICU support is required to use this collation. (This
collation has the same behavior as the ICU root locale; see <xref
linkend="collation-managing-predefined-icu-und-x-icu"/>.)
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>ucs_basic</literal></term>
<listitem>
<para>
This collation sorts by Unicode code point. It is only available for
encoding <literal>UTF8</literal>. (This collation has the same
behavior as the libc locale specification <literal>C</literal> in
<literal>UTF8</literal> encoding.)
</para>
</listitem>
</varlistentry>
</variablelist>
</para> </para>
</sect3> </sect3>

@ -1493,10 +1493,14 @@ static void
setup_collation(FILE *cmdfd) setup_collation(FILE *cmdfd)
{ {
/* /*
- * Add an SQL-standard name. We don't want to pin this, so it doesn't go
- * in pg_collation.h. But add it before reading system collations, so
- * that it wins if libc defines a locale named ucs_basic.
+ * Add SQL-standard names. We don't want to pin these, so they don't go
+ * in pg_collation.dat. But add them before reading system collations, so
+ * that they win if libc defines a locale with the same name.
*/ */
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, colliculocale)"
"VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, 'und');\n\n",
BOOTSTRAP_SUPERUSERID, COLLPROVIDER_ICU);
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)" PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)"
"VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n", "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n",
BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8); BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8);

@ -57,6 +57,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
- #define CATALOG_VERSION_NO 202303081
+ #define CATALOG_VERSION_NO 202303101
#endif #endif

@ -1151,6 +1151,15 @@ SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC;
2 | äbc 2 | äbc
(4 rows) (4 rows)
SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
a | b
---+-----
1 | abc
4 | ABC
2 | äbc
3 | bbc
(4 rows)
-- test ICU collation customization -- test ICU collation customization
-- test the attributes handled by icu_set_collation_attributes() -- test the attributes handled by icu_set_collation_attributes()
CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes'); CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');

@ -447,6 +447,7 @@ drop type textrange_en_us;
-- standard collations -- standard collations
SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC; SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC;
SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
-- test ICU collation customization -- test ICU collation customization

Loading…
Cancel
Save