Add standard collation UNICODE

This adds a new predefined collation named UNICODE, which sorts according to the default Unicode Collation Algorithm specification, as defined by the SQL standard. The collation is only usable if PostgreSQL is built with ICU support.

Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://www.postgresql.org/message-id/flat/1293e382-2093-a2bf-a397-c04e8f83d3c2@enterprisedb.com
3 years ago · 0d21d4b9bc
parent 6ad5793a49
commit 0d21d4b9bc
5 changed files with 46 additions and 7 deletions
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -659,9 +659,34 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
   </para>
   <para>
-    Additionally, the SQL standard collation name <literal>ucs_basic</literal>
+    Additionally, two SQL standard collation names are available:
-    is available for encoding <literal>UTF8</literal>.  It is equivalent
+
-    to <literal>C</literal> and sorts by Unicode code point.
+    <variablelist>
     <varlistentry>
      <term><literal>unicode</literal></term>
      <listitem>
       <para>
        This collation sorts using the Unicode Collation Algorithm with the
        Default Unicode Collation Element Table.  It is available in all
        encodings.  ICU support is required to use this collation.  (This
        collation has the same behavior as the ICU root locale; see <xref
        linkend="collation-managing-predefined-icu-und-x-icu"/>.)
       </para>
      </listitem>
     </varlistentry>
     <varlistentry>
      <term><literal>ucs_basic</literal></term>
      <listitem>
       <para>
        This collation sorts by Unicode code point.  It is only available for
        encoding <literal>UTF8</literal>.  (This collation has the same
        behavior as the libc locale specification <literal>C</literal> in
        <literal>UTF8</literal> encoding.)
       </para>
      </listitem>
     </varlistentry>
    </variablelist>
   </para>
  </sect3>
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1493,10 +1493,14 @@ static void
 setup_collation(FILE *cmdfd)
 {
 	/*
-	 * Add an SQL-standard name.  We don't want to pin this, so it doesn't go
+	 * Add SQL-standard names.  We don't want to pin these, so they don't go
-	 * in pg_collation.h.  But add it before reading system collations, so
+	 * in pg_collation.dat.  But add them before reading system collations, so
-	 * that it wins if libc defines a locale named ucs_basic.
+	 * that they win if libc defines a locale with the same name.
 	 */
 	PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, colliculocale)"
 				  "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, 'und');\n\n",
 				  BOOTSTRAP_SUPERUSERID, COLLPROVIDER_ICU);
 	PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)"
 				  "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n",
 				  BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8);
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
 */
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202303081
+#define CATALOG_VERSION_NO	202303101
 #endif
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1151,6 +1151,15 @@ SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC;
 2 | äbc
 (4 rows)
 SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
 a |  b  
 ---+-----
 1 | abc
 4 | ABC
 2 | äbc
 3 | bbc
 (4 rows)
 -- test ICU collation customization
 -- test the attributes handled by icu_set_collation_attributes()
 CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -447,6 +447,7 @@ drop type textrange_en_us;
 -- standard collations
 SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC;
 SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
 -- test ICU collation customization