You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
postgres/src/backend/commands/collationcmds.c

636 lines
17 KiB

/*-------------------------------------------------------------------------
*
* collationcmds.c
* collation-related commands support code
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/commands/collationcmds.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_collation_fn.h"
#include "commands/alter.h"
#include "commands/collationcmds.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "commands/defrem.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/syscache.h"
/*
* CREATE COLLATION
*/
ObjectAddress
DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
{
char *collName;
Oid collNamespace;
AclResult aclresult;
ListCell *pl;
DefElem *fromEl = NULL;
DefElem *localeEl = NULL;
DefElem *lccollateEl = NULL;
DefElem *lcctypeEl = NULL;
DefElem *providerEl = NULL;
DefElem *versionEl = NULL;
char *collcollate = NULL;
char *collctype = NULL;
char *collproviderstr = NULL;
int collencoding;
char collprovider = 0;
char *collversion = NULL;
Oid newoid;
ObjectAddress address;
collNamespace = QualifiedNameGetCreationNamespace(names, &collName);
aclresult = pg_namespace_aclcheck(collNamespace, GetUserId(), ACL_CREATE);
if (aclresult != ACLCHECK_OK)
aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
get_namespace_name(collNamespace));
foreach(pl, parameters)
{
DefElem *defel = lfirst_node(DefElem, pl);
DefElem **defelp;
if (pg_strcasecmp(defel->defname, "from") == 0)
defelp = &fromEl;
else if (pg_strcasecmp(defel->defname, "locale") == 0)
defelp = &localeEl;
else if (pg_strcasecmp(defel->defname, "lc_collate") == 0)
defelp = &lccollateEl;
else if (pg_strcasecmp(defel->defname, "lc_ctype") == 0)
defelp = &lcctypeEl;
else if (pg_strcasecmp(defel->defname, "provider") == 0)
defelp = &providerEl;
else if (pg_strcasecmp(defel->defname, "version") == 0)
defelp = &versionEl;
else
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("collation attribute \"%s\" not recognized",
defel->defname),
parser_errposition(pstate, defel->location)));
break;
}
*defelp = defel;
}
if ((localeEl && (lccollateEl || lcctypeEl))
|| (fromEl && list_length(parameters) != 1))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
if (fromEl)
{
Oid collid;
HeapTuple tp;
collid = get_collation_oid(defGetQualifiedName(fromEl), false);
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
if (!HeapTupleIsValid(tp))
elog(ERROR, "cache lookup failed for collation %u", collid);
collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
ReleaseSysCache(tp);
/*
* Copying the "default" collation is not allowed because most code
* checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
* and so having a second collation with COLLPROVIDER_DEFAULT would
* not work and potentially confuse or crash some code. This could be
* fixed with some legwork.
*/
if (collprovider == COLLPROVIDER_DEFAULT)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("collation \"default\" cannot be copied")));
}
if (localeEl)
{
collcollate = defGetString(localeEl);
collctype = defGetString(localeEl);
}
if (lccollateEl)
collcollate = defGetString(lccollateEl);
if (lcctypeEl)
collctype = defGetString(lcctypeEl);
if (providerEl)
collproviderstr = defGetString(providerEl);
if (versionEl)
collversion = defGetString(versionEl);
if (collproviderstr)
{
if (pg_strcasecmp(collproviderstr, "icu") == 0)
collprovider = COLLPROVIDER_ICU;
else if (pg_strcasecmp(collproviderstr, "libc") == 0)
collprovider = COLLPROVIDER_LIBC;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("unrecognized collation provider: %s",
collproviderstr)));
}
else if (!fromEl)
collprovider = COLLPROVIDER_LIBC;
if (!collcollate)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("parameter \"lc_collate\" must be specified")));
if (!collctype)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("parameter \"lc_ctype\" must be specified")));
if (collprovider == COLLPROVIDER_ICU)
collencoding = -1;
else
{
collencoding = GetDatabaseEncoding();
check_encoding_locale_matches(collencoding, collcollate, collctype);
}
if (!collversion)
collversion = get_collation_actual_version(collprovider, collcollate);
newoid = CollationCreate(collName,
collNamespace,
GetUserId(),
collprovider,
collencoding,
collcollate,
collctype,
collversion,
if_not_exists);
if (!OidIsValid(newoid))
return InvalidObjectAddress;
ObjectAddressSet(address, CollationRelationId, newoid);
/* check that the locales can be loaded */
CommandCounterIncrement();
(void) pg_newlocale_from_collation(newoid);
return address;
}
/*
* Subroutine for ALTER COLLATION SET SCHEMA and RENAME
*
* Is there a collation with the same name of the given collation already in
* the given namespace? If so, raise an appropriate error message.
*/
void
IsThereCollationInNamespace(const char *collname, Oid nspOid)
{
/* make sure the name doesn't already exist in new schema */
if (SearchSysCacheExists3(COLLNAMEENCNSP,
CStringGetDatum(collname),
Int32GetDatum(GetDatabaseEncoding()),
ObjectIdGetDatum(nspOid)))
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
collname, GetDatabaseEncodingName(),
get_namespace_name(nspOid))));
/* mustn't match an any-encoding entry, either */
if (SearchSysCacheExists3(COLLNAMEENCNSP,
CStringGetDatum(collname),
Int32GetDatum(-1),
ObjectIdGetDatum(nspOid)))
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("collation \"%s\" already exists in schema \"%s\"",
collname, get_namespace_name(nspOid))));
}
/*
* ALTER COLLATION
*/
ObjectAddress
AlterCollation(AlterCollationStmt *stmt)
{
Relation rel;
Oid collOid;
HeapTuple tup;
Form_pg_collation collForm;
Datum collversion;
bool isnull;
char *oldversion;
char *newversion;
ObjectAddress address;
rel = heap_open(CollationRelationId, RowExclusiveLock);
collOid = get_collation_oid(stmt->collname, false);
if (!pg_collation_ownercheck(collOid, GetUserId()))
aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_COLLATION,
NameListToString(stmt->collname));
tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for collation %u", collOid);
collForm = (Form_pg_collation) GETSTRUCT(tup);
collversion = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion,
&isnull);
oldversion = isnull ? NULL : TextDatumGetCString(collversion);
newversion = get_collation_actual_version(collForm->collprovider, NameStr(collForm->collcollate));
/* cannot change from NULL to non-NULL or vice versa */
if ((!oldversion && newversion) || (oldversion && !newversion))
elog(ERROR, "invalid collation version change");
else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
{
bool nulls[Natts_pg_collation];
bool replaces[Natts_pg_collation];
Datum values[Natts_pg_collation];
ereport(NOTICE,
(errmsg("changing version from %s to %s",
oldversion, newversion)));
memset(values, 0, sizeof(values));
memset(nulls, false, sizeof(nulls));
memset(replaces, false, sizeof(replaces));
values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
replaces[Anum_pg_collation_collversion - 1] = true;
tup = heap_modify_tuple(tup, RelationGetDescr(rel),
values, nulls, replaces);
}
else
ereport(NOTICE,
(errmsg("version has not changed")));
CatalogTupleUpdate(rel, &tup->t_self, tup);
InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
ObjectAddressSet(address, CollationRelationId, collOid);
heap_freetuple(tup);
heap_close(rel, NoLock);
return address;
}
Datum
pg_collation_actual_version(PG_FUNCTION_ARGS)
{
Oid collid = PG_GETARG_OID(0);
HeapTuple tp;
char *collcollate;
char collprovider;
char *version;
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
if (!HeapTupleIsValid(tp))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("collation with OID %u does not exist", collid)));
collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
ReleaseSysCache(tp);
version = get_collation_actual_version(collprovider, collcollate);
if (version)
PG_RETURN_TEXT_P(cstring_to_text(version));
else
PG_RETURN_NULL();
}
/*
* "Normalize" a libc locale name, stripping off encoding tags such as
* ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
* -> "br_FR@euro"). Return true if a new, different name was
* generated.
*/
pg_attribute_unused()
static bool
normalize_libc_locale_name(char *new, const char *old)
{
char *n = new;
const char *o = old;
bool changed = false;
while (*o)
{
if (*o == '.')
{
/* skip over encoding tag such as ".utf8" or ".UTF-8" */
o++;
while ((*o >= 'A' && *o <= 'Z')
|| (*o >= 'a' && *o <= 'z')
|| (*o >= '0' && *o <= '9')
|| (*o == '-'))
o++;
changed = true;
}
else
*n++ = *o++;
}
*n = '\0';
return changed;
}
#ifdef USE_ICU
static char *
get_icu_language_tag(const char *localename)
{
char buf[ULOC_FULLNAME_CAPACITY];
UErrorCode status;
status = U_ZERO_ERROR;
uloc_toLanguageTag(localename, buf, sizeof(buf), TRUE, &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not convert locale name \"%s\" to language tag: %s",
localename, u_errorName(status))));
return pstrdup(buf);
}
static char *
get_icu_locale_comment(const char *localename)
{
UErrorCode status;
UChar displayname[128];
int32 len_uchar;
char *result;
status = U_ZERO_ERROR;
len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], sizeof(displayname), &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could get display name for locale \"%s\": %s",
localename, u_errorName(status))));
icu_from_uchar(&result, displayname, len_uchar);
return result;
}
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
8 years ago
#endif /* USE_ICU */
Datum
pg_import_system_collations(PG_FUNCTION_ARGS)
{
bool if_not_exists = PG_GETARG_BOOL(0);
Oid nspid = PG_GETARG_OID(1);
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
FILE *locale_a_handle;
char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
int count = 0;
List *aliaslist = NIL;
List *localelist = NIL;
List *enclist = NIL;
ListCell *lca,
*lcl,
*lce;
#endif
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser to import system collations"))));
#if !(defined(HAVE_LOCALE_T) && !defined(WIN32)) && !defined(USE_ICU)
/* silence compiler warnings */
(void) if_not_exists;
(void) nspid;
#endif
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
locale_a_handle = OpenPipeStream("locale -a", "r");
if (locale_a_handle == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not execute command \"%s\": %m",
"locale -a")));
while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
{
int i;
size_t len;
int enc;
bool skip;
char alias[NAMEDATALEN];
len = strlen(localebuf);
if (len == 0 || localebuf[len - 1] != '\n')
{
elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
continue;
}
localebuf[len - 1] = '\0';
/*
* Some systems have locale names that don't consist entirely of ASCII
* letters (such as "bokm&aring;l" or "fran&ccedil;ais"). This is
* pretty silly, since we need the locale itself to interpret the
* non-ASCII characters. We can't do much with those, so we filter
* them out.
*/
skip = false;
for (i = 0; i < len; i++)
{
if (IS_HIGHBIT_SET(localebuf[i]))
{
skip = true;
break;
}
}
if (skip)
{
elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
continue;
}
enc = pg_get_encoding_from_locale(localebuf, false);
if (enc < 0)
{
/* error message printed by pg_get_encoding_from_locale() */
continue;
}
if (!PG_VALID_BE_ENCODING(enc))
continue; /* ignore locales for client-only encodings */
if (enc == PG_SQL_ASCII)
continue; /* C/POSIX are already in the catalog */
count++;
CollationCreate(localebuf, nspid, GetUserId(), COLLPROVIDER_LIBC, enc,
localebuf, localebuf,
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
if_not_exists);
CommandCounterIncrement();
/*
* Generate aliases such as "en_US" in addition to "en_US.utf8" for
* ease of use. Note that collation names are unique per encoding
* only, so this doesn't clash with "en_US" for LATIN1, say.
*
* However, it might conflict with a name we'll see later in the
* "locale -a" output. So save up the aliases and try to add them
* after we've read all the output.
*/
if (normalize_libc_locale_name(alias, localebuf))
{
aliaslist = lappend(aliaslist, pstrdup(alias));
localelist = lappend(localelist, pstrdup(localebuf));
enclist = lappend_int(enclist, enc);
}
}
ClosePipeStream(locale_a_handle);
/* Now try to add any aliases we created */
forthree(lca, aliaslist, lcl, localelist, lce, enclist)
{
char *alias = (char *) lfirst(lca);
char *locale = (char *) lfirst(lcl);
int enc = lfirst_int(lce);
CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, enc,
locale, locale,
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
true);
CommandCounterIncrement();
}
if (count == 0)
ereport(WARNING,
(errmsg("no usable system locales were found")));
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
8 years ago
#endif /* not HAVE_LOCALE_T && not WIN32 */
#ifdef USE_ICU
if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
{
ereport(NOTICE,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("encoding \"%s\" not supported by ICU",
pg_encoding_to_char(GetDatabaseEncoding()))));
}
else
{
int i;
/*
* Start the loop at -1 to sneak in the root locale without too much
* code duplication.
*/
for (i = -1; i < ucol_countAvailable(); i++)
{
const char *name;
char *langtag;
const char *collcollate;
UEnumeration *en;
UErrorCode status;
const char *val;
Oid collid;
if (i == -1)
name = ""; /* ICU root locale */
else
name = ucol_getAvailable(i);
langtag = get_icu_language_tag(name);
collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
collid = CollationCreate(psprintf("%s-x-icu", langtag),
nspid, GetUserId(), COLLPROVIDER_ICU, -1,
collcollate, collcollate,
get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
if_not_exists);
CreateComments(collid, CollationRelationId, 0,
get_icu_locale_comment(name));
/*
* Add keyword variants
*/
status = U_ZERO_ERROR;
en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not get keyword values for locale \"%s\": %s",
name, u_errorName(status))));
status = U_ZERO_ERROR;
uenum_reset(en, &status);
while ((val = uenum_next(en, NULL, &status)))
{
char *localeid = psprintf("%s@collation=%s", name, val);
langtag = get_icu_language_tag(localeid);
collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid;
collid = CollationCreate(psprintf("%s-x-icu", langtag),
nspid, GetUserId(), COLLPROVIDER_ICU, -1,
collcollate, collcollate,
get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
if_not_exists);
CreateComments(collid, CollationRelationId, 0,
get_icu_locale_comment(localeid));
}
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not get keyword values for locale \"%s\": %s",
name, u_errorName(status))));
uenum_close(en);
}
}
#endif
PG_RETURN_VOID();
}