|
|
|
/*-----------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* PostgreSQL locale utilities
|
|
|
|
*
|
|
|
|
* src/include/utils/pg_locale.h
|
|
|
|
*
|
|
|
|
* Copyright (c) 2002-2022, PostgreSQL Global Development Group
|
|
|
|
*
|
|
|
|
*-----------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _PG_LOCALE_
|
|
|
|
#define _PG_LOCALE_
|
|
|
|
|
Cope if platform declares mbstowcs_l(), but not locale_t, in <xlocale.h>.
Previously, we included <xlocale.h> only if necessary to get the definition
of type locale_t. According to notes in PGAC_TYPE_LOCALE_T, this is
important because on some versions of glibc that file supplies an
incompatible declaration of locale_t. (This info may be obsolete, because
on my RHEL6 box that seems to be the *only* definition of locale_t; but
there may still be glibc's in the wild for which it's a live concern.)
It turns out though that on FreeBSD and maybe other BSDen, you can get
locale_t from stdlib.h or locale.h but mbstowcs_l() and friends only from
<xlocale.h>. This was leaving us compiling calls to mbstowcs_l() and
friends with no visible prototype, which causes a warning and could
possibly cause actual trouble, since it's not declared to return int.
Hence, adjust the configure checks so that we'll include <xlocale.h>
either if it's necessary to get type locale_t or if it's necessary to
get a declaration of mbstowcs_l().
Report and patch by Aleksander Alekseev, somewhat whacked around by me.
Back-patch to all supported branches, since we have been using
mbstowcs_l() since 9.1.
10 years ago
|
|
|
/*
 * Pull in <xlocale.h> when configure determined that it is needed, either
 * to get type locale_t or to get declarations of wcstombs_l()/mbstowcs_l()
 * and friends.  On FreeBSD (and possibly other BSDs) locale_t is available
 * from other headers, but those functions are declared only in <xlocale.h>;
 * without this include, calls to them would compile with no visible prototype.
 */
#if defined(LOCALE_T_IN_XLOCALE) || defined(WCSTOMBS_L_IN_XLOCALE)
|
|
|
|
#include <xlocale.h>
|
|
|
|
#endif
|
|
|
|
#ifdef USE_ICU
|
|
|
|
#include <unicode/ucol.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "utils/guc.h"
|
|
|
|
|
|
|
|
#ifdef USE_ICU
|
|
|
|
/*
|
|
|
|
* ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
|
|
|
|
* (see
|
|
|
|
* <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>)
|
|
|
|
*/
|
|
|
|
#if U_ICU_VERSION_MAJOR_NUM >= 53
|
|
|
|
#define HAVE_UCOL_STRCOLLUTF8 1
|
|
|
|
#else
|
|
|
|
#undef HAVE_UCOL_STRCOLLUTF8
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* buffer size to use for libc locale names */
|
|
|
|
#define LOCALE_NAME_BUFLEN 128
|
|
|
|
|
|
|
|
/* GUC settings */
|
|
|
|
extern char *locale_messages;
|
|
|
|
extern char *locale_monetary;
|
|
|
|
extern char *locale_numeric;
|
|
|
|
extern char *locale_time;
|
|
|
|
|
|
|
|
/* lc_time localization cache */
|
|
|
|
extern char *localized_abbrev_days[];
|
|
|
|
extern char *localized_full_days[];
|
|
|
|
extern char *localized_abbrev_months[];
|
|
|
|
extern char *localized_full_months[];
|
|
|
|
|
|
|
|
|
|
|
|
/* GUC check and assign hooks for the locale settings (this and the following declarations) */
extern bool check_locale_messages(char **newval, void **extra, GucSource source);
|
|
|
|
extern void assign_locale_messages(const char *newval, void *extra);
|
|
|
|
extern bool check_locale_monetary(char **newval, void **extra, GucSource source);
|
|
|
|
extern void assign_locale_monetary(const char *newval, void *extra);
|
|
|
|
extern bool check_locale_numeric(char **newval, void **extra, GucSource source);
|
|
|
|
extern void assign_locale_numeric(const char *newval, void *extra);
|
|
|
|
extern bool check_locale_time(char **newval, void **extra, GucSource source);
|
|
|
|
extern void assign_locale_time(const char *newval, void *extra);
|
|
|
|
|
Replace empty locale name with implied value in CREATE DATABASE and initdb.
setlocale() accepts locale name "" as meaning "the locale specified by the
process's environment variables". Historically we've accepted that for
Postgres' locale settings, too. However, it's fairly unsafe to store an
empty string in a new database's pg_database.datcollate or datctype fields,
because then the interpretation could vary across postmaster restarts,
possibly resulting in index corruption and other unpleasantness.
Instead, we should expand "" to whatever it means at the moment of calling
CREATE DATABASE, which we can do by saving the value returned by
setlocale().
For consistency, make initdb set up the initial lc_xxx parameter values the
same way. initdb was already doing the right thing for empty locale names,
but it did not replace non-empty names with setlocale results. On a
platform where setlocale chooses to canonicalize the spellings of locale
names, this would result in annoying inconsistency. (It seems that popular
implementations of setlocale don't do such canonicalization, which is a
pity, but the POSIX spec certainly allows it to be done.) The same risk
of inconsistency leads me to not venture back-patching this, although it
could certainly be seen as a longstanding bug.
Per report from Jeff Davis, though this is not his proposed patch.
14 years ago
|
|
|
/*
 * NOTE(review): canonname is presumably an output parameter that receives
 * the locale name as reported by setlocale(), so that an empty name ("")
 * can be replaced by what it currently means -- confirm against pg_locale.c.
 */
extern bool check_locale(int category, const char *locale, char **canonname);
|
|
|
|
extern char *pg_perm_setlocale(int category, const char *locale);
|
|
|
|
extern void check_strxfrm_bug(void);
|
|
|
|
|
|
|
|
extern bool lc_collate_is_c(Oid collation);
|
|
|
|
extern bool lc_ctype_is_c(Oid collation);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return the POSIX lconv struct (contains number/money formatting
|
|
|
|
* information) with locale information for all categories.
|
|
|
|
*/
|
|
|
|
extern struct lconv *PGLC_localeconv(void);
|
|
|
|
|
|
|
|
extern void cache_locale_time(void);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We define our own wrapper around locale_t so we can keep the same
|
|
|
|
* function signatures for all builds, while not having to create a
|
|
|
|
* fake version of the standard type locale_t in the global namespace.
|
|
|
|
* pg_locale_t is occasionally checked for truth, so make it a pointer.
|
|
|
|
*/
|
|
|
|
struct pg_locale_struct
|
|
|
|
{
|
|
|
|
char provider; /* one-character provider code; presumably selects which member of "info" is valid -- TODO confirm */
|
|
|
|
bool deterministic; /* NOTE(review): semantics not shown in this header; see pg_locale.c */
|
|
|
|
union /* provider-specific data; members exist only if the matching build option is defined */
|
|
|
|
{
|
|
|
|
#ifdef HAVE_LOCALE_T
|
|
|
|
locale_t lt; /* platform locale_t object */
|
|
|
|
#endif
|
|
|
|
#ifdef USE_ICU
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
const char *locale; /* presumably the ICU locale name -- TODO confirm */
|
|
|
|
UCollator *ucol; /* ICU collator object (type from <unicode/ucol.h>) */
|
|
|
|
} icu;
|
|
|
|
#endif
|
|
|
|
int dummy; /* in case we have neither LOCALE_T nor ICU */
|
|
|
|
} info;
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef struct pg_locale_struct *pg_locale_t;
|
|
|
|
|
|
|
|
extern struct pg_locale_struct default_locale;
|
|
|
|
|
|
|
|
extern void make_icu_collator(const char *iculocstr,
|
|
|
|
struct pg_locale_struct *resultp);
|
|
|
|
|
|
|
|
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
|
|
|
|
|
|
|
|
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
|
|
|
|
|
|
|
|
#ifdef USE_ICU
|
|
|
|
/*
 * Convert nbytes of buff to a UChar string, returned in *buff_uchar.
 * The caller is responsible for pfree'ing the result string when done with
 * it (failing to do so leaks memory); the result is guaranteed to be
 * nul-terminated.
 * NOTE(review): the function result is presumably the length of the
 * converted string in UChars -- confirm against pg_locale.c.
 */
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
|
Fix memory leakage in ICU encoding conversion, and other code review.
Callers of icu_to_uchar() neglected to pfree the result string when done
with it. This results in catastrophic memory leaks in varstr_cmp(),
because of our prevailing assumption that btree comparison functions don't
leak memory. For safety, make all the call sites clean up leaks, though
I suspect that we could get away without it in formatting.c. I audited
callers of icu_from_uchar() as well, but found no places that seemed to
have a comparable issue.
Add function API specifications for icu_to_uchar() and icu_from_uchar();
the lack of any thought-through specification is perhaps not unrelated
to the existence of this bug in the first place. Fix icu_to_uchar()
to guarantee a nul-terminated result; although no existing caller appears
to care, the fact that it would have been nul-terminated except in
extreme corner cases seems ideally designed to bite someone on the rear
someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst
case, that could perhaps have led to a non-nul-terminated result, too.
Fix icu_from_uchar() to have a more reasonable definition of the function
result --- no callers are actually paying attention, so this isn't a live
bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s
input string for consistency.
That is not the end of what needs to be done to these functions, but
it's as much as I have the patience for right now.
Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
8 years ago
|
|
|
extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
|
|
|
|
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
|
|
|
|
pg_locale_t locale);
|
|
|
|
extern size_t char2wchar(wchar_t *to, size_t tolen,
|
|
|
|
const char *from, size_t fromlen, pg_locale_t locale);
|
|
|
|
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
8 years ago
|
|
|
#endif /* _PG_LOCALE_ */
|