|
|
|
|
@ -4,7 +4,7 @@ |
|
|
|
|
* (currently mule internal code (mic) is used) |
|
|
|
|
* Tatsuo Ishii |
|
|
|
|
* |
|
|
|
|
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $ |
|
|
|
|
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $ |
|
|
|
|
*/ |
|
|
|
|
#include "postgres.h" |
|
|
|
|
|
|
|
|
|
@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_ |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef USE_WIDE_UPPER_LOWER |
|
|
|
|
|
|
|
|
|
/*
 * wchar2char --- convert wide characters to multibyte format
 *
 * This has the same API as the standard wcstombs() function; in particular,
 * tolen is the maximum number of bytes to store at *to, and *from must be
 * zero-terminated.  The output will be zero-terminated iff there is room.
 *
 * Returns the number of bytes stored at *to, not counting any terminating
 * zero byte, or (size_t) -1 if the conversion failed.  If tolen is zero,
 * nothing is written and zero is returned.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
	if (tolen == 0)
		return 0;

#ifdef WIN32

	/*
	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
	 * for some reason mbstowcs and wcstombs won't do this for us, so we call
	 * the Win32 conversion API (here WideCharToMultiByte()) directly.
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		/*
		 * Keep the API's int return value in an int, so that the failure
		 * test is not an unsigned comparison in disguise.
		 */
		int			nbytes = WideCharToMultiByte(CP_UTF8, 0, from, -1,
												 to, (int) tolen,
												 NULL, NULL);

		/* A zero return is failure */
		if (nbytes <= 0)
			return (size_t) -1;

		Assert((size_t) nbytes <= tolen);
		/* Microsoft counts the zero terminator in the result */
		return (size_t) nbytes - 1;
	}
#endif   /* WIN32 */

	/* Everywhere else the C library does the job */
	return wcstombs(to, from, tolen);
}
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* char2wchar --- convert multibyte characters to wide characters |
|
|
|
|
* |
|
|
|
|
* This has almost the API of mbstowcs(), except that *from need not be |
|
|
|
|
* null-terminated; instead, the number of input bytes is specified as |
|
|
|
|
* fromlen. Also, we ereport() rather than returning -1 for invalid |
|
|
|
|
* input encoding. tolen is the maximum number of wchar_t's to store at *to. |
|
|
|
|
* The output will be zero-terminated iff there is room. |
|
|
|
|
*/ |
|
|
|
|
size_t |
|
|
|
|
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen) |
|
|
|
|
{ |
|
|
|
|
size_t result; |
|
|
|
|
|
|
|
|
|
if (tolen == 0) |
|
|
|
|
return 0; |
|
|
|
|
|
|
|
|
|
#ifdef WIN32 |
|
|
|
|
/* See WIN32 "Unicode" comment above */ |
|
|
|
|
if (GetDatabaseEncoding() == PG_UTF8) |
|
|
|
|
{ |
|
|
|
|
/* Win32 API does not work for zero-length input */ |
|
|
|
|
if (fromlen == 0) |
|
|
|
|
result = 0; |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); |
|
|
|
|
/* A zero return is failure */ |
|
|
|
|
if (result == 0) |
|
|
|
|
result = -1; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (result != -1) |
|
|
|
|
{ |
|
|
|
|
Assert(result < tolen); |
|
|
|
|
/* Append trailing null wchar (MultiByteToWideChar() does not) */ |
|
|
|
|
to[result] = 0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
#endif /* WIN32 */ |
|
|
|
|
{ |
|
|
|
|
if (lc_ctype_is_c()) |
|
|
|
|
{ |
|
|
|
|
/*
|
|
|
|
|
* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be |
|
|
|
|
* allocated with sufficient space |
|
|
|
|
*/ |
|
|
|
|
result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
/* mbstowcs requires ending '\0' */ |
|
|
|
|
char *str = pnstrdup(from, fromlen); |
|
|
|
|
|
|
|
|
|
result = mbstowcs(to, str, tolen); |
|
|
|
|
pfree(str); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (result == -1) |
|
|
|
|
{ |
|
|
|
|
/*
|
|
|
|
|
* Invalid multibyte character encountered. We try to give a useful |
|
|
|
|
* error message by letting pg_verifymbstr check the string. But it's |
|
|
|
|
* possible that the string is OK to us, and not OK to mbstowcs --- |
|
|
|
|
* this suggests that the LC_CTYPE locale is different from the |
|
|
|
|
* database encoding. Give a generic error message if verifymbstr |
|
|
|
|
* can't find anything wrong. |
|
|
|
|
*/ |
|
|
|
|
pg_verifymbstr(from, fromlen, false); /* might not return */ |
|
|
|
|
/* but if it does ... */ |
|
|
|
|
ereport(ERROR, |
|
|
|
|
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), |
|
|
|
|
errmsg("invalid multibyte character for locale"), |
|
|
|
|
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); |
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
/* convert a multibyte string to a wchar */ |
|
|
|
|
int |
|
|
|
|
pg_mb2wchar(const char *from, pg_wchar *to) |
|
|
|
|
|