mirror of https://github.com/postgres/postgres
> > > > It was made to cope with encoding such as an Asian bloc in 7.2Beta2. > > > > > > > > Added ServerEncoding > > > > Korean (JOHAB), Thai (WIN874), > > > > Vietnamese (TCVN), Arabic (WIN1256) > > > > > > > > Added ClientEncoding > > > > Simplified Chinese (GBK), Korean (UHC) > > > > > > > > > > > > > http://www.sankyo-unyu.co.jp/Pool/postgresql-7.2b2.newencoding.diff.tar.gz > > > > (608K) > > > > > > Looks good. I need some people to review this for me. > > > > For me they look good too. The only missing part is a > > documentation. I will ask him to write it up. If he couldn't, I will > > do it for him. > > > The diff is 3mb > > > but appears to address only additions to multibyte. I have attached a > > > list of files it modifies. Also, look at the sizes of the mb/ > > > directory. It is getting large: > > > > > > 4 ./CVS > > > 6 ./Unicode/CVS > > > 3433 ./Unicode > > > 6197 . > > > > Yes. We definitely need the on-the-fly encoding addition capability: > > i.e. CREATE CHARACTER SET in the future... > > -- > > Tatsuo Ishii > > > > Address change. http://www.sankyo-unyu.co.jp/Pool/postgresql-7.2.newencoding.diff.gz Add PsqlODBC and document ...etc patch. Eiji Tokuya ecpg_big_bison
parent
03194432de
commit
a8bd7e1c6e
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,138 +1,330 @@ |
|||||||
/*--------
|
/*--------
|
||||||
* Module : multibyte.c |
* Module : multibyte.c |
||||||
* |
* |
||||||
* Description: Multibyte related additional function. |
* Description: New Multibyte related additional function. |
||||||
* |
* |
||||||
* Create 2001-03-03 Eiji Tokuya |
* Create 2001-03-03 Eiji Tokuya |
||||||
|
* New Create 2001-09-16 Eiji Tokuya |
||||||
*-------- |
*-------- |
||||||
*/ |
*/ |
||||||
|
|
||||||
#include "multibyte.h" |
#include "multibyte.h" |
||||||
|
#include "connection.h" |
||||||
|
#include "pgapifunc.h" |
||||||
#include <string.h> |
#include <string.h> |
||||||
|
#include <ctype.h> |
||||||
|
#include <stdio.h> |
||||||
|
#include <stdlib.h> |
||||||
|
|
||||||
int multibyte_client_encoding; /* Multibyte Client Encoding. */ |
int PG_CCST; /* Client Character Status */ |
||||||
int multibyte_status; /* Multibyte Odds and ends character. */ |
|
||||||
|
|
||||||
|
int PG_SCSC; /* Server Character Set (code) */ |
||||||
|
int PG_CCSC; /* Client Character Set (code) */ |
||||||
|
unsigned char *PG_SCSS; /* Server Character Set (string) */ |
||||||
|
unsigned char *PG_CCSS; /* Client Character Set (string) */ |
||||||
|
|
||||||
|
pg_CS CS_Table[] = |
||||||
|
{ |
||||||
|
{ "SQL_ASCII", SQL_ASCII }, |
||||||
|
{ "EUC_JP", EUC_JP }, |
||||||
|
{ "EUC_CN", EUC_CN }, |
||||||
|
{ "EUC_KR", EUC_KR }, |
||||||
|
{ "EUC_TW", EUC_TW }, |
||||||
|
{ "JOHAB", JOHAB }, |
||||||
|
{ "UNICODE", UTF8 }, |
||||||
|
{ "MULE_INTERNAL",MULE_INTERNAL }, |
||||||
|
{ "LATIN1", LATIN1 }, |
||||||
|
{ "LATIN2", LATIN2 }, |
||||||
|
{ "LATIN3", LATIN3 }, |
||||||
|
{ "LATIN4", LATIN4 }, |
||||||
|
{ "LATIN5", LATIN5 }, |
||||||
|
{ "LATIN6", LATIN6 }, |
||||||
|
{ "LATIN7", LATIN7 }, |
||||||
|
{ "LATIN8", LATIN8 }, |
||||||
|
{ "LATIN9", LATIN9 }, |
||||||
|
{ "LATIN10", LATIN10 }, |
||||||
|
{ "WIN1256", WIN1256 }, |
||||||
|
{ "TCVN", TCVN }, |
||||||
|
{ "WIN874", WIN874 }, |
||||||
|
{ "KOI8", KOI8R }, |
||||||
|
{ "WIN", WIN1251 }, |
||||||
|
{ "ALT", ALT }, |
||||||
|
{ "ISO_8859_5", ISO_8859_5 }, |
||||||
|
{ "ISO_8859_6", ISO_8859_6 }, |
||||||
|
{ "ISO_8859_7", ISO_8859_7 }, |
||||||
|
{ "ISO_8859_8", ISO_8859_8 }, |
||||||
|
|
||||||
|
|
||||||
|
{ "SJIS", SJIS }, |
||||||
|
{ "BIG5", BIG5 }, |
||||||
|
{ "GBK", GBK }, |
||||||
|
{ "UHC", UHC }, |
||||||
|
{ "WIN1250", WIN1250 }, |
||||||
|
{ "OTHER", OTHER } |
||||||
|
}; |
||||||
|
|
||||||
|
int |
||||||
|
pg_ismb(int characterset_code) |
||||||
|
{ |
||||||
|
int i=0,MB_CHARACTERSET[]={EUC_JP,EUC_CN,EUC_KR,EUC_TW,UTF8,MULE_INTERNAL,SJIS,BIG5,GBK,UHC,JOHAB}; |
||||||
|
|
||||||
|
while (MB_CHARACTERSET[i] != characterset_code || OTHER != MB_CHARACTERSET[i] ) |
||||||
|
{ |
||||||
|
i++; |
||||||
|
} |
||||||
|
return (MB_CHARACTERSET[i]); |
||||||
|
} |
||||||
|
|
||||||
|
int |
||||||
|
pg_CS_code(const unsigned char *characterset_string) |
||||||
|
{ |
||||||
|
int i = 0, c; |
||||||
|
for(i = 0; CS_Table[i].code != OTHER; i++) |
||||||
|
{ |
||||||
|
if (strstr(characterset_string,CS_Table[i].name)) |
||||||
|
c = CS_Table[i].code; |
||||||
|
} |
||||||
|
return (c); |
||||||
|
} |
||||||
|
|
||||||
unsigned char * |
unsigned char * |
||||||
multibyte_strchr(const unsigned char *s, unsigned char c) |
pg_CS_name(const int characterset_code) |
||||||
{ |
{ |
||||||
int mb_st = 0, |
int i = 0; |
||||||
i = 0; |
for (i = 0; CS_Table[i].code != OTHER; i++) |
||||||
|
{ |
||||||
|
if (CS_Table[i].code == characterset_code) |
||||||
|
return CS_Table[i].name; |
||||||
|
} |
||||||
|
return ("OTHER"); |
||||||
|
} |
||||||
|
|
||||||
while (!(mb_st == 0 && (s[i] == c || s[i] == 0))) |
int |
||||||
|
pg_CS_stat(int stat,unsigned int character,int characterset_code) |
||||||
|
{ |
||||||
|
if (character == 0) |
||||||
|
stat = 0; |
||||||
|
switch (characterset_code) |
||||||
{ |
{ |
||||||
if (s[i] == 0) |
case UTF8: |
||||||
return (0); |
|
||||||
switch (multibyte_client_encoding) |
|
||||||
{ |
{ |
||||||
|
if (stat < 2 && |
||||||
|
character >= 0x80) |
||||||
|
{ |
||||||
|
if (character >= 0xfc) |
||||||
|
stat = 6; |
||||||
|
else if (character >= 0xf8) |
||||||
|
stat = 5; |
||||||
|
else if (character >= 0xf0) |
||||||
|
stat = 4; |
||||||
|
else if (character >= 0xe0) |
||||||
|
stat = 3; |
||||||
|
else if (character >= 0xc0) |
||||||
|
stat = 2; |
||||||
|
} |
||||||
|
else if (stat > 2 && |
||||||
|
character > 0x7f) |
||||||
|
stat--; |
||||||
|
else |
||||||
|
stat=0; |
||||||
|
} |
||||||
|
break; |
||||||
|
/* Shift-JIS Support. */ |
||||||
case SJIS: |
case SJIS: |
||||||
{ |
{ |
||||||
if (mb_st < 2 && s[i] > 0x80 && !(s[i] > 0x9f && s[i] < 0xe0)) |
if (stat < 2 && |
||||||
mb_st = 2; |
character > 0x80 && |
||||||
else if (mb_st == 2) |
!(character > 0x9f && |
||||||
mb_st = 1; |
character < 0xe0)) |
||||||
|
stat = 2; |
||||||
|
else if (stat == 2) |
||||||
|
stat = 1; |
||||||
else |
else |
||||||
mb_st = 0; |
stat = 0; |
||||||
} |
} |
||||||
break; |
break; |
||||||
|
|
||||||
/* Chinese Big5 Support. */ |
/* Chinese Big5 Support. */ |
||||||
case BIG5: |
case BIG5: |
||||||
{ |
{ |
||||||
if (mb_st < 2 && s[i] > 0xA0) |
if (stat < 2 && |
||||||
mb_st = 2; |
character > 0xA0) |
||||||
else if (mb_st == 2) |
stat = 2; |
||||||
mb_st = 1; |
else if (stat == 2) |
||||||
|
stat = 1; |
||||||
else |
else |
||||||
mb_st = 0; |
stat = 0; |
||||||
} |
} |
||||||
break; |
break; |
||||||
default: |
/* Chinese GBK Support. */ |
||||||
mb_st = 0; |
case GBK: |
||||||
|
{ |
||||||
|
if (stat < 2 && |
||||||
|
character > 0x7F) |
||||||
|
stat = 2; |
||||||
|
else if (stat == 2) |
||||||
|
stat = 1; |
||||||
|
else |
||||||
|
stat = 0; |
||||||
} |
} |
||||||
i++; |
break; |
||||||
|
|
||||||
|
/* Korian UHC Support. */ |
||||||
|
case UHC: |
||||||
|
{ |
||||||
|
if (stat < 2 && |
||||||
|
character > 0x7F) |
||||||
|
stat = 2; |
||||||
|
else if (stat == 2) |
||||||
|
stat = 1; |
||||||
|
else |
||||||
|
stat = 0; |
||||||
} |
} |
||||||
#ifdef _DEBUG |
break; |
||||||
qlog("i = %d\n", i); |
|
||||||
#endif |
|
||||||
return (char *) (s + i); |
|
||||||
} |
|
||||||
|
|
||||||
|
/* EUC_JP Support */ |
||||||
|
case EUC_JP: |
||||||
|
{ |
||||||
|
if (stat < 3 &&
|
||||||
|
character == 0x8f) /* JIS X 0212 */ |
||||||
|
stat = 3; |
||||||
|
else |
||||||
|
if (stat != 2 &&
|
||||||
|
(character == 0x8e || |
||||||
|
character > 0xa0)) /* Half Katakana HighByte & Kanji HighByte */ |
||||||
|
stat = 2; |
||||||
|
else if (stat == 2) |
||||||
|
stat = 1; |
||||||
|
else |
||||||
|
stat = 0; |
||||||
|
} |
||||||
|
break; |
||||||
|
|
||||||
void |
/* EUC_CN, EUC_KR, JOHAB Support */ |
||||||
multibyte_init(void) |
case EUC_CN: |
||||||
{ |
case EUC_KR: |
||||||
multibyte_status = 0; |
case JOHAB: |
||||||
|
{ |
||||||
|
if (stat < 2 && |
||||||
|
character > 0xa0) |
||||||
|
stat = 2; |
||||||
|
else if (stat == 2) |
||||||
|
stat = 1; |
||||||
|
else |
||||||
|
stat = 0; |
||||||
|
} |
||||||
|
break; |
||||||
|
case EUC_TW: |
||||||
|
{ |
||||||
|
if (stat < 4 && |
||||||
|
character == 0x8e) |
||||||
|
stat = 4; |
||||||
|
else if (stat == 4 && |
||||||
|
character > 0xa0) |
||||||
|
stat = 3; |
||||||
|
else if (stat == 3 || |
||||||
|
stat < 2 && |
||||||
|
character > 0xa0) |
||||||
|
stat = 2; |
||||||
|
else if (stat == 2) |
||||||
|
stat = 1; |
||||||
|
else |
||||||
|
stat = 0; |
||||||
|
} |
||||||
|
break; |
||||||
|
default: |
||||||
|
{ |
||||||
|
stat = 0; |
||||||
|
} |
||||||
|
break; |
||||||
|
} |
||||||
|
return stat; |
||||||
} |
} |
||||||
|
|
||||||
|
|
||||||
unsigned char * |
unsigned char * |
||||||
check_client_encoding(unsigned char *str) |
pg_mbschr(const unsigned char *string, unsigned int character) |
||||||
{ |
{ |
||||||
if (strstr(str, "%27SJIS%27") || |
int mb_st = 0; |
||||||
strstr(str, "%27Shift_JIS%27") || |
unsigned char *s; |
||||||
strstr(str, "'SJIS'") || |
s = (unsigned char *) string; |
||||||
strstr(str, "'sjis'") || |
|
||||||
strstr(str, "'Shift_JIS'")) |
for(;;)
|
||||||
{ |
|
||||||
multibyte_client_encoding = SJIS; |
|
||||||
return ("SJIS"); |
|
||||||
} |
|
||||||
if (strstr(str, "%27BIG5%27") || |
|
||||||
strstr(str, "%27Big5%27") || |
|
||||||
strstr(str, "'BIG5'") || |
|
||||||
strstr(str, "'big5'") || |
|
||||||
strstr(str, "'Big5'")) |
|
||||||
{ |
{ |
||||||
multibyte_client_encoding = BIG5; |
mb_st = pg_CS_stat(mb_st, (unsigned char) *s,PG_CCSC); |
||||||
return ("BIG5"); |
if (mb_st == 0 && (*s == character || *s == 0)) |
||||||
|
break; |
||||||
|
else |
||||||
|
s++; |
||||||
} |
} |
||||||
return ("OTHER"); |
return (s); |
||||||
} |
} |
||||||
|
|
||||||
|
|
||||||
/*--------
|
|
||||||
* Multibyte Status Function. |
|
||||||
* Input char |
|
||||||
* Output 0 : 1 Byte Character. |
|
||||||
* 1 : MultibyteCharacter Last Byte. |
|
||||||
* N : MultibyteCharacter First or Middle Byte. |
|
||||||
*-------- |
|
||||||
*/ |
|
||||||
int |
int |
||||||
multibyte_char_check(unsigned char s) |
pg_mbslen(const unsigned char *string) |
||||||
{ |
{ |
||||||
switch (multibyte_client_encoding) |
unsigned char *s; |
||||||
|
int len, cs_stat; |
||||||
|
for (len = 0, cs_stat = 0, s = (unsigned char *) string; *s != 0; s++) |
||||||
{ |
{ |
||||||
/* Japanese Shift-JIS(CP932) Support. */ |
cs_stat = pg_CS_stat(cs_stat,(unsigned int) *s, PG_CCSC); |
||||||
case SJIS: |
if (cs_stat < 2) |
||||||
{ |
len++; |
||||||
if (multibyte_status < 2 && s > 0x80 && !(s > 0x9f && s < 0xE0)) |
|
||||||
multibyte_status = 2; |
|
||||||
else if (multibyte_status == 2) |
|
||||||
multibyte_status = 1; |
|
||||||
else |
|
||||||
multibyte_status = 0; |
|
||||||
} |
} |
||||||
break; |
return len; |
||||||
|
} |
||||||
|
|
||||||
/* Chinese Big5(CP950) Support. */ |
unsigned char * |
||||||
case BIG5: |
pg_mbsinc(const unsigned char *current ) |
||||||
|
{ |
||||||
|
int mb_stat = 0; |
||||||
|
if (*current != 0) |
||||||
{ |
{ |
||||||
if (multibyte_status < 2 && s > 0xA0) |
mb_stat = (int) pg_CS_stat(mb_stat, *current, PG_CCSC); |
||||||
multibyte_status = 2; |
if (mb_stat == 0) |
||||||
else if (multibyte_status == 2) |
mb_stat = 1; |
||||||
multibyte_status = 1; |
return ((unsigned char *) current + mb_stat); |
||||||
|
} |
||||||
else |
else |
||||||
multibyte_status = 0; |
return NULL; |
||||||
|
} |
||||||
|
|
||||||
|
void |
||||||
|
CC_lookup_characterset(ConnectionClass *self) |
||||||
|
{ |
||||||
|
HSTMT hstmt; |
||||||
|
StatementClass *stmt; |
||||||
|
RETCODE result; |
||||||
|
static char *func = "CC_lookup_characterset"; |
||||||
|
|
||||||
|
mylog("%s: entering...\n", func); |
||||||
|
PG_SCSS = malloc(MAX_CHARACTERSET_NAME); |
||||||
|
PG_CCSS = malloc(MAX_CHARACTERSET_NAME); |
||||||
|
|
||||||
|
result = PGAPI_AllocStmt(self, &hstmt); |
||||||
|
if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) |
||||||
|
return; |
||||||
|
stmt = (StatementClass *) hstmt; |
||||||
|
|
||||||
|
result = PGAPI_ExecDirect(hstmt, "Show Client_Encoding", SQL_NTS); |
||||||
|
if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) |
||||||
|
{ |
||||||
|
PGAPI_FreeStmt(hstmt, SQL_DROP); |
||||||
|
return; |
||||||
} |
} |
||||||
break; |
result = PGAPI_AllocStmt(self, &hstmt); |
||||||
default: |
if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) |
||||||
multibyte_status = 0; |
return; |
||||||
|
stmt = (StatementClass *) hstmt; |
||||||
|
|
||||||
|
result = PGAPI_ExecDirect(hstmt, "Show Server_Encoding", SQL_NTS); |
||||||
|
if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) |
||||||
|
{ |
||||||
|
PGAPI_FreeStmt(hstmt, SQL_DROP); |
||||||
|
return; |
||||||
} |
} |
||||||
#ifdef _DEBUG |
|
||||||
qlog("multibyte_client_encoding = %d s = 0x%02X multibyte_stat = %d\n", multibyte_client_encoding, s, multibyte_status); |
strcpy(PG_SCSS , pg_CS_name(PG_SCSC = pg_CS_code(PG_SCSS))); |
||||||
#endif |
strcpy(PG_CCSS , pg_CS_name(PG_CCSC = pg_CS_code(PG_CCSS))); |
||||||
return (multibyte_status); |
|
||||||
|
qlog(" [ Server encoding = '%s' (code = %d), Client encoding = '%s' (code = %d) ]\n", PG_SCSS, PG_SCSC, PG_CCSS, PG_CCSC); |
||||||
} |
} |
||||||
|
Loading…
Reference in new issue