mirror of https://github.com/postgres/postgres
> > > > It was made to cope with encoding such as an Asian bloc in 7.2Beta2. > > > > > > > > Added ServerEncoding > > > > Korean (JOHAB), Thai (WIN874), > > > > Vietnamese (TCVN), Arabic (WIN1256) > > > > > > > > Added ClientEncoding > > > > Simplified Chinese (GBK), Korean (UHC) > > > > > > > > > > > > > http://www.sankyo-unyu.co.jp/Pool/postgresql-7.2b2.newencoding.diff.tar.gz > > > > (608K) > > > > > > Looks good. I need some people to review this for me. > > > > For me they look good too. The only missing part is a > > documentation. I will ask him to write it up. If he couldn't, I will > > do it for him. > > > The diff is 3mb > > > but appears to address only additions to multibyte. I have attached a > > > list of files it modifies. Also, look at the sizes of the mb/ > > > directory. It is getting large: > > > > > > 4 ./CVS > > > 6 ./Unicode/CVS > > > 3433 ./Unicode > > > 6197 . > > > > Yes. We definitely need the on-the-fly encoding addition capability: > > i.e. CREATE CHRACTER SET in the future... > > -- > > Tatsuo Ishii > > > > Address chainge. http://www.sankyo-unyu.co.jp/Pool/postgresql-7.2.newencoding.diff.gz Add PsqlODBC and document ...etc patch. Eiji Tokuyaecpg_big_bison
parent
03194432de
commit
a8bd7e1c6e
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,138 +1,330 @@ |
||||
/*--------
|
||||
* Module : multibyte.c |
||||
* |
||||
* Description: Mlutibyte related additional function. |
||||
* Description: New Multibyte related additional function. |
||||
* |
||||
* Create 2001-03-03 Eiji Tokuya |
||||
* New Create 2001-09-16 Eiji Tokuya |
||||
*-------- |
||||
*/ |
||||
|
||||
#include "multibyte.h" |
||||
#include "connection.h" |
||||
#include "pgapifunc.h" |
||||
#include <string.h> |
||||
#include <ctype.h> |
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
|
||||
int multibyte_client_encoding; /* Multibyte Client Encoding. */ |
||||
int multibyte_status; /* Multibyte Odds and ends character. */ |
||||
int PG_CCST; /* Client Character Status */ |
||||
|
||||
int PG_SCSC; /* Server Character Set (code) */ |
||||
int PG_CCSC; /* Client Character Set (code) */ |
||||
unsigned char *PG_SCSS; /* Server Character Set (string) */ |
||||
unsigned char *PG_CCSS; /* Client Character Set (string) */ |
||||
|
||||
unsigned char * |
||||
multibyte_strchr(const unsigned char *s, unsigned char c) |
||||
pg_CS CS_Table[] = |
||||
{ |
||||
int mb_st = 0, |
||||
i = 0; |
||||
{ "SQL_ASCII", SQL_ASCII }, |
||||
{ "EUC_JP", EUC_JP }, |
||||
{ "EUC_CN", EUC_CN }, |
||||
{ "EUC_KR", EUC_KR }, |
||||
{ "EUC_TW", EUC_TW }, |
||||
{ "JOHAB", JOHAB }, |
||||
{ "UNICODE", UTF8 }, |
||||
{ "MULE_INTERNAL",MULE_INTERNAL }, |
||||
{ "LATIN1", LATIN1 }, |
||||
{ "LATIN2", LATIN2 }, |
||||
{ "LATIN3", LATIN3 }, |
||||
{ "LATIN4", LATIN4 }, |
||||
{ "LATIN5", LATIN5 }, |
||||
{ "LATIN6", LATIN6 }, |
||||
{ "LATIN7", LATIN7 }, |
||||
{ "LATIN8", LATIN8 }, |
||||
{ "LATIN9", LATIN9 }, |
||||
{ "LATIN10", LATIN10 }, |
||||
{ "WIN1256", WIN1256 }, |
||||
{ "TCVN", TCVN }, |
||||
{ "WIN874", WIN874 }, |
||||
{ "KOI8", KOI8R }, |
||||
{ "WIN", WIN1251 }, |
||||
{ "ALT", ALT }, |
||||
{ "ISO_8859_5", ISO_8859_5 }, |
||||
{ "ISO_8859_6", ISO_8859_6 }, |
||||
{ "ISO_8859_7", ISO_8859_7 }, |
||||
{ "ISO_8859_8", ISO_8859_8 }, |
||||
|
||||
while (!(mb_st == 0 && (s[i] == c || s[i] == 0))) |
||||
{ |
||||
if (s[i] == 0) |
||||
return (0); |
||||
switch (multibyte_client_encoding) |
||||
{ |
||||
case SJIS: |
||||
{ |
||||
if (mb_st < 2 && s[i] > 0x80 && !(s[i] > 0x9f && s[i] < 0xe0)) |
||||
mb_st = 2; |
||||
else if (mb_st == 2) |
||||
mb_st = 1; |
||||
else |
||||
mb_st = 0; |
||||
} |
||||
break; |
||||
|
||||
/* Chinese Big5 Support. */ |
||||
case BIG5: |
||||
{ |
||||
if (mb_st < 2 && s[i] > 0xA0) |
||||
mb_st = 2; |
||||
else if (mb_st == 2) |
||||
mb_st = 1; |
||||
else |
||||
mb_st = 0; |
||||
} |
||||
break; |
||||
default: |
||||
mb_st = 0; |
||||
} |
||||
{ "SJIS", SJIS }, |
||||
{ "BIG5", BIG5 }, |
||||
{ "GBK", GBK }, |
||||
{ "UHC", UHC }, |
||||
{ "WIN1250", WIN1250 }, |
||||
{ "OTHER", OTHER } |
||||
}; |
||||
|
||||
int |
||||
pg_ismb(int characterset_code) |
||||
{ |
||||
int i=0,MB_CHARACTERSET[]={EUC_JP,EUC_CN,EUC_KR,EUC_TW,UTF8,MULE_INTERNAL,SJIS,BIG5,GBK,UHC,JOHAB}; |
||||
|
||||
while (MB_CHARACTERSET[i] != characterset_code || OTHER != MB_CHARACTERSET[i] ) |
||||
{ |
||||
i++; |
||||
} |
||||
#ifdef _DEBUG |
||||
qlog("i = %d\n", i); |
||||
#endif |
||||
return (char *) (s + i); |
||||
return (MB_CHARACTERSET[i]); |
||||
} |
||||
|
||||
|
||||
void |
||||
multibyte_init(void) |
||||
int |
||||
pg_CS_code(const unsigned char *characterset_string) |
||||
{ |
||||
multibyte_status = 0; |
||||
int i = 0, c; |
||||
for(i = 0; CS_Table[i].code != OTHER; i++) |
||||
{ |
||||
if (strstr(characterset_string,CS_Table[i].name)) |
||||
c = CS_Table[i].code; |
||||
} |
||||
return (c); |
||||
} |
||||
|
||||
|
||||
unsigned char * |
||||
check_client_encoding(unsigned char *str) |
||||
pg_CS_name(const int characterset_code) |
||||
{ |
||||
if (strstr(str, "%27SJIS%27") || |
||||
strstr(str, "%27Shift_JIS%27") || |
||||
strstr(str, "'SJIS'") || |
||||
strstr(str, "'sjis'") || |
||||
strstr(str, "'Shift_JIS'")) |
||||
{ |
||||
multibyte_client_encoding = SJIS; |
||||
return ("SJIS"); |
||||
} |
||||
if (strstr(str, "%27BIG5%27") || |
||||
strstr(str, "%27Big5%27") || |
||||
strstr(str, "'BIG5'") || |
||||
strstr(str, "'big5'") || |
||||
strstr(str, "'Big5'")) |
||||
int i = 0; |
||||
for (i = 0; CS_Table[i].code != OTHER; i++) |
||||
{ |
||||
multibyte_client_encoding = BIG5; |
||||
return ("BIG5"); |
||||
if (CS_Table[i].code == characterset_code) |
||||
return CS_Table[i].name; |
||||
} |
||||
return ("OTHER"); |
||||
} |
||||
|
||||
|
||||
/*--------
|
||||
* Multibyte Status Function. |
||||
* Input char |
||||
* Output 0 : 1 Byte Character. |
||||
* 1 : MultibyteCharacter Last Byte. |
||||
* N : MultibyteCharacter First or Middle Byte. |
||||
*-------- |
||||
*/ |
||||
int |
||||
multibyte_char_check(unsigned char s) |
||||
pg_CS_stat(int stat,unsigned int character,int characterset_code) |
||||
{ |
||||
switch (multibyte_client_encoding) |
||||
if (character == 0) |
||||
stat = 0; |
||||
switch (characterset_code) |
||||
{ |
||||
/* Japanese Shift-JIS(CP932) Support. */ |
||||
case SJIS: |
||||
case UTF8: |
||||
{ |
||||
if (multibyte_status < 2 && s > 0x80 && !(s > 0x9f && s < 0xE0)) |
||||
multibyte_status = 2; |
||||
else if (multibyte_status == 2) |
||||
multibyte_status = 1; |
||||
if (stat < 2 && |
||||
character >= 0x80) |
||||
{ |
||||
if (character >= 0xfc) |
||||
stat = 6; |
||||
else if (character >= 0xf8) |
||||
stat = 5; |
||||
else if (character >= 0xf0) |
||||
stat = 4; |
||||
else if (character >= 0xe0) |
||||
stat = 3; |
||||
else if (character >= 0xc0) |
||||
stat = 2; |
||||
} |
||||
else if (stat > 2 && |
||||
character > 0x7f) |
||||
stat--; |
||||
else |
||||
multibyte_status = 0; |
||||
stat=0; |
||||
} |
||||
break; |
||||
|
||||
/* Chinese Big5(CP950) Support. */ |
||||
/* Shift-JIS Support. */ |
||||
case SJIS: |
||||
{ |
||||
if (stat < 2 && |
||||
character > 0x80 && |
||||
!(character > 0x9f && |
||||
character < 0xe0)) |
||||
stat = 2; |
||||
else if (stat == 2) |
||||
stat = 1; |
||||
else |
||||
stat = 0; |
||||
} |
||||
break; |
||||
/* Chinese Big5 Support. */ |
||||
case BIG5: |
||||
{ |
||||
if (multibyte_status < 2 && s > 0xA0) |
||||
multibyte_status = 2; |
||||
else if (multibyte_status == 2) |
||||
multibyte_status = 1; |
||||
if (stat < 2 && |
||||
character > 0xA0) |
||||
stat = 2; |
||||
else if (stat == 2) |
||||
stat = 1; |
||||
else |
||||
stat = 0; |
||||
} |
||||
break; |
||||
/* Chinese GBK Support. */ |
||||
case GBK: |
||||
{ |
||||
if (stat < 2 && |
||||
character > 0x7F) |
||||
stat = 2; |
||||
else if (stat == 2) |
||||
stat = 1; |
||||
else |
||||
stat = 0; |
||||
} |
||||
break; |
||||
|
||||
/* Korean UHC Support. */ |
||||
case UHC: |
||||
{ |
||||
if (stat < 2 && |
||||
character > 0x7F) |
||||
stat = 2; |
||||
else if (stat == 2) |
||||
stat = 1; |
||||
else |
||||
stat = 0; |
||||
} |
||||
break; |
||||
|
||||
/* EUC_JP Support */ |
||||
case EUC_JP: |
||||
{ |
||||
if (stat < 3 &&
|
||||
character == 0x8f) /* JIS X 0212 */ |
||||
stat = 3; |
||||
else |
||||
multibyte_status = 0; |
||||
if (stat != 2 &&
|
||||
(character == 0x8e || |
||||
character > 0xa0)) /* Half Katakana HighByte & Kanji HighByte */ |
||||
stat = 2; |
||||
else if (stat == 2) |
||||
stat = 1; |
||||
else |
||||
stat = 0; |
||||
} |
||||
break; |
||||
|
||||
/* EUC_CN, EUC_KR, JOHAB Support */ |
||||
case EUC_CN: |
||||
case EUC_KR: |
||||
case JOHAB: |
||||
{ |
||||
if (stat < 2 && |
||||
character > 0xa0) |
||||
stat = 2; |
||||
else if (stat == 2) |
||||
stat = 1; |
||||
else |
||||
stat = 0; |
||||
} |
||||
break; |
||||
case EUC_TW: |
||||
{ |
||||
if (stat < 4 && |
||||
character == 0x8e) |
||||
stat = 4; |
||||
else if (stat == 4 && |
||||
character > 0xa0) |
||||
stat = 3; |
||||
else if (stat == 3 || |
||||
stat < 2 && |
||||
character > 0xa0) |
||||
stat = 2; |
||||
else if (stat == 2) |
||||
stat = 1; |
||||
else |
||||
stat = 0; |
||||
} |
||||
break; |
||||
default: |
||||
multibyte_status = 0; |
||||
{ |
||||
stat = 0; |
||||
} |
||||
break; |
||||
} |
||||
return stat; |
||||
} |
||||
|
||||
|
||||
unsigned char * |
||||
pg_mbschr(const unsigned char *string, unsigned int character) |
||||
{ |
||||
int mb_st = 0; |
||||
unsigned char *s; |
||||
s = (unsigned char *) string; |
||||
|
||||
for(;;)
|
||||
{ |
||||
mb_st = pg_CS_stat(mb_st, (unsigned char) *s,PG_CCSC); |
||||
if (mb_st == 0 && (*s == character || *s == 0)) |
||||
break; |
||||
else |
||||
s++; |
||||
} |
||||
return (s); |
||||
} |
||||
|
||||
int |
||||
pg_mbslen(const unsigned char *string) |
||||
{ |
||||
unsigned char *s; |
||||
int len, cs_stat; |
||||
for (len = 0, cs_stat = 0, s = (unsigned char *) string; *s != 0; s++) |
||||
{ |
||||
cs_stat = pg_CS_stat(cs_stat,(unsigned int) *s, PG_CCSC); |
||||
if (cs_stat < 2) |
||||
len++; |
||||
} |
||||
return len; |
||||
} |
||||
|
||||
unsigned char * |
||||
pg_mbsinc(const unsigned char *current ) |
||||
{ |
||||
int mb_stat = 0; |
||||
if (*current != 0) |
||||
{ |
||||
mb_stat = (int) pg_CS_stat(mb_stat, *current, PG_CCSC); |
||||
if (mb_stat == 0) |
||||
mb_stat = 1; |
||||
return ((unsigned char *) current + mb_stat); |
||||
} |
||||
else |
||||
return NULL; |
||||
} |
||||
|
||||
void |
||||
CC_lookup_characterset(ConnectionClass *self) |
||||
{ |
||||
HSTMT hstmt; |
||||
StatementClass *stmt; |
||||
RETCODE result; |
||||
static char *func = "CC_lookup_characterset"; |
||||
|
||||
mylog("%s: entering...\n", func); |
||||
PG_SCSS = malloc(MAX_CHARACTERSET_NAME); |
||||
PG_CCSS = malloc(MAX_CHARACTERSET_NAME); |
||||
|
||||
result = PGAPI_AllocStmt(self, &hstmt); |
||||
if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) |
||||
return; |
||||
stmt = (StatementClass *) hstmt; |
||||
|
||||
result = PGAPI_ExecDirect(hstmt, "Show Client_Encoding", SQL_NTS); |
||||
if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) |
||||
{ |
||||
PGAPI_FreeStmt(hstmt, SQL_DROP); |
||||
return; |
||||
} |
||||
result = PGAPI_AllocStmt(self, &hstmt); |
||||
if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) |
||||
return; |
||||
stmt = (StatementClass *) hstmt; |
||||
|
||||
result = PGAPI_ExecDirect(hstmt, "Show Server_Encoding", SQL_NTS); |
||||
if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) |
||||
{ |
||||
PGAPI_FreeStmt(hstmt, SQL_DROP); |
||||
return; |
||||
} |
||||
#ifdef _DEBUG |
||||
qlog("multibyte_client_encoding = %d s = 0x%02X multibyte_stat = %d\n", multibyte_client_encoding, s, multibyte_status); |
||||
#endif |
||||
return (multibyte_status); |
||||
|
||||
strcpy(PG_SCSS , pg_CS_name(PG_SCSC = pg_CS_code(PG_SCSS))); |
||||
strcpy(PG_CCSS , pg_CS_name(PG_CCSC = pg_CS_code(PG_CCSS))); |
||||
|
||||
qlog(" [ Server encoding = '%s' (code = %d), Client encoding = '%s' (code = %d) ]\n", PG_SCSS, PG_SCSC, PG_CCSS, PG_CCSC); |
||||
} |
||||
|
Loading…
Reference in new issue