|
|
|
|
@ -6,7 +6,7 @@ |
|
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California |
|
|
|
|
* |
|
|
|
|
* IDENTIFICATION |
|
|
|
|
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.10 2005/06/10 16:43:56 ishii Exp $ |
|
|
|
|
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.11 2005/06/24 13:56:39 ishii Exp $ |
|
|
|
|
* |
|
|
|
|
*------------------------------------------------------------------------- |
|
|
|
|
*/ |
|
|
|
|
@ -58,6 +58,8 @@ static void sjis2mic(unsigned char *sjis, unsigned char *p, int len); |
|
|
|
|
static void mic2sjis(unsigned char *mic, unsigned char *p, int len); |
|
|
|
|
static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len); |
|
|
|
|
static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len); |
|
|
|
|
static void euc_jp2sjis(unsigned char *mic, unsigned char *p, int len); |
|
|
|
|
static void sjis2euc_jp(unsigned char *mic, unsigned char *p, int len); |
|
|
|
|
|
|
|
|
|
Datum |
|
|
|
|
euc_jp_to_sjis(PG_FUNCTION_ARGS) |
|
|
|
|
@ -65,16 +67,12 @@ euc_jp_to_sjis(PG_FUNCTION_ARGS) |
|
|
|
|
unsigned char *src = PG_GETARG_CSTRING(2); |
|
|
|
|
unsigned char *dest = PG_GETARG_CSTRING(3); |
|
|
|
|
int len = PG_GETARG_INT32(4); |
|
|
|
|
unsigned char *buf; |
|
|
|
|
|
|
|
|
|
Assert(PG_GETARG_INT32(0) == PG_EUC_JP); |
|
|
|
|
Assert(PG_GETARG_INT32(1) == PG_SJIS); |
|
|
|
|
Assert(len >= 0); |
|
|
|
|
|
|
|
|
|
buf = palloc(len * ENCODING_GROWTH_RATE); |
|
|
|
|
euc_jp2mic(src, buf, len); |
|
|
|
|
mic2sjis(buf, dest, strlen(buf)); |
|
|
|
|
pfree(buf); |
|
|
|
|
euc_jp2sjis(src, dest, len); |
|
|
|
|
|
|
|
|
|
PG_RETURN_VOID(); |
|
|
|
|
} |
|
|
|
|
@ -85,16 +83,12 @@ sjis_to_euc_jp(PG_FUNCTION_ARGS) |
|
|
|
|
unsigned char *src = PG_GETARG_CSTRING(2); |
|
|
|
|
unsigned char *dest = PG_GETARG_CSTRING(3); |
|
|
|
|
int len = PG_GETARG_INT32(4); |
|
|
|
|
unsigned char *buf; |
|
|
|
|
|
|
|
|
|
Assert(PG_GETARG_INT32(0) == PG_SJIS); |
|
|
|
|
Assert(PG_GETARG_INT32(1) == PG_EUC_JP); |
|
|
|
|
Assert(len >= 0); |
|
|
|
|
|
|
|
|
|
buf = palloc(len * ENCODING_GROWTH_RATE); |
|
|
|
|
sjis2mic(src, buf, len); |
|
|
|
|
mic2euc_jp(buf, dest, strlen(buf)); |
|
|
|
|
pfree(buf); |
|
|
|
|
sjis2euc_jp(src, dest, len); |
|
|
|
|
|
|
|
|
|
PG_RETURN_VOID(); |
|
|
|
|
} |
|
|
|
|
@ -454,3 +448,199 @@ mic2euc_jp(unsigned char *mic, unsigned char *p, int len) |
|
|
|
|
} |
|
|
|
|
*p = '\0'; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* EUC_JP -> SJIS |
|
|
|
|
*/ |
|
|
|
|
static void |
|
|
|
|
euc_jp2sjis(unsigned char *euc, unsigned char *p, int len) |
|
|
|
|
{ |
|
|
|
|
int c1, |
|
|
|
|
c2, |
|
|
|
|
k; |
|
|
|
|
unsigned char *euc_end = euc + len; |
|
|
|
|
|
|
|
|
|
while (euc_end >= euc && (c1 = *euc++)) |
|
|
|
|
{ |
|
|
|
|
if(c1 < 0x80) |
|
|
|
|
{ |
|
|
|
|
/* should be ASCII */ |
|
|
|
|
*p++ = c1; |
|
|
|
|
} |
|
|
|
|
else if (c1 == SS2) |
|
|
|
|
{ |
|
|
|
|
/* hankaku kana? */ |
|
|
|
|
*p++ = *euc++; |
|
|
|
|
} |
|
|
|
|
else if (c1 == SS3) |
|
|
|
|
{ |
|
|
|
|
/* JIS X0212 kanji? */ |
|
|
|
|
c1 = *euc++; |
|
|
|
|
c2 = *euc++; |
|
|
|
|
k = c1 << 8 | c2; |
|
|
|
|
if (k >= 0xf5a1) |
|
|
|
|
{ |
|
|
|
|
/* UDC2 */ |
|
|
|
|
c1 -= 0x54; |
|
|
|
|
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74; |
|
|
|
|
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
int i, k2; |
|
|
|
|
|
|
|
|
|
/* IBM kanji */ |
|
|
|
|
for (i = 0;; i++) |
|
|
|
|
{ |
|
|
|
|
k2 = ibmkanji[i].euc & 0xffff; |
|
|
|
|
if (k2 == 0xffff) |
|
|
|
|
{ |
|
|
|
|
*p++ = PGSJISALTCODE >> 8; |
|
|
|
|
*p++ = PGSJISALTCODE & 0xff; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
if (k2 == k) |
|
|
|
|
{ |
|
|
|
|
k = ibmkanji[i].sjis; |
|
|
|
|
*p++ = k >> 8; |
|
|
|
|
*p++ = k & 0xff; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
}
|
|
|
|
|
else |
|
|
|
|
{
|
|
|
|
|
/* JIS X0208 kanji? */ |
|
|
|
|
c2 = *euc++; |
|
|
|
|
k = (c1 << 8) | (c2 & 0xff); |
|
|
|
|
if (k >= 0xf5a1) |
|
|
|
|
{ |
|
|
|
|
/* UDC1 */ |
|
|
|
|
c1 -= 0x54; |
|
|
|
|
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1); |
|
|
|
|
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
*p = '\0'; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* SJIS ---> EUC_JP |
|
|
|
|
*/ |
|
|
|
|
static void |
|
|
|
|
sjis2euc_jp(unsigned char *sjis, unsigned char *p, int len) |
|
|
|
|
{ |
|
|
|
|
int c1, |
|
|
|
|
c2, |
|
|
|
|
i, |
|
|
|
|
k, |
|
|
|
|
k2; |
|
|
|
|
unsigned char *sjis_end = sjis + len; |
|
|
|
|
|
|
|
|
|
while (sjis_end >= sjis && (c1 = *sjis++)) |
|
|
|
|
{ |
|
|
|
|
if(c1 < 0x80) |
|
|
|
|
{ |
|
|
|
|
/* should be ASCII */ |
|
|
|
|
*p++ = c1; |
|
|
|
|
} |
|
|
|
|
else if (c1 >= 0xa1 && c1 <= 0xdf) |
|
|
|
|
{ |
|
|
|
|
/* JIS X0201 (1 byte kana) */ |
|
|
|
|
*p++ = SS2; |
|
|
|
|
*p++ = c1; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
/*
|
|
|
|
|
* JIS X0208, X0212, user defined extended characters |
|
|
|
|
*/ |
|
|
|
|
c2 = *sjis++; |
|
|
|
|
k = (c1 << 8) + c2; |
|
|
|
|
if (k >= 0xed40 && k < 0xf040) |
|
|
|
|
{ |
|
|
|
|
/* NEC selection IBM kanji */ |
|
|
|
|
for (i = 0;; i++) |
|
|
|
|
{ |
|
|
|
|
k2 = ibmkanji[i].nec; |
|
|
|
|
if (k2 == 0xffff) |
|
|
|
|
break; |
|
|
|
|
if (k2 == k) |
|
|
|
|
{ |
|
|
|
|
k = ibmkanji[i].sjis; |
|
|
|
|
c1 = (k >> 8) & 0xff; |
|
|
|
|
c2 = k & 0xff; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (k < 0xeb3f) |
|
|
|
|
{ |
|
|
|
|
/* JIS X0208 */ |
|
|
|
|
*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e); |
|
|
|
|
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80); |
|
|
|
|
} |
|
|
|
|
else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc)) |
|
|
|
|
{ |
|
|
|
|
/* NEC selection IBM kanji - Other undecided justice */ |
|
|
|
|
*p++ = PGEUCALTCODE >> 8; |
|
|
|
|
*p++ = PGEUCALTCODE & 0xff; |
|
|
|
|
} |
|
|
|
|
else if (k >= 0xf040 && k < 0xf540) |
|
|
|
|
{ |
|
|
|
|
/*
|
|
|
|
|
* UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 - |
|
|
|
|
* 0x7e7e EUC 0xf5a1 - 0xfefe |
|
|
|
|
*/ |
|
|
|
|
c1 -= 0x6f; |
|
|
|
|
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e); |
|
|
|
|
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80); |
|
|
|
|
} |
|
|
|
|
else if (k >= 0xf540 && k < 0xfa40) |
|
|
|
|
{ |
|
|
|
|
/*
|
|
|
|
|
* UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 - |
|
|
|
|
* 0x7e7e EUC 0x8ff5a1 - 0x8ffefe |
|
|
|
|
*/ |
|
|
|
|
*p++ = SS3; |
|
|
|
|
c1 -= 0x74; |
|
|
|
|
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e); |
|
|
|
|
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80); |
|
|
|
|
} |
|
|
|
|
else if (k >= 0xfa40) |
|
|
|
|
{ |
|
|
|
|
/*
|
|
|
|
|
* mapping IBM kanji to X0208 and X0212 |
|
|
|
|
* |
|
|
|
|
*/ |
|
|
|
|
for (i = 0;; i++) |
|
|
|
|
{ |
|
|
|
|
k2 = ibmkanji[i].sjis; |
|
|
|
|
if (k2 == 0xffff) |
|
|
|
|
break; |
|
|
|
|
if (k2 == k) |
|
|
|
|
{ |
|
|
|
|
k = ibmkanji[i].euc; |
|
|
|
|
if (k >= 0x8f0000) |
|
|
|
|
{ |
|
|
|
|
*p++ = SS3; |
|
|
|
|
*p++ = 0x80 | ((k & 0xff00) >> 8); |
|
|
|
|
*p++ = 0x80 | (k & 0xff); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
*p++ = 0x80 | (k >> 8); |
|
|
|
|
*p++ = 0x80 | (k & 0xff); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
*p = '\0'; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|