mirror of https://github.com/postgres/postgres
Included are patches intended for allowing PostgreSQL to handle multi-byte character sets such as EUC (Extended Unix Code), Unicode and Mule internal code. With the MB patch you can use multi-byte character sets in regexp and LIKE. The encoding system chosen is determined at the compile time. To enable the MB extension, you need to define a variable "MB" in Makefile.global or in Makefile.custom. For further information please take a look at README.mb under doc directory. (Note that unlike "jp patch" I do not use modified GNU regexp any more. I changed Henry Spencer's regexp coming with PostgreSQL.)REL6_4
parent
31a925c4d0
commit
661ecf3c48
@ -0,0 +1,67 @@ |
|||||||
|
postgresql 6.3 multi-byte(MB) patch PL2 README Mar 10 1998 |
||||||
|
|
||||||
|
Tatsuo Ishii |
||||||
|
t-ishii@sra.co.jp |
||||||
|
http://www.sra.co.jp/people/t-ishii/PostgreSQL/ |
||||||
|
|
||||||
|
Introduction |
||||||
|
|
||||||
|
MB patch is intended for allowing PostgreSQL to handle multi-byte |
||||||
|
character sets such as EUC (Extended Unix Code), Unicode and Mule |
||||||
|
internal code. With the MB patch you can use multi-byte character sets |
||||||
|
in regexp and LIKE. The encoding system chosen is determined at the |
||||||
|
compile time. |
||||||
|
|
||||||
|
The patch also fixes some problems concerning 8-bit single byte |
||||||
|
character sets including ISO8859. (I would not say all of the problems |
||||||
|
have been fixed. I just confirmed that the regression test ran fine |
||||||
|
and a few French characters could be used with the patch. Please let |
||||||
|
me know if you find any problem while using 8-bit characters) |
||||||
|
|
||||||
|
How to use |
||||||
|
|
||||||
|
After applying the MB patch, create src/Makefile.custom with a line |
||||||
|
including: |
||||||
|
|
||||||
|
MB=encoding_system |
||||||
|
|
||||||
|
where encoding_system is one of: |
||||||
|
|
||||||
|
EUC_JP Japanese EUC |
||||||
|
EUC_CN Chinese EUC |
||||||
|
EUC_KR Korean EUC |
||||||
|
EUC_TW Taiwan EUC |
||||||
|
UNICODE Unicode(UTF-8) |
||||||
|
MULE_INTERNAL Mule internal |
||||||
|
|
||||||
|
Example: |
||||||
|
|
||||||
|
% cat Makefile.custom |
||||||
|
MB=EUC_JP |
||||||
|
|
||||||
|
If MB is not defined, nothing is changed except better support for |
||||||
|
8-bit single byte character sets. |
||||||
|
|
||||||
|
References |
||||||
|
|
||||||
|
These are good sources to start learning various kinds of encoding |
||||||
|
systems. |
||||||
|
|
||||||
|
ftp://ftp.ora.com/pub/examples/nutshell/ujip/doc/cjk.inf |
||||||
|
Detailed explanations of EUC_JP, EUC_CN, EUC_KR, EUC_TW |
||||||
|
appear in section 3.2. |
||||||
|
|
||||||
|
Unicode: http://www.unicode.org/ |
||||||
|
The homepage of UNICODE. |
||||||
|
|
||||||
|
RFC 2044 |
||||||
|
UTF-8 is defined here. |
||||||
|
|
||||||
|
History |
||||||
|
|
||||||
|
Mar 10, 1998 PL2 released |
||||||
|
* add regression test for EUC_JP, EUC_CN and MULE_INTERNAL |
||||||
|
* add an English document (this file) |
||||||
|
* fix problems concerning 8-bit single byte characters |
||||||
|
|
||||||
|
Mar 1, 1998 PL1 released |
||||||
@ -0,0 +1,33 @@ |
|||||||
|
/*
|
||||||
|
* testing of utf2wchar() |
||||||
|
* $Id: utftest.c,v 1.1 1998/03/15 07:38:37 scrappy Exp $ |
||||||
|
*/ |
||||||
|
#include <stdio.h>

#include <regex/regex.h>
#include <regex/utils.h>
#include <regex/regex2.h>

#include <regex/pg_wchar.h>
||||||
|
|
||||||
|
main() |
||||||
|
{ |
||||||
|
/* Example 1 from RFC2044 */ |
||||||
|
char utf1[] = {0x41,0xe2,0x89,0xa2,0xce,0x91,0x2e,0}; |
||||||
|
/* Example 2 from RFC2044 */ |
||||||
|
char utf2[] = {0x48,0x69,0x20,0x4d,0x6f,0x6d,0x20,0xe2,0x98,0xba,0x21,0}; |
||||||
|
/* Example 3 from RFC2044 */ |
||||||
|
char utf3[] = {0xe6,0x97,0xa5,0xe6,0x9c,0xac,0xe8,0xaa,0x9e,0}; |
||||||
|
char *utf[] = {utf1,utf2,utf3}; |
||||||
|
pg_wchar ucs[128]; |
||||||
|
pg_wchar *p; |
||||||
|
int i; |
||||||
|
|
||||||
|
for (i=0;i<sizeof(utf)/sizeof(char *);i++) { |
||||||
|
pg_utf2wchar(utf[i],ucs); |
||||||
|
p = ucs; |
||||||
|
while(*p) { |
||||||
|
printf("%04x ",*p); |
||||||
|
p++; |
||||||
|
} |
||||||
|
printf("\n"); |
||||||
|
} |
||||||
|
} |
||||||
@ -0,0 +1,348 @@ |
|||||||
|
/*
|
||||||
|
* misc conversion functions between pg_wchar and other encodings. |
||||||
|
* Tatsuo Ishii |
||||||
|
* $Id: utils.c,v 1.1 1998/03/15 07:38:39 scrappy Exp $ |
||||||
|
*/ |
||||||
|
#include <regex/pg_wchar.h> |
||||||
|
/*
|
||||||
|
* convert EUC to pg_wchar (EUC process code) |
||||||
|
* caller should allocate enough space for "to" |
||||||
|
*/ |
||||||
|
static void pg_euc2wchar(const unsigned char *from, pg_wchar *to) |
||||||
|
{ |
||||||
|
while (*from) { |
||||||
|
if (*from == SS2) { |
||||||
|
from++; |
||||||
|
*to = *from++; |
||||||
|
} else if (*from == SS3) { |
||||||
|
from++; |
||||||
|
*to = *from++ << 8; |
||||||
|
*to |= 0x3f & *from++; |
||||||
|
} else if (*from & 0x80) { |
||||||
|
*to = *from++ << 8; |
||||||
|
*to |= *from++; |
||||||
|
} else { |
||||||
|
*to = *from++; |
||||||
|
} |
||||||
|
to++; |
||||||
|
} |
||||||
|
*to = 0; |
||||||
|
} |
||||||
|
|
||||||
|
/*
 * EUC_JP entry point: simply forwards to the generic pg_euc2wchar().
 * The caller must allocate enough space for "to".
 */
static void
pg_eucjp2wchar(const unsigned char *from, pg_wchar *to)
{
    pg_euc2wchar(from, to);
}
||||||
|
|
||||||
|
/*
 * EUC_KR entry point: simply forwards to the generic pg_euc2wchar().
 * The caller must allocate enough space for "to".
 */
static void
pg_euckr2wchar(const unsigned char *from, pg_wchar *to)
{
    pg_euc2wchar(from, to);
}
||||||
|
|
||||||
|
/*
 * convert a zero-terminated EUC string to pg_wchar; like pg_euc2wchar()
 * except that an SS2 prefix is followed by two bytes.
 *
 * from: zero-terminated input bytes
 * to:   output array, terminated with 0; the caller must allocate enough
 *       space for it
 */
static void pg_eucch2wchar(const unsigned char *from, pg_wchar *to)
{
    while (*from) {
        if (*from == SS2) {
            from++;
            *to = 0x3f00 & (*from++ << 8);
            /*
             * OR the low byte in.  A plain assignment here would throw
             * away the high byte stored on the previous line.
             */
            *to |= *from++;
        } else if (*from == SS3) {
            from++;
            *to = *from++ << 8;
            *to |= 0x3f & *from++;
        } else if (*from & 0x80) {
            *to = *from++ << 8;
            *to |= *from++;
        } else {
            *to = *from++;
        }
        to++;
    }
    *to = 0;
}
||||||
|
|
||||||
|
/*
 * convert a zero-terminated EUC string to pg_wchar; like pg_euc2wchar()
 * except that an SS2 prefix is followed by three bytes, which are packed
 * into bits 16-23, 8-15 and 0-7 of the result.
 *
 * The caller must allocate enough space for "to"; the output is
 * terminated with 0.
 */
static void pg_euccn2wchar(const unsigned char *from, pg_wchar *to)
{
    const unsigned char *src = from;
    pg_wchar *dst = to;

    for (; *src != 0; dst++) {
        unsigned int lead = *src++;
        unsigned int code;

        if (lead == SS2) {
            code = (unsigned int) *src++ << 16;
            code |= (unsigned int) *src++ << 8;
            code |= *src++;
        } else if (lead == SS3) {
            code = (unsigned int) *src++ << 8;
            code |= *src++ & 0x3f;
        } else if (lead & 0x80) {
            code = (lead << 8) | *src++;
        } else {
            code = lead;
        }
        *dst = code;
    }
    *dst = 0;
}
||||||
|
|
||||||
|
/*
|
||||||
|
* convert UTF-8 to pg_wchar (UCS-2) |
||||||
|
* caller should allocate enough space for "to" |
||||||
|
*/ |
||||||
|
static void pg_utf2wchar(const unsigned char *from, pg_wchar *to) |
||||||
|
{ |
||||||
|
unsigned char c1,c2,c3; |
||||||
|
while (*from) { |
||||||
|
if ((*from & 0x80) == 0) { |
||||||
|
*to = *from++; |
||||||
|
} else if ((*from & 0xe0) == 0xc0) { |
||||||
|
c1 = *from++ & 0x1f; |
||||||
|
c2 = *from++ & 0x3f; |
||||||
|
*to = c1 << 6; |
||||||
|
*to |= c2; |
||||||
|
} else if ((*from & 0xe0) == 0xe0) { |
||||||
|
c1 = *from++ & 0x0f; |
||||||
|
c2 = *from++ & 0x3f; |
||||||
|
c3 = *from++ & 0x3f; |
||||||
|
*to = c1 << 12; |
||||||
|
*to |= c2 << 6; |
||||||
|
*to |= c3; |
||||||
|
} |
||||||
|
to++; |
||||||
|
} |
||||||
|
*to = 0; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* convert mule internal code to pg_wchar. |
||||||
|
* in this case pg_wchar consists of following 4 bytes: |
||||||
|
* |
||||||
|
* 0x00(unused) |
||||||
|
* 0x00(ASCII)|leading character (one of LC1, LC12, LC2 or LC22) |
||||||
|
* 0x00(ASCII,1 byte code)|other than 0x00(2 byte code) |
||||||
|
* the lowest byte of the code |
||||||
|
* |
||||||
|
* note that Type N (variable length byte encoding) cannot be represented by |
||||||
|
* this schema. sorry. |
||||||
|
* caller should allocate enough space for "to" |
||||||
|
*/ |
||||||
|
static void pg_mule2wchar(const unsigned char *from, pg_wchar *to) |
||||||
|
{ |
||||||
|
while (*from) { |
||||||
|
if (IS_LC1(*from)) { |
||||||
|
*to = *from++ << 16; |
||||||
|
*to |= *from++; |
||||||
|
} else if (IS_LCPRV1(*from)) { |
||||||
|
from++; |
||||||
|
*to = *from++ << 16; |
||||||
|
*to |= *from++; |
||||||
|
} else if (IS_LC2(*from)) { |
||||||
|
*to = *from++ << 16; |
||||||
|
*to |= *from++ << 8; |
||||||
|
*to |= *from++; |
||||||
|
} else if (IS_LCPRV2(*from)) { |
||||||
|
from++; |
||||||
|
*to = *from++ << 16; |
||||||
|
*to |= *from++ << 8; |
||||||
|
*to |= *from++; |
||||||
|
} else { /* assume ASCII */ |
||||||
|
*to = *from++; |
||||||
|
} |
||||||
|
to++; |
||||||
|
} |
||||||
|
*to = 0; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* convert EUC to pg_wchar (EUC process code) |
||||||
|
* caller should allocate enough space for "to" |
||||||
|
* len: length of from. |
||||||
|
* "from" not necessarily null terminated. |
||||||
|
*/ |
||||||
|
static void pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) |
||||||
|
{ |
||||||
|
while (*from && len > 0) { |
||||||
|
if (*from == SS2) { |
||||||
|
from++; |
||||||
|
len--; |
||||||
|
*to = 0xff & *from++; |
||||||
|
len--; |
||||||
|
} else if (*from == SS3) { |
||||||
|
from++; |
||||||
|
*to = *from++ << 8; |
||||||
|
*to |= 0x3f & *from++; |
||||||
|
len -= 3; |
||||||
|
} else if (*from & 0x80) { |
||||||
|
*to = *from++ << 8; |
||||||
|
*to |= *from++; |
||||||
|
len -= 2; |
||||||
|
} else { |
||||||
|
*to = *from++; |
||||||
|
len--; |
||||||
|
} |
||||||
|
to++; |
||||||
|
} |
||||||
|
*to = 0; |
||||||
|
} |
||||||
|
|
||||||
|
static void pg_eucjp2wchar_with_len |
||||||
|
(const unsigned char *from, pg_wchar *to, int len) |
||||||
|
{ |
||||||
|
pg_euc2wchar_with_len(from,to,len); |
||||||
|
} |
||||||
|
|
||||||
|
static void pg_euckr2wchar_with_len |
||||||
|
(const unsigned char *from, pg_wchar *to, int len) |
||||||
|
{ |
||||||
|
pg_euc2wchar_with_len(from,to,len); |
||||||
|
} |
||||||
|
|
||||||
|
static void pg_eucch2wchar_with_len |
||||||
|
(const unsigned char *from, pg_wchar *to, int len) |
||||||
|
{ |
||||||
|
while (*from && len > 0) { |
||||||
|
if (*from == SS2) { |
||||||
|
from++; |
||||||
|
len--; |
||||||
|
*to = 0x3f00 & (*from++ << 8); |
||||||
|
*to = *from++; |
||||||
|
len -= 2; |
||||||
|
} else if (*from == SS3) { |
||||||
|
from++; |
||||||
|
*to = *from++ << 8; |
||||||
|
*to |= 0x3f & *from++; |
||||||
|
len -= 3; |
||||||
|
} else if (*from & 0x80) { |
||||||
|
*to = *from++ << 8; |
||||||
|
*to |= *from++; |
||||||
|
len -= 2; |
||||||
|
} else { |
||||||
|
*to = *from++; |
||||||
|
len--; |
||||||
|
} |
||||||
|
to++; |
||||||
|
} |
||||||
|
*to = 0; |
||||||
|
} |
||||||
|
|
||||||
|
static void pg_euccn2wchar_with_len |
||||||
|
(const unsigned char *from, pg_wchar *to, int len) |
||||||
|
{ |
||||||
|
while (*from && len > 0) { |
||||||
|
if (*from == SS2) { |
||||||
|
from++; |
||||||
|
len--; |
||||||
|
*to = *from++ << 16; |
||||||
|
*to |= *from++ << 8; |
||||||
|
*to |= *from++; |
||||||
|
len -= 3; |
||||||
|
} else if (*from == SS3) { |
||||||
|
from++; |
||||||
|
*to = *from++ << 8; |
||||||
|
*to |= 0x3f & *from++; |
||||||
|
len -= 3; |
||||||
|
} else if (*from & 0x80) { |
||||||
|
*to = *from++ << 8; |
||||||
|
*to |= *from++; |
||||||
|
len -= 2; |
||||||
|
} else { |
||||||
|
*to = *from++; |
||||||
|
len--; |
||||||
|
} |
||||||
|
to++; |
||||||
|
} |
||||||
|
*to = 0; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* convert UTF-8 to pg_wchar (UCS-2) |
||||||
|
* caller should allocate enough space for "to" |
||||||
|
* len: length of from. |
||||||
|
* "from" not necessarily null terminated. |
||||||
|
*/ |
||||||
|
static void pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) |
||||||
|
{ |
||||||
|
unsigned char c1,c2,c3; |
||||||
|
while (*from && len > 0) { |
||||||
|
if ((*from & 0x80) == 0) { |
||||||
|
*to = *from++; |
||||||
|
len--; |
||||||
|
} else if ((*from & 0xe0) == 0xc0) { |
||||||
|
c1 = *from++ & 0x1f; |
||||||
|
c2 = *from++ & 0x3f; |
||||||
|
len -= 2; |
||||||
|
*to = c1 << 6; |
||||||
|
*to |= c2; |
||||||
|
} else if ((*from & 0xe0) == 0xe0) { |
||||||
|
c1 = *from++ & 0x0f; |
||||||
|
c2 = *from++ & 0x3f; |
||||||
|
c3 = *from++ & 0x3f; |
||||||
|
len -= 3; |
||||||
|
*to = c1 << 12; |
||||||
|
*to |= c2 << 6; |
||||||
|
*to |= c3; |
||||||
|
} |
||||||
|
to++; |
||||||
|
} |
||||||
|
*to = 0; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* convert mule internal code to pg_wchar |
||||||
|
* caller should allocate enough space for "to" |
||||||
|
* len: length of from. |
||||||
|
* "from" not necessarily null terminated. |
||||||
|
*/ |
||||||
|
static void pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) |
||||||
|
{ |
||||||
|
while (*from && len > 0) { |
||||||
|
if (IS_LC1(*from)) { |
||||||
|
*to = *from++ << 16; |
||||||
|
*to |= *from++; |
||||||
|
len -= 2; |
||||||
|
} else if (IS_LCPRV1(*from)) { |
||||||
|
from++; |
||||||
|
*to = *from++ << 16; |
||||||
|
*to |= *from++; |
||||||
|
len -= 3; |
||||||
|
} else if (IS_LC2(*from)) { |
||||||
|
*to = *from++ << 16; |
||||||
|
*to |= *from++ << 8; |
||||||
|
*to |= *from++; |
||||||
|
len -= 3; |
||||||
|
} else if (IS_LCPRV2(*from)) { |
||||||
|
from++; |
||||||
|
*to = *from++ << 16; |
||||||
|
*to |= *from++ << 8; |
||||||
|
*to |= *from++; |
||||||
|
len -= 4; |
||||||
|
} else { /* assume ASCII */ |
||||||
|
*to = (unsigned char)*from++; |
||||||
|
len--; |
||||||
|
} |
||||||
|
to++; |
||||||
|
} |
||||||
|
*to = 0; |
||||||
|
} |
||||||
|
|
||||||
|
typedef struct { |
||||||
|
void (*mb2wchar)(); |
||||||
|
void (*mb2wchar_with_len)(); |
||||||
|
} pg_wchar_tbl; |
||||||
|
|
||||||
|
static pg_wchar_tbl pg_wchar_table[] = { |
||||||
|
{pg_eucjp2wchar, pg_eucjp2wchar_with_len}, |
||||||
|
{pg_eucch2wchar, pg_eucch2wchar_with_len}, |
||||||
|
{pg_euckr2wchar, pg_euckr2wchar_with_len}, |
||||||
|
{pg_euccn2wchar, pg_euccn2wchar_with_len}, |
||||||
|
{pg_utf2wchar, pg_utf2wchar_with_len}, |
||||||
|
{pg_mule2wchar, pg_mule2wchar_with_len}}; |
||||||
|
|
||||||
|
/*
 * Convert the zero-terminated multi-byte string "from" to pg_wchar using
 * the routine registered in pg_wchar_table for the compile-time encoding
 * MB.  The caller must allocate enough space for "to".
 */
void
pg_mb2wchar(const unsigned char *from, pg_wchar *to)
{
    pg_wchar_table[MB].mb2wchar(from, to);
}
||||||
|
|
||||||
|
/*
 * Convert "len" bytes of the multi-byte string "from" to pg_wchar using
 * the routine registered in pg_wchar_table for the compile-time encoding
 * MB.  The caller must allocate enough space for "to".
 */
void
pg_mb2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
{
    pg_wchar_table[MB].mb2wchar_with_len(from, to, len);
}
||||||
@ -0,0 +1,48 @@ |
|||||||
|
/*-
|
||||||
|
* Copyright (c) 1990, 1993 |
||||||
|
* The Regents of the University of California. All rights reserved. |
||||||
|
* |
||||||
|
* This code is derived from software contributed to Berkeley by |
||||||
|
* Chris Torek. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in the |
||||||
|
* documentation and/or other materials provided with the distribution. |
||||||
|
* 3. All advertising materials mentioning features or use of this software |
||||||
|
* must display the following acknowledgement: |
||||||
|
* This product includes software developed by the University of |
||||||
|
* California, Berkeley and its contributors. |
||||||
|
* 4. Neither the name of the University nor the names of its contributors |
||||||
|
* may be used to endorse or promote products derived from this software |
||||||
|
* without specific prior written permission. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <regex/pg_wchar.h> |
||||||
|
|
||||||
|
int |
||||||
|
pg_char_and_wchar_strcmp(s1, s2) |
||||||
|
register const char *s1; |
||||||
|
register const pg_wchar *s2; |
||||||
|
{ |
||||||
|
while ((pg_wchar)*s1 == *s2++) |
||||||
|
if (*s1++ == 0) |
||||||
|
return (0); |
||||||
|
return (*(const unsigned char *)s1 - *(const pg_wchar *)(s2 - 1)); |
||||||
|
} |
||||||
@ -0,0 +1,83 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (c) 1989, 1993 |
||||||
|
* The Regents of the University of California. All rights reserved. |
||||||
|
* |
||||||
|
* This code is derived from FreeBSD 2.2.1-RELEASE software. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in the |
||||||
|
* documentation and/or other materials provided with the distribution. |
||||||
|
* 3. All advertising materials mentioning features or use of this software |
||||||
|
* must display the following acknowledgement: |
||||||
|
* This product includes software developed by the University of |
||||||
|
* California, Berkeley and its contributors. |
||||||
|
* 4. Neither the name of the University nor the names of its contributors |
||||||
|
* may be used to endorse or promote products derived from this software |
||||||
|
* without specific prior written permission. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <regex/pg_wchar.h> |
||||||
|
|
||||||
|
int |
||||||
|
pg_wchar_strncmp(s1, s2, n) |
||||||
|
register const pg_wchar *s1, *s2; |
||||||
|
register size_t n; |
||||||
|
{ |
||||||
|
|
||||||
|
if (n == 0) |
||||||
|
return (0); |
||||||
|
do { |
||||||
|
if (*s1 != *s2++) |
||||||
|
return (*(const pg_wchar *)s1 - |
||||||
|
*(const pg_wchar *)(s2 - 1)); |
||||||
|
if (*s1++ == 0) |
||||||
|
break; |
||||||
|
} while (--n != 0); |
||||||
|
return (0); |
||||||
|
} |
||||||
|
|
||||||
|
int |
||||||
|
pg_char_and_wchar_strncmp(s1, s2, n) |
||||||
|
register const char *s1; |
||||||
|
register const pg_wchar *s2; |
||||||
|
register size_t n; |
||||||
|
{ |
||||||
|
|
||||||
|
if (n == 0) |
||||||
|
return (0); |
||||||
|
do { |
||||||
|
if ((pg_wchar )*s1 != *s2++) |
||||||
|
return (*(const pg_wchar *)s1 - |
||||||
|
*(const pg_wchar *)(s2 - 1)); |
||||||
|
if (*s1++ == 0) |
||||||
|
break; |
||||||
|
} while (--n != 0); |
||||||
|
return (0); |
||||||
|
} |
||||||
|
|
||||||
|
size_t |
||||||
|
pg_wchar_strlen(str) |
||||||
|
const pg_wchar *str; |
||||||
|
{ |
||||||
|
register const pg_wchar *s; |
||||||
|
|
||||||
|
for (s = str; *s; ++s); |
||||||
|
return(s - str); |
||||||
|
} |
||||||
@ -0,0 +1,44 @@ |
|||||||
|
/* $Id: pg_wchar.h,v 1.1 1998/03/15 07:38:47 scrappy Exp $ */ |
||||||
|
|
||||||
|
#ifndef PG_WCHAR_H
#define PG_WCHAR_H

#include <sys/types.h>

/*
 * Encoding identifiers.
 */
#define EUC_JP          0   /* EUC for Japanese */
#define EUC_CN          1   /* EUC for Chinese */
#define EUC_KR          2   /* EUC for Korean */
#define EUC_TW          3   /* EUC for Taiwan */
#define UNICODE         4   /* Unicode UTF-8 */
#define MULE_INTERNAL   5   /* Mule internal code */

/*
 * pg_wchar is an unsigned int when MB is defined; otherwise it is just
 * another spelling of char.
 */
#ifndef MB
#define pg_wchar char
#else
typedef unsigned int pg_wchar;
#endif

/*
 * EUC single-shift bytes
 */
#define SS2 0x8e    /* single shift 2 */
#define SS3 0x8f    /* single shift 3 */

/*
 * Classification of mule internal code lead bytes.  Each macro evaluates
 * its argument more than once, so do not pass an expression with side
 * effects.
 */
#define IS_LC1(c)       (0x81 <= (unsigned char)(c) && (unsigned char)(c) <= 0x8f)
#define IS_LCPRV1(c)    ((unsigned char)(c) == 0x9a || (unsigned char)(c) == 0x9b)
#define IS_LC2(c)       (0x90 <= (unsigned char)(c) && (unsigned char)(c) <= 0x99)
#define IS_LCPRV2(c)    ((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d)

/*
 * Conversion and comparison routines, declared only when MB is defined.
 */
#ifdef MB
extern void pg_mb2wchar(const unsigned char *from, pg_wchar *to);
extern void pg_mb2wchar_with_len(const unsigned char *from, pg_wchar *to, int len);
extern int pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
extern size_t pg_wchar_strlen(const pg_wchar *str);
#endif

#endif /* PG_WCHAR_H */
||||||
Loading…
Reference in new issue