@ -243,9 +243,7 @@ typedef struct TParser
/* string and position information */
/* string and position information */
char * str ; /* multibyte string */
char * str ; /* multibyte string */
int lenstr ; /* length of mbstring */
int lenstr ; /* length of mbstring */
wchar_t * wstr ; /* wide character string */
pg_wchar * pgwstr ; /* wide character string for C-locale */
pg_wchar * pgwstr ; /* wide character string for C-locale */
bool usewide ;
/* State of parse */
/* State of parse */
int charmaxlen ;
int charmaxlen ;
@ -293,33 +291,8 @@ TParserInit(char *str, int len)
prs - > charmaxlen = pg_database_encoding_max_length ( ) ;
prs - > charmaxlen = pg_database_encoding_max_length ( ) ;
prs - > str = str ;
prs - > str = str ;
prs - > lenstr = len ;
prs - > lenstr = len ;
/*
* Use wide char code only when max encoding length > 1.
*/
if ( prs - > charmaxlen > 1 )
{
locale_t mylocale = 0 ; /* TODO */
prs - > usewide = true ;
if ( database_ctype_is_c )
{
/*
* char2wchar doesn ' t work for C - locale and sizeof ( pg_wchar ) could
* be different from sizeof ( wchar_t )
*/
prs - > pgwstr = ( pg_wchar * ) palloc ( sizeof ( pg_wchar ) * ( prs - > lenstr + 1 ) ) ;
prs - > pgwstr = ( pg_wchar * ) palloc ( sizeof ( pg_wchar ) * ( prs - > lenstr + 1 ) ) ;
pg_mb2wchar_with_len ( prs - > str , prs - > pgwstr , prs - > lenstr ) ;
pg_mb2wchar_with_len ( prs - > str , prs - > pgwstr , prs - > lenstr ) ;
}
else
{
prs - > wstr = ( wchar_t * ) palloc ( sizeof ( wchar_t ) * ( prs - > lenstr + 1 ) ) ;
char2wchar ( prs - > wstr , prs - > lenstr + 1 , prs - > str , prs - > lenstr ,
mylocale ) ;
}
}
else
prs - > usewide = false ;
prs - > state = newTParserPosition ( NULL ) ;
prs - > state = newTParserPosition ( NULL ) ;
prs - > state - > state = TPS_Base ;
prs - > state - > state = TPS_Base ;
@ -350,12 +323,9 @@ TParserCopyInit(const TParser *orig)
prs - > charmaxlen = orig - > charmaxlen ;
prs - > charmaxlen = orig - > charmaxlen ;
prs - > str = orig - > str + orig - > state - > posbyte ;
prs - > str = orig - > str + orig - > state - > posbyte ;
prs - > lenstr = orig - > lenstr - orig - > state - > posbyte ;
prs - > lenstr = orig - > lenstr - orig - > state - > posbyte ;
prs - > usewide = orig - > usewide ;
if ( orig - > pgwstr )
if ( orig - > pgwstr )
prs - > pgwstr = orig - > pgwstr + orig - > state - > poschar ;
prs - > pgwstr = orig - > pgwstr + orig - > state - > poschar ;
if ( orig - > wstr )
prs - > wstr = orig - > wstr + orig - > state - > poschar ;
prs - > state = newTParserPosition ( NULL ) ;
prs - > state = newTParserPosition ( NULL ) ;
prs - > state - > state = TPS_Base ;
prs - > state - > state = TPS_Base ;
@ -379,8 +349,6 @@ TParserClose(TParser *prs)
prs - > state = ptr ;
prs - > state = ptr ;
}
}
if ( prs - > wstr )
pfree ( prs - > wstr ) ;
if ( prs - > pgwstr )
if ( prs - > pgwstr )
pfree ( prs - > pgwstr ) ;
pfree ( prs - > pgwstr ) ;
@ -412,13 +380,9 @@ TParserCopyClose(TParser *prs)
/*
/*
* Character - type support functions , equivalent to is * macros , but
* Character - type support functions using the database default locale . If the
* working with any possible encodings and locales . Notes :
* locale is C , and the input character is non - ascii , the value to be returned
* - with multibyte encoding and C - locale isw * function may fail
* is determined by the ' nonascii ' macro argument .
* or give wrong result .
* - multibyte encoding and C - locale often are used for
* Asian languages .
* - if locale is C then we use pgwstr instead of wstr .
*/
*/
# define p_iswhat(type, nonascii) \
# define p_iswhat(type, nonascii) \
@ -426,19 +390,13 @@ TParserCopyClose(TParser *prs)
static int \
static int \
p_is # # type ( TParser * prs ) \
p_is # # type ( TParser * prs ) \
{ \
{ \
pg_locale_t locale = pg_database_locale ( ) ; \
pg_wchar wc ; \
Assert ( prs - > state ) ; \
Assert ( prs - > state ) ; \
if ( prs - > usewide ) \
wc = prs - > pgwstr [ prs - > state - > poschar ] ; \
{ \
if ( prs - > charmaxlen > 1 & & locale - > ctype_is_c & & wc > 0x7f ) \
if ( prs - > pgwstr ) \
{ \
unsigned int c = * ( prs - > pgwstr + prs - > state - > poschar ) ; \
if ( c > 0x7f ) \
return nonascii ; \
return nonascii ; \
return is # # type ( c ) ; \
return pg_isw # # type ( wc , pg_database_locale ( ) ) ; \
} \
return isw # # type ( * ( prs - > wstr + prs - > state - > poschar ) ) ; \
} \
return is # # type ( * ( unsigned char * ) ( prs - > str + prs - > state - > posbyte ) ) ; \
} \
} \
\
\
static int \
static int \
@ -703,7 +661,7 @@ p_isspecial(TParser *prs)
* Check that only in utf encoding , because other encodings aren ' t
* Check that only in utf encoding , because other encodings aren ' t
* supported by postgres or even exists .
* supported by postgres or even exists .
*/
*/
if ( GetDatabaseEncoding ( ) = = PG_UTF8 & & prs - > usewide )
if ( GetDatabaseEncoding ( ) = = PG_UTF8 )
{
{
static const pg_wchar strange_letter [ ] = {
static const pg_wchar strange_letter [ ] = {
/*
/*
@ -944,10 +902,7 @@ p_isspecial(TParser *prs)
* StopMiddle ;
* StopMiddle ;
pg_wchar c ;
pg_wchar c ;
if ( prs - > pgwstr )
c = * ( prs - > pgwstr + prs - > state - > poschar ) ;
c = * ( prs - > pgwstr + prs - > state - > poschar ) ;
else
c = ( pg_wchar ) * ( prs - > wstr + prs - > state - > poschar ) ;
while ( StopLow < StopHigh )
while ( StopLow < StopHigh )
{
{