@ -302,47 +302,62 @@ compare4(const void *p1, const void *p2)
}
/*
* convert 32 bit wide character to mutibye stream pointed to by iso
* store 32 bit character representation into multibyte stream
*/
static unsigned char *
se t_is o_code ( unsigned char * iso , uint32 code )
static inline unsigned char *
store _coded_char ( unsigned char * dest , uint32 code )
{
if ( code & 0xff000000 )
* iso + + = code > > 24 ;
* dest + + = code > > 24 ;
if ( code & 0x00ff0000 )
* iso + + = ( code & 0x00ff0000 ) > > 16 ;
* dest + + = code > > 16 ;
if ( code & 0x0000ff00 )
* iso + + = ( code & 0x0000ff00 ) > > 8 ;
* dest + + = code > > 8 ;
if ( code & 0x000000ff )
* iso + + = code & 0x000000ff ;
return iso ;
* dest + + = code ;
return dest ;
}
/*
* UTF8 - - - > local code
*
* utf : input UTF8 string ( need not be null - terminated ) .
* utf : input string in UTF8 encoding ( need not be null - terminated )
* len : length of input string ( in bytes )
* iso : pointer to the output area ( must be large enough ! )
* map : the conversion map .
* cmap : the conversion map for combined characters .
* ( optional )
* size1 : the size of the conversion map .
* size2 : the size of the conversion map for combined characters
* ( optional )
* encoding : the PG identifier for the local encoding .
* len : length of input string .
* (the output string written at iso will be null-terminated)
* map : conversion map for single characters
* mapsize : number of entries in the conversion map
* cmap : conversion map for combined characters
* ( optional , pass NULL if none )
* cmapsize : number of entries in the conversion map for combined characters
* ( optional , pass 0 if none )
* conv_func : algorithmic encoding conversion function
* ( optional , pass NULL if none )
* encoding : PG identifier for the local encoding
*
* For each character , the cmap ( if provided ) is consulted first ; if no match ,
* the map is consulted next ; if still no match , the conv_func ( if provided )
* is applied . An error is raised if no match is found .
*
* See pg_wchar . h for more details about the data structures used here .
*/
void
UtfToLocal ( const unsigned char * utf , unsigned char * iso ,
const pg_utf_to_local * map , const pg_utf_to_local_combined * cmap ,
int size1 , int size2 , int encoding , int len )
UtfToLocal ( const unsigned char * utf , int len ,
unsigned char * iso ,
const pg_utf_to_local * map , int mapsize ,
const pg_utf_to_local_combined * cmap , int cmapsize ,
utf_local_conversion_func conv_func ,
int encoding )
{
uint32 iutf ;
uint32 cutf [ 2 ] ;
uint32 code ;
pg_utf_to_local * p ;
pg_utf_to_local_combined * cp ;
int l ;
const pg_utf_to_local * p ;
const pg_utf_to_local_combined * cp ;
if ( ! PG_VALID_ENCODING ( encoding ) )
ereport ( ERROR ,
( errcode ( ERRCODE_INVALID_PARAMETER_VALUE ) ,
errmsg ( " invalid encoding number: %d " , encoding ) ) ) ;
for ( ; len > 0 ; len - = l )
{
@ -351,7 +366,6 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
break ;
l = pg_utf_mblen ( utf ) ;
if ( len < l )
break ;
@ -360,11 +374,13 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
if ( l = = 1 )
{
/* ASCII case is easy */
/* ASCII case is easy, assume it's one-to-one conversion */
* iso + + = * utf + + ;
continue ;
}
else if ( l = = 2 )
/* collect coded char of length l */
if ( l = = 2 )
{
iutf = * utf + + < < 8 ;
iutf | = * utf + + ;
@ -388,15 +404,14 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
iutf = 0 ; /* keep compiler quiet */
}
/*
* first , try with combined map if possible
*/
/* First, try with combined map if possible */
if ( cmap & & len > l )
{
const unsigned char * utf_save = utf ;
int len_save = len ;
int l_save = l ;
/* collect next character, same as above */
len - = l ;
l = pg_utf_mblen ( utf ) ;
@ -406,83 +421,83 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
if ( ! pg_utf8_islegal ( utf , l ) )
break ;
cutf [ 0 ] = iutf ;
if ( l = = 1 )
/* We assume ASCII character cannot be in combined map */
if ( l > 1 )
{
if ( len_save > 1 )
uint32 iutf2 ;
uint32 cutf [ 2 ] ;
if ( l = = 2 )
{
iutf2 = * utf + + < < 8 ;
iutf2 | = * utf + + ;
}
else if ( l = = 3 )
{
iutf2 = * utf + + < < 16 ;
iutf2 | = * utf + + < < 8 ;
iutf2 | = * utf + + ;
}
else if ( l = = 4 )
{
iutf2 = * utf + + < < 24 ;
iutf2 | = * utf + + < < 16 ;
iutf2 | = * utf + + < < 8 ;
iutf2 | = * utf + + ;
}
else
{
p = bsearch ( & cutf [ 0 ] , map , size1 ,
sizeof ( pg_utf_to_local ) , compare1 ) ;
if ( p = = NULL )
report_untranslatable_char ( PG_UTF8 , encoding ,
( const char * ) ( utf_save - l_save ) , len_save ) ;
iso = set_iso_code ( iso , p - > code ) ;
elog ( ERROR , " unsupported character length %d " , l ) ;
iutf2 = 0 ; /* keep compiler quiet */
}
/* ASCII case is easy */
* iso + + = * utf + + ;
continue ;
}
else if ( l = = 2 )
{
iutf = * utf + + < < 8 ;
iutf | = * utf + + ;
}
else if ( l = = 3 )
{
iutf = * utf + + < < 16 ;
iutf | = * utf + + < < 8 ;
iutf | = * utf + + ;
}
else if ( l = = 4 )
{
iutf = * utf + + < < 24 ;
iutf | = * utf + + < < 16 ;
iutf | = * utf + + < < 8 ;
iutf | = * utf + + ;
}
else
{
elog ( ERROR , " unsupported character length %d " , l ) ;
iutf = 0 ; /* keep compiler quiet */
}
cutf [ 0 ] = iutf ;
cutf [ 1 ] = iutf2 ;
cutf [ 1 ] = iutf ;
cp = bsearch ( cutf , cmap , size2 ,
sizeof ( pg_utf_to_local_combined ) , compare3 ) ;
if ( cp )
code = cp - > code ;
else
{
/* not found in combined map. try with ordinary map */
p = bsearch ( & cutf [ 0 ] , map , size1 ,
sizeof ( pg_utf_to_local ) , compare1 ) ;
if ( p = = NULL )
report_untranslatable_char ( PG_UTF8 , encoding ,
( const char * ) ( utf_save - l_save ) , len_save ) ;
iso = set_iso_code ( iso , p - > code ) ;
p = bsearch ( & cutf [ 1 ] , map , size1 ,
sizeof ( pg_utf_to_local ) , compare1 ) ;
if ( p = = NULL )
report_untranslatable_char ( PG_UTF8 , encoding ,
( const char * ) ( utf - l ) , len ) ;
code = p - > code ;
cp = bsearch ( cutf , cmap , cmapsize ,
sizeof ( pg_utf_to_local_combined ) , compare3 ) ;
if ( cp )
{
iso = store_coded_char ( iso , cp - > code ) ;
continue ;
}
}
/* fail, so back up to reprocess second character next time */
utf = utf_save ;
len = len_save ;
l = l_save ;
}
else /* no cmap or no remaining data */
/* Now check ordinary map */
p = bsearch ( & iutf , map , mapsize ,
sizeof ( pg_utf_to_local ) , compare1 ) ;
if ( p )
{
p = bsearch ( & iutf , map , size1 ,
sizeof ( pg_utf_to_local ) , compare1 ) ;
if ( p = = NULL )
report_untranslatable_char ( PG_UTF8 , encoding ,
( const char * ) ( utf - l ) , len ) ;
code = p - > code ;
iso = store_coded_char ( iso , p - > code ) ;
continue ;
}
/* if there's a conversion function, try that */
if ( conv_func )
{
uint32 converted = ( * conv_func ) ( iutf ) ;
if ( converted )
{
iso = store_coded_char ( iso , converted ) ;
continue ;
}
}
iso = set_iso_code ( iso , code ) ;
/* failed to translate this character */
report_untranslatable_char ( PG_UTF8 , encoding ,
( const char * ) ( utf - l ) , len ) ;
}
/* if we broke out of loop early, must be invalid input */
if ( len > 0 )
report_invalid_encoding ( PG_UTF8 , ( const char * ) utf , len ) ;
@ -492,26 +507,38 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
/*
* local code - - - > UTF8
*
* iso : input local string ( need not be null - terminated ) .
* iso : input string in local encoding ( need not be null - terminated )
* len : length of input string ( in bytes )
* utf : pointer to the output area ( must be large enough ! )
* map : the conversion map .
* cmap : the conversion map for combined characters .
* ( optional )
* size1 : the size of the conversion map .
* size2 : the size of the conversion map for combined characters
* ( optional )
* encoding : the PG identifier for the local encoding .
* len : length of input string .
* (the output string written at utf will be null-terminated)
* map : conversion map for single characters
* mapsize : number of entries in the conversion map
* cmap : conversion map for combined characters
* ( optional , pass NULL if none )
* cmapsize : number of entries in the conversion map for combined characters
* ( optional , pass 0 if none )
* conv_func : algorithmic encoding conversion function
* ( optional , pass NULL if none )
* encoding : PG identifier for the local encoding
*
* For each character , the map is consulted first ; if no match , the cmap
* ( if provided ) is consulted next ; if still no match , the conv_func
* ( if provided ) is applied . An error is raised if no match is found .
*
* See pg_wchar . h for more details about the data structures used here .
*/
void
LocalToUtf ( const unsigned char * iso , unsigned char * utf ,
const pg_local_to_utf * map , const pg_local_to_utf_combined * cmap ,
int size1 , int size2 , int encoding , int len )
LocalToUtf ( const unsigned char * iso , int len ,
unsigned char * utf ,
const pg_local_to_utf * map , int mapsize ,
const pg_local_to_utf_combined * cmap , int cmapsize ,
utf_local_conversion_func conv_func ,
int encoding )
{
unsigned int iiso ;
uint32 iiso ;
int l ;
pg_local_to_utf * p ;
pg_local_to_utf_combined * cp ;
const pg_local_to_utf * p ;
const pg_local_to_utf_combined * cp ;
if ( ! PG_VALID_ENCODING ( encoding ) )
ereport ( ERROR ,
@ -526,7 +553,7 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
if ( ! IS_HIGHBIT_SET ( * iso ) )
{
/* ASCII case is easy */
/* ASCII case is easy, assume it's one-to-one conversion */
* utf + + = * iso + + ;
l = 1 ;
continue ;
@ -536,6 +563,7 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
if ( l < 0 )
break ;
/* collect coded char of length l */
if ( l = = 1 )
iiso = * iso + + ;
else if ( l = = 2 )
@ -562,61 +590,48 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
iiso = 0 ; /* keep compiler quiet */
}
p = bsearch ( & iiso , map , size1 ,
/* First check ordinary map */
p = bsearch ( & iiso , map , mapsize ,
sizeof ( pg_local_to_utf ) , compare2 ) ;
if ( p = = NULL )
if ( p )
{
/*
* not found in the ordinary map . if there ' s a combined character
* map , try with it
*/
if ( cmap )
{
cp = bsearch ( & iiso , cmap , size2 ,
sizeof ( pg_local_to_utf_combined ) , compare4 ) ;
utf = store_coded_char ( utf , p - > utf ) ;
continue ;
}
if ( cp )
{
if ( cp - > utf1 & 0xff000000 )
* utf + + = cp - > utf1 > > 24 ;
if ( cp - > utf1 & 0x00ff0000 )
* utf + + = ( cp - > utf1 & 0x00ff0000 ) > > 16 ;
if ( cp - > utf1 & 0x0000ff00 )
* utf + + = ( cp - > utf1 & 0x0000ff00 ) > > 8 ;
if ( cp - > utf1 & 0x000000ff )
* utf + + = cp - > utf1 & 0x000000ff ;
if ( cp - > utf2 & 0xff000000 )
* utf + + = cp - > utf2 > > 24 ;
if ( cp - > utf2 & 0x00ff0000 )
* utf + + = ( cp - > utf2 & 0x00ff0000 ) > > 16 ;
if ( cp - > utf2 & 0x0000ff00 )
* utf + + = ( cp - > utf2 & 0x0000ff00 ) > > 8 ;
if ( cp - > utf2 & 0x000000ff )
* utf + + = cp - > utf2 & 0x000000ff ;
/* If there's a combined character map, try that */
if ( cmap )
{
cp = bsearch ( & iiso , cmap , cmapsize ,
sizeof ( pg_local_to_utf_combined ) , compare4 ) ;
continue ;
}
if ( cp )
{
utf = store_coded_char ( utf , cp - > utf1 ) ;
utf = store_coded_char ( utf , cp - > utf2 ) ;
continue ;
}
report_untranslatable_char ( encoding , PG_UTF8 ,
( const char * ) ( iso - l ) , len ) ;
}
else
/* if there's a conversion function, try that */
if ( conv_func )
{
if ( p - > utf & 0xff000000 )
* utf + + = p - > utf > > 24 ;
if ( p - > utf & 0x00ff0000 )
* utf + + = ( p - > utf & 0x00ff0000 ) > > 16 ;
if ( p - > utf & 0x0000ff00 )
* utf + + = ( p - > utf & 0x0000ff00 ) > > 8 ;
if ( p - > utf & 0x000000ff )
* utf + + = p - > utf & 0x000000ff ;
uint32 converted = ( * conv_func ) ( iiso ) ;
if ( converted )
{
utf = store_coded_char ( utf , converted ) ;
continue ;
}
}
/* failed to translate this character */
report_untranslatable_char ( encoding , PG_UTF8 ,
( const char * ) ( iso - l ) , len ) ;
}
/* if we broke out of loop early, must be invalid input */
if ( len > 0 )
report_invalid_encoding ( encoding , ( const char * ) iso , len ) ;