@ -93,35 +93,83 @@ initSuffixTree(char *filename)
do
{
char src [ 4096 ] ;
char trg [ 4096 ] ;
int srclen ;
int trglen ;
char * line = NULL ;
/*
* pg_do_encoding_conversion ( ) ( called by tsearch_readline ( ) ) will
* emit exception if it finds untranslatable characters in current
* locale . We just skip such lines , continuing with the next .
*/
skip = true ;
PG_TRY ( ) ;
{
/*
* pg_do_encoding_conversion ( ) ( called by tsearch_readline ( ) ) will
* emit exception if it finds untranslatable characters in current
* locale . We just skip such characters .
*/
char * line ;
while ( ( line = tsearch_readline ( & trst ) ) ! = NULL )
{
if ( sscanf ( line , " %s \t %s \n " , src , trg ) ! = 2 )
continue ;
/*
* The format of each line must be " src trg " where src and trg
* are sequences of one or more non - whitespace characters ,
* separated by whitespace . Whitespace at start or end of
* line is ignored .
*/
int state ;
char * ptr ;
char * src = NULL ;
char * trg = NULL ;
int ptrlen ;
int srclen = 0 ;
int trglen = 0 ;
state = 0 ;
for ( ptr = line ; * ptr ; ptr + = ptrlen )
{
ptrlen = pg_mblen ( ptr ) ;
/* ignore whitespace, but end src or trg */
if ( t_isspace ( ptr ) )
{
if ( state = = 1 )
state = 2 ;
else if ( state = = 3 )
state = 4 ;
continue ;
}
switch ( state )
{
case 0 :
/* start of src */
src = ptr ;
srclen = ptrlen ;
state = 1 ;
break ;
case 1 :
/* continue src */
srclen + = ptrlen ;
break ;
case 2 :
/* start of trg */
trg = ptr ;
trglen = ptrlen ;
state = 3 ;
break ;
case 3 :
/* continue trg */
trglen + = ptrlen ;
break ;
default :
/* bogus line format */
state = - 1 ;
break ;
}
}
srclen = strlen ( src ) ;
trglen = strlen ( trg ) ;
if ( state > = 3 )
rootSuffixTree = placeChar ( rootSuffixTree ,
( unsigned char * ) src , srclen ,
trg , trglen ) ;
rootSuffixTree = placeChar ( rootSuffixTree ,
( unsigned char * ) src , srclen ,
trg , trglen ) ;
skip = false ;
pfree ( line ) ;
}
skip = false ;
}
PG_CATCH ( ) ;
{