@ -457,13 +457,149 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
Conf - > naffixes + + ;
Conf - > naffixes + + ;
}
}
/* Parsing states for parse_affentry() and friends */
# define PAE_WAIT_MASK 0
# define PAE_WAIT_MASK 0
# define PAE_INMASK 1
# define PAE_INMASK 1
# define PAE_WAIT_FIND 2
# define PAE_WAIT_FIND 2
# define PAE_INFIND 3
# define PAE_INFIND 3
# define PAE_WAIT_REPL 4
# define PAE_WAIT_REPL 4
# define PAE_INREPL 5
# define PAE_INREPL 5
# define PAE_WAIT_TYPE 6
# define PAE_WAIT_FLAG 7
/*
* Parse next space - separated field of an . affix file line .
*
* * str is the input pointer ( will be advanced past field )
* next is where to copy the field value to , with null termination
*
* The buffer at " next " must be of size BUFSIZ ; we truncate the input to fit .
*
* Returns TRUE if we found a field , FALSE if not .
*/
static bool
get_nextfield ( char * * str , char * next )
{
int state = PAE_WAIT_MASK ;
int avail = BUFSIZ ;
while ( * * str )
{
if ( state = = PAE_WAIT_MASK )
{
if ( t_iseq ( * str , ' # ' ) )
return false ;
else if ( ! t_isspace ( * str ) )
{
int clen = pg_mblen ( * str ) ;
if ( clen < avail )
{
COPYCHAR ( next , * str ) ;
next + = clen ;
avail - = clen ;
}
state = PAE_INMASK ;
}
}
else /* state == PAE_INMASK */
{
if ( t_isspace ( * str ) )
{
* next = ' \0 ' ;
return true ;
}
else
{
int clen = pg_mblen ( * str ) ;
if ( clen < avail )
{
COPYCHAR ( next , * str ) ;
next + = clen ;
avail - = clen ;
}
}
}
* str + = pg_mblen ( * str ) ;
}
* next = ' \0 ' ;
return ( state = = PAE_INMASK ) ; /* OK if we got a nonempty field */
}
/*
* Parses entry of an . affix file of MySpell or Hunspell format .
*
* An . affix file entry has the following format :
* - header
* < type > < flag > < cross_flag > < flag_count >
* - fields after header :
* < type > < flag > < find > < replace > < mask >
*
* str is the input line
* field values are returned to type etc , which must be buffers of size BUFSIZ .
*
* Returns number of fields found ; any omitted fields are set to empty strings .
*/
static int
parse_ooaffentry ( char * str , char * type , char * flag , char * find ,
char * repl , char * mask )
{
int state = PAE_WAIT_TYPE ;
int fields_read = 0 ;
bool valid = false ;
* type = * flag = * find = * repl = * mask = ' \0 ' ;
while ( * str )
{
switch ( state )
{
case PAE_WAIT_TYPE :
valid = get_nextfield ( & str , type ) ;
state = PAE_WAIT_FLAG ;
break ;
case PAE_WAIT_FLAG :
valid = get_nextfield ( & str , flag ) ;
state = PAE_WAIT_FIND ;
break ;
case PAE_WAIT_FIND :
valid = get_nextfield ( & str , find ) ;
state = PAE_WAIT_REPL ;
break ;
case PAE_WAIT_REPL :
valid = get_nextfield ( & str , repl ) ;
state = PAE_WAIT_MASK ;
break ;
case PAE_WAIT_MASK :
valid = get_nextfield ( & str , mask ) ;
state = - 1 ; /* force loop exit */
break ;
default :
elog ( ERROR , " unrecognized state in parse_ooaffentry: %d " ,
state ) ;
break ;
}
if ( valid )
fields_read + + ;
else
break ; /* early EOL */
if ( state < 0 )
break ; /* got all fields */
}
return fields_read ;
}
/*
* Parses entry of an . affix file of Ispell format
*
* An . affix file entry has the following format :
* < mask > > [ - < find > , ] < replace >
*/
static bool
static bool
parse_affentry ( char * str , char * mask , char * find , char * repl )
parse_affentry ( char * str , char * mask , char * find , char * repl )
{
{
@ -618,8 +754,6 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
int flag = 0 ;
int flag = 0 ;
char flagflags = 0 ;
char flagflags = 0 ;
tsearch_readline_state trst ;
tsearch_readline_state trst ;
int scanread = 0 ;
char scanbuf [ BUFSIZ ] ;
char * recoded ;
char * recoded ;
/* read file to find any flag */
/* read file to find any flag */
@ -682,8 +816,6 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
}
}
tsearch_readline_end ( & trst ) ;
tsearch_readline_end ( & trst ) ;
sprintf ( scanbuf , " %%6s %%%ds %%%ds %%%ds %%%ds " , BUFSIZ / 5 , BUFSIZ / 5 , BUFSIZ / 5 , BUFSIZ / 5 ) ;
if ( ! tsearch_readline_begin ( & trst , filename ) )
if ( ! tsearch_readline_begin ( & trst , filename ) )
ereport ( ERROR ,
ereport ( ERROR ,
( errcode ( ERRCODE_CONFIG_FILE_ERROR ) ,
( errcode ( ERRCODE_CONFIG_FILE_ERROR ) ,
@ -692,18 +824,21 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
while ( ( recoded = tsearch_readline ( & trst ) ) ! = NULL )
while ( ( recoded = tsearch_readline ( & trst ) ) ! = NULL )
{
{
int fields_read ;
if ( * recoded = = ' \0 ' | | t_isspace ( recoded ) | | t_iseq ( recoded , ' # ' ) )
if ( * recoded = = ' \0 ' | | t_isspace ( recoded ) | | t_iseq ( recoded , ' # ' ) )
goto nextline ;
goto nextline ;
scanread = sscanf ( recoded , scanbuf , type , sflag , find , repl , mask ) ;
fields_read = parse_ooaffentry ( recoded , type , sflag , find , repl , mask ) ;
if ( ptype )
if ( ptype )
pfree ( ptype ) ;
pfree ( ptype ) ;
ptype = lowerstr_ctx ( Conf , type ) ;
ptype = lowerstr_ctx ( Conf , type ) ;
if ( scanread < 4 | | ( STRNCMP ( ptype , " sfx " ) & & STRNCMP ( ptype , " pfx " ) ) )
if ( fields_read < 4 | |
( STRNCMP ( ptype , " sfx " ) ! = 0 & & STRNCMP ( ptype , " pfx " ) ! = 0 ) )
goto nextline ;
goto nextline ;
if ( scan read = = 4 )
if ( fields_ read = = 4 )
{
{
if ( strlen ( sflag ) ! = 1 )
if ( strlen ( sflag ) ! = 1 )
goto nextline ;
goto nextline ;
@ -722,9 +857,13 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
if ( strlen ( sflag ) ! = 1 | | flag ! = * sflag | | flag = = 0 )
if ( strlen ( sflag ) ! = 1 | | flag ! = * sflag | | flag = = 0 )
goto nextline ;
goto nextline ;
prepl = lowerstr_ctx ( Conf , repl ) ;
prepl = lowerstr_ctx ( Conf , repl ) ;
/* affix flag */
/* Find position of '/' in lowercased string "prepl" */
if ( ( ptr = strchr ( prepl , ' / ' ) ) ! = NULL )
if ( ( ptr = strchr ( prepl , ' / ' ) ) ! = NULL )
{
{
/*
* Here we use non - lowercased string " repl " . We need position
* of ' / ' in " repl " .
*/
* ptr = ' \0 ' ;
* ptr = ' \0 ' ;
ptr = repl + ( ptr - prepl ) + 1 ;
ptr = repl + ( ptr - prepl ) + 1 ;
while ( * ptr )
while ( * ptr )
@ -800,11 +939,12 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
if ( STRNCMP ( pstr , " compoundwords " ) = = 0 )
if ( STRNCMP ( pstr , " compoundwords " ) = = 0 )
{
{
/* Find position in lowercased string "pstr" */
s = findchar ( pstr , ' l ' ) ;
s = findchar ( pstr , ' l ' ) ;
if ( s )
if ( s )
{
{
s = recoded + ( s - pstr ) ; /* we need non-lowercased
/* Here we use non-lowercased string "recoded" */
* string */
s = recoded + ( s - pstr ) ;
while ( * s & & ! t_isspace ( s ) )
while ( * s & & ! t_isspace ( s ) )
s + = pg_mblen ( s ) ;
s + = pg_mblen ( s ) ;
while ( * s & & t_isspace ( s ) )
while ( * s & & t_isspace ( s ) )