@ -1,13 +1,13 @@
/*-------------------------------------------------------------------------
*
* wparser_def . c
* Standard word parser
* Default text search parser
*
* Portions Copyright ( c ) 1996 - 2007 , PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $ PostgreSQL : pgsql / src / backend / tsearch / wparser_def . c , v 1.3 2007 / 09 / 07 15 : 09 : 55 teodor Exp $
* $ PostgreSQL : pgsql / src / backend / tsearch / wparser_def . c , v 1.4 2007 / 10 / 23 20 : 46 : 12 tgl Exp $
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
@ -22,79 +22,53 @@
# include "utils/builtins.h"
/* rememder !!!! */
# define LASTNUM 23
/* Output token categories */
# define LAT WORD 1
# define CYR WORD 2
# define UWORD 3
# define EMAIL 4
# define F URL 5
# define HOST 6
# define SCIENTIFIC 7
# define ASCII WORD 1
# define WORD_T 2
# define N UM WORD 3
# define EMAIL 4
# define URL_T 5
# define HOST 6
# define SCIENTIFIC 7
# define VERSIONNUMBER 8
# define PARTHYPHEN WORD 9
# define CYR PARTHYPHEN WORD 10
# define LATPARTHYPHEN WORD 11
# define SPACE 12
# define TAG 13
# define NUM PARTHWORD 9
# define PARTHWORD 10
# define ASCIIPARTH WORD 11
# define SPACE 12
# define TAG_T 13
# define PROTOCOL 14
# define HYPHENWORD 15
# define LATHYPHENWORD 16
# define CYRHYPHENWORD 17
# define URI 18
# define FILEPATH 19
# define DECIMAL 20
# define SIGNEDINT 21
# define UNSIGNEDINT 22
# define HTMLENTITY 23
static const char * lex_descr [ ] = {
# define NUMHWORD 15
# define ASCIIHWORD 16
# define HWORD 17
# define URI 18
# define FILEPATH 19
# define DECIMAL 20
# define SIGNEDINT 21
# define UNSIGNEDINT 22
# define HTMLENTITY 23
# define LASTNUM 23
static const char * const tok_alias [ ] = {
" " ,
" Latin word " ,
" Non-latin word " ,
" Word " ,
" Email " ,
" URL " ,
" Host " ,
" Scientific notation " ,
" VERSION " ,
" Part of hyphenated word " ,
" Non-latin part of hyphenated word " ,
" Latin part of hyphenated word " ,
" Space symbols " ,
" HTML Tag " ,
" Protocol head " ,
" Hyphenated word " ,
" Latin hyphenated word " ,
" Non-latin hyphenated word " ,
" URI " ,
" File or path name " ,
" Decimal notation " ,
" Signed integer " ,
" Unsigned integer " ,
" HTML Entity "
} ;
static const char * tok_alias [ ] = {
" " ,
" lword " ,
" nlword " ,
" asciiword " ,
" word " ,
" numword " ,
" email " ,
" url " ,
" host " ,
" sfloat " ,
" version " ,
" part_ hword" ,
" nlpart_ hword" ,
" lpart_ hword" ,
" hword_numpart " ,
" hword_part " ,
" hword_asciipart " ,
" blank " ,
" tag " ,
" protocol " ,
" numhword " ,
" asciihword " ,
" hword " ,
" lhword " ,
" nlhword " ,
" uri " ,
" file " ,
" float " ,
@ -103,12 +77,42 @@ static const char *tok_alias[] = {
" entity "
} ;
static const char * const lex_descr [ ] = {
" " ,
" Word, all ASCII " ,
" Word, all letters " ,
" Word, letters and digits " ,
" Email address " ,
" URL " ,
" Host " ,
" Scientific notation " ,
" Version number " ,
" Hyphenated word part, letters and digits " ,
" Hyphenated word part, all letters " ,
" Hyphenated word part, all ASCII " ,
" Space symbols " ,
" HTML tag " ,
" Protocol head " ,
" Hyphenated word, letters and digits " ,
" Hyphenated word, all ASCII " ,
" Hyphenated word, all letters " ,
" URI " ,
" File or path name " ,
" Decimal notation " ,
" Signed integer " ,
" Unsigned integer " ,
" HTML entity "
} ;
/* Parser states */
typedef enum
{
TPS_Base = 0 ,
TPS_InUWord ,
TPS_InLatWord ,
TPS_InCyrWord ,
TPS_InNum Word ,
TPS_InAscii Word ,
TPS_InWord ,
TPS_InUnsignedInt ,
TPS_InSignedIntFirst ,
TPS_InSignedInt ,
@ -167,20 +171,20 @@ typedef enum
TPS_InProtocolFirst ,
TPS_InProtocolSecond ,
TPS_InProtocolEnd ,
TPS_InHyphenLat WordFirst ,
TPS_InHyphenLat Word ,
TPS_InHyphenCyr WordFirst ,
TPS_InHyphenCyr Word ,
TPS_InHyphenU WordFirst ,
TPS_InHyphenU Word ,
TPS_InHyphenAscii WordFirst ,
TPS_InHyphenAscii Word ,
TPS_InHyphenWordFirst ,
TPS_InHyphenWord ,
TPS_InHyphenNum WordFirst ,
TPS_InHyphenNum Word ,
TPS_InHyphenValueFirst ,
TPS_InHyphenValue ,
TPS_InHyphenValueExact ,
TPS_InParseHyphen ,
TPS_InParseHyphenHyphen ,
TPS_InHyphenCyr WordPart ,
TPS_InHyphenLat WordPart ,
TPS_InHyphenU WordPart ,
TPS_InHyphenWordPart ,
TPS_InHyphenAscii WordPart ,
TPS_InHyphenNum WordPart ,
TPS_InHyphenUnsignedInt ,
TPS_InHDecimalPartFirst ,
TPS_InHDecimalPart ,
@ -192,7 +196,6 @@ typedef enum
/* forward declaration */
struct TParser ;
typedef int ( * TParserCharTest ) ( struct TParser * ) ; /* any p_is* functions
* except p_iseq */
typedef void ( * TParserSpecial ) ( struct TParser * ) ; /* special handler for
@ -208,6 +211,16 @@ typedef struct
TParserSpecial special ;
} TParserStateActionItem ;
/* Flag bits in TParserStateActionItem.flags */
# define A_NEXT 0x0000
# define A_BINGO 0x0001
# define A_POP 0x0002
# define A_PUSH 0x0004
# define A_RERUN 0x0008
# define A_CLEAR 0x0010
# define A_MERGE 0x0020
# define A_CLRALL 0x0040
typedef struct
{
TParserState state ;
@ -255,6 +268,11 @@ typedef struct TParser
} TParser ;
/* forward decls here */
static bool TParserGet ( TParser * prs ) ;
static TParserPosition *
newTParserPosition ( TParserPosition * prev )
{
@ -303,8 +321,6 @@ TParserInit(char *str, int len)
return prs ;
}
static bool TParserGet ( TParser * prs ) ;
static void
TParserClose ( TParser * prs )
{
@ -325,10 +341,10 @@ TParserClose(TParser * prs)
}
/*
* defining support function , equvalent is * macrose s , but
* Character - type support functions , equi valent to is * macros , but
* working with any possible encodings and locales . Note ,
* that with multibyte encoding and C - locale isw * function may fail
* or give wrong result . Note 2 : multibyte encoding and C - local
* or give wrong result . Note 2 : multibyte encoding and C - locale
* often are used for Asian languages
*/
@ -487,17 +503,13 @@ p_isascii(TParser * prs)
}
static int
p_islatin ( TParser * prs )
p_isasclet ( TParser * prs )
{
return ( p_isalpha ( prs ) & & p_isascii ( prs ) ) ? 1 : 0 ;
return ( p_isascii ( prs ) & & p_isalpha ( prs ) ) ? 1 : 0 ;
}
static int
p_isnonlatin ( TParser * prs )
{
return ( p_isalpha ( prs ) & & ! p_isascii ( prs ) ) ? 1 : 0 ;
}
/* deliberately suppress unused-function complaints for the above */
void _make_compiler_happy ( void ) ;
void
_make_compiler_happy ( void )
@ -638,21 +650,12 @@ p_isURI(TParser * prs)
* Table of state / action of parser
*/
# define A_NEXT 0x0000
# define A_BINGO 0x0001
# define A_POP 0x0002
# define A_PUSH 0x0004
# define A_RERUN 0x0008
# define A_CLEAR 0x0010
# define A_MERGE 0x0020
# define A_CLRALL 0x0040
static TParserStateActionItem actionTPS_Base [ ] = {
{ p_isEOF , 0 , A_NEXT , TPS_Null , 0 , NULL } ,
{ p_iseqC , ' < ' , A_PUSH , TPS_InTagFirst , 0 , NULL } ,
{ p_isignore , 0 , A_NEXT , TPS_InSpace , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InLat Word , 0 , NULL } ,
{ p_isnonlatin , 0 , A_NEXT , TPS_InCyr Word , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InAsciiWord , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InWord , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InUnsignedInt , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InSignedIntFirst , 0 , NULL } ,
{ p_iseqC , ' + ' , A_PUSH , TPS_InSignedIntFirst , 0 , NULL } ,
@ -664,37 +667,38 @@ static TParserStateActionItem actionTPS_Base[] = {
} ;
static TParserStateActionItem actionTPS_InU Word [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , UWORD , NULL } ,
{ p_isalnum , 0 , A_NEXT , TPS_InU Word , 0 , NULL } ,
static TParserStateActionItem actionTPS_InNum Word [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , N UM WORD, NULL } ,
{ p_isalnum , 0 , A_NEXT , TPS_InNum Word , 0 , NULL } ,
{ p_iseqC , ' @ ' , A_PUSH , TPS_InEmail , 0 , NULL } ,
{ p_iseqC , ' / ' , A_PUSH , TPS_InFileFirst , 0 , NULL } ,
{ p_iseqC , ' . ' , A_PUSH , TPS_InFileNext , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenU WordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_Base , UWORD , NULL }
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenNum WordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_Base , N UM WORD, NULL }
} ;
static TParserStateActionItem actionTPS_InLat Word [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , LAT WORD, NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_Null , 0 , NULL } ,
static TParserStateActionItem actionTPS_InAscii Word [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , ASCII WORD, NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_Null , 0 , NULL } ,
{ p_iseqC , ' . ' , A_PUSH , TPS_InHostFirstDomain , 0 , NULL } ,
{ p_iseqC , ' . ' , A_PUSH , TPS_InFileNext , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHostFirstAN , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenLat WordFirst , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenAscii WordFirst , 0 , NULL } ,
{ p_iseqC , ' @ ' , A_PUSH , TPS_InEmail , 0 , NULL } ,
{ p_iseqC , ' : ' , A_PUSH , TPS_InProtocolFirst , 0 , NULL } ,
{ p_iseqC , ' / ' , A_PUSH , TPS_InFileFirst , 0 , NULL } ,
{ p_isdigit , 0 , A_PUSH , TPS_InHost , 0 , NULL } ,
{ p_isalnum , 0 , A_NEXT , TPS_InUWord , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_Base , LATWORD , NULL }
{ p_isdigit , 0 , A_NEXT , TPS_InNumWord , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InWord , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_Base , ASCIIWORD , NULL }
} ;
static TParserStateActionItem actionTPS_InCyr Word [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , CYR WORD, NULL } ,
{ p_isnonlatin , 0 , A_NEXT , TPS_Null , 0 , NULL } ,
{ p_isalnum , 0 , A_NEXT , TPS_InU Word , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenCyr WordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_Base , CYR WORD, NULL }
static TParserStateActionItem actionTPS_InWord [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , WORD_T , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_Null , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InNum Word , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenWordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_Base , WORD_T , NULL }
} ;
static TParserStateActionItem actionTPS_InUnsignedInt [ ] = {
@ -704,8 +708,8 @@ static TParserStateActionItem actionTPS_InUnsignedInt[] = {
{ p_iseqC , ' . ' , A_PUSH , TPS_InUDecimalFirst , 0 , NULL } ,
{ p_iseqC , ' e ' , A_PUSH , TPS_InMantissaFirst , 0 , NULL } ,
{ p_iseqC , ' E ' , A_PUSH , TPS_InMantissaFirst , 0 , NULL } ,
{ p_islatin , 0 , A_PUSH , TPS_InHost , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InU Word , 0 , NULL } ,
{ p_isasclet , 0 , A_PUSH , TPS_InHost , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InNum Word , 0 , NULL } ,
{ p_iseqC , ' / ' , A_PUSH , TPS_InFileFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_Base , UNSIGNEDINT , NULL }
} ;
@ -816,13 +820,13 @@ static TParserStateActionItem actionTPS_InMantissa[] = {
static TParserStateActionItem actionTPS_InHTMLEntityFirst [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_iseqC , ' # ' , A_NEXT , TPS_InHTMLEntityNumFirst , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHTMLEntity , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHTMLEntity , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
static TParserStateActionItem actionTPS_InHTMLEntity [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHTMLEntity , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHTMLEntity , 0 , NULL } ,
{ p_iseqC , ' ; ' , A_NEXT , TPS_InHTMLEntityEnd , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
@ -849,7 +853,7 @@ static TParserStateActionItem actionTPS_InTagFirst[] = {
{ p_iseqC , ' / ' , A_PUSH , TPS_InTagCloseFirst , 0 , NULL } ,
{ p_iseqC , ' ! ' , A_PUSH , TPS_InCommentFirst , 0 , NULL } ,
{ p_iseqC , ' ? ' , A_PUSH , TPS_InXMLBegin , 0 , NULL } ,
{ p_islatin , 0 , A_PUSH , TPS_InTagName , 0 , NULL } ,
{ p_isasclet , 0 , A_PUSH , TPS_InTagName , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
@ -863,7 +867,7 @@ static TParserStateActionItem actionTPS_InXMLBegin[] = {
static TParserStateActionItem actionTPS_InTagCloseFirst [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InTagName , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InTagName , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
@ -873,7 +877,7 @@ static TParserStateActionItem actionTPS_InTagName[] = {
{ p_iseqC , ' / ' , A_NEXT , TPS_InTagBeginEnd , 0 , NULL } ,
{ p_iseqC , ' > ' , A_NEXT , TPS_InTagEnd , 0 , SpecialTags } ,
{ p_isspace , 0 , A_NEXT , TPS_InTag , 0 , SpecialTags } ,
{ p_islatin , 0 , A_NEXT , TPS_Null , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_Null , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
@ -888,7 +892,7 @@ static TParserStateActionItem actionTPS_InTag[] = {
{ p_iseqC , ' > ' , A_NEXT , TPS_InTagEnd , 0 , SpecialTags } ,
{ p_iseqC , ' \' ' , A_NEXT , TPS_InTagEscapeK , 0 , NULL } ,
{ p_iseqC , ' " ' , A_NEXT , TPS_InTagEscapeKK , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_Null , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_Null , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_Null , 0 , NULL } ,
{ p_iseqC , ' = ' , A_NEXT , TPS_Null , 0 , NULL } ,
{ p_iseqC , ' - ' , A_NEXT , TPS_Null , 0 , NULL } ,
@ -924,7 +928,7 @@ static TParserStateActionItem actionTPS_InTagBackSleshed[] = {
} ;
static TParserStateActionItem actionTPS_InTagEnd [ ] = {
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_Base , TAG , NULL }
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_Base , TAG_T , NULL }
} ;
static TParserStateActionItem actionTPS_InCommentFirst [ ] = {
@ -962,19 +966,19 @@ static TParserStateActionItem actionTPS_InCloseCommentLast[] = {
} ;
static TParserStateActionItem actionTPS_InCommentEnd [ ] = {
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_Base , TAG , NULL }
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_Base , TAG_T , NULL }
} ;
static TParserStateActionItem actionTPS_InHostFirstDomain [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHostDomainSecond , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHostDomainSecond , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHost , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
static TParserStateActionItem actionTPS_InHostDomainSecond [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHostDomain , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHostDomain , 0 , NULL } ,
{ p_isdigit , 0 , A_PUSH , TPS_InHost , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHostFirstAN , 0 , NULL } ,
{ p_iseqC , ' . ' , A_PUSH , TPS_InHostFirstDomain , 0 , NULL } ,
@ -984,7 +988,7 @@ static TParserStateActionItem actionTPS_InHostDomainSecond[] = {
static TParserStateActionItem actionTPS_InHostDomain [ ] = {
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_Base , HOST , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHostDomain , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHostDomain , 0 , NULL } ,
{ p_isdigit , 0 , A_PUSH , TPS_InHost , 0 , NULL } ,
{ p_iseqC , ' : ' , A_PUSH , TPS_InPortFirst , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHostFirstAN , 0 , NULL } ,
@ -1013,14 +1017,14 @@ static TParserStateActionItem actionTPS_InPort[] = {
static TParserStateActionItem actionTPS_InHostFirstAN [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHost , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHost , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHost , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
static TParserStateActionItem actionTPS_InHost [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHost , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHost , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHost , 0 , NULL } ,
{ p_iseqC , ' @ ' , A_PUSH , TPS_InEmail , 0 , NULL } ,
{ p_iseqC , ' . ' , A_PUSH , TPS_InHostFirstDomain , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHostFirstAN , 0 , NULL } ,
@ -1034,7 +1038,7 @@ static TParserStateActionItem actionTPS_InEmail[] = {
static TParserStateActionItem actionTPS_InFileFirst [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_iseqC , ' . ' , A_NEXT , TPS_InPathFirst , 0 , NULL } ,
{ p_iseqC , ' _ ' , A_NEXT , TPS_InFile , 0 , NULL } ,
@ -1045,7 +1049,7 @@ static TParserStateActionItem actionTPS_InFileFirst[] = {
static TParserStateActionItem actionTPS_InFileTwiddle [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_iseqC , ' _ ' , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_iseqC , ' / ' , A_NEXT , TPS_InFileFirst , 0 , NULL } ,
@ -1054,7 +1058,7 @@ static TParserStateActionItem actionTPS_InFileTwiddle[] = {
static TParserStateActionItem actionTPS_InPathFirst [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_iseqC , ' _ ' , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_iseqC , ' . ' , A_NEXT , TPS_InPathSecond , 0 , NULL } ,
@ -1079,7 +1083,7 @@ static TParserStateActionItem actionTPS_InPathSecond[] = {
static TParserStateActionItem actionTPS_InFile [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , FILEPATH , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InFile , 0 , NULL } ,
{ p_iseqC , ' . ' , A_PUSH , TPS_InFileNext , 0 , NULL } ,
{ p_iseqC , ' _ ' , A_NEXT , TPS_InFile , 0 , NULL } ,
@ -1091,7 +1095,7 @@ static TParserStateActionItem actionTPS_InFile[] = {
static TParserStateActionItem actionTPS_InFileNext [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_islatin , 0 , A_CLEAR , TPS_InFile , 0 , NULL } ,
{ p_isasclet , 0 , A_CLEAR , TPS_InFile , 0 , NULL } ,
{ p_isdigit , 0 , A_CLEAR , TPS_InFile , 0 , NULL } ,
{ p_iseqC , ' _ ' , A_CLEAR , TPS_InFile , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
@ -1119,7 +1123,7 @@ static TParserStateActionItem actionTPS_InURI[] = {
static TParserStateActionItem actionTPS_InFURL [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_isURI , 0 , A_BINGO | A_CLRALL , TPS_Base , F URL, SpecialFURL } ,
{ p_isURI , 0 , A_BINGO | A_CLRALL , TPS_Base , URL_T , SpecialFURL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
@ -1139,54 +1143,52 @@ static TParserStateActionItem actionTPS_InProtocolEnd[] = {
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_Base , PROTOCOL , NULL }
} ;
static TParserStateActionItem actionTPS_InHyphenLat WordFirst [ ] = {
static TParserStateActionItem actionTPS_InHyphenAscii WordFirst [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHyphenLat Word , 0 , NULL } ,
{ p_isnonlatin , 0 , A_NEXT , TPS_InHyphenU Word , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHyphenAscii Word , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenWord , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenValue , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenU Word , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenNum Word , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
static TParserStateActionItem actionTPS_InHyphenLat Word [ ] = {
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , LATHYPHEN WORD, SpecialHyphen } ,
{ p_islatin , 0 , A_NEXT , TPS_InHyphenLat Word , 0 , NULL } ,
{ p_isnonlatin , 0 , A_NEXT , TPS_InHyphenU Word , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenU Word , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenLat WordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , LATHYPHEN WORD, SpecialHyphen }
static TParserStateActionItem actionTPS_InHyphenAscii Word [ ] = {
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , ASCIIH WORD, SpecialHyphen } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHyphenAscii Word , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenWord , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenNum Word , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenAscii WordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , ASCIIH WORD, SpecialHyphen }
} ;
static TParserStateActionItem actionTPS_InHyphenCyr WordFirst [ ] = {
static TParserStateActionItem actionTPS_InHyphenWordFirst [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_isnonlatin , 0 , A_NEXT , TPS_InHyphenCyrWord , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHyphenUWord , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenWord , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenValue , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenU Word , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenNum Word , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
static TParserStateActionItem actionTPS_InHyphenCyrWord [ ] = {
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , CYRHYPHENWORD , SpecialHyphen } ,
{ p_isnonlatin , 0 , A_NEXT , TPS_InHyphenCyrWord , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHyphenUWord , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenUWord , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenCyrWordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , CYRHYPHENWORD , SpecialHyphen }
static TParserStateActionItem actionTPS_InHyphenWord [ ] = {
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , HWORD , SpecialHyphen } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenWord , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenNumWord , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenWordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , HWORD , SpecialHyphen }
} ;
static TParserStateActionItem actionTPS_InHyphenU WordFirst [ ] = {
static TParserStateActionItem actionTPS_InHyphenNum WordFirst [ ] = {
{ p_isEOF , 0 , A_POP , TPS_Null , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenValue , 0 , NULL } ,
{ p_isalnum , 0 , A_NEXT , TPS_InHyphenU Word , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenNum Word , 0 , NULL } ,
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
static TParserStateActionItem actionTPS_InHyphenU Word [ ] = {
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , HYPHE NWORD, SpecialHyphen } ,
{ p_isalnum , 0 , A_NEXT , TPS_InHyphenU Word , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenU WordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , HYPHE NWORD, SpecialHyphen }
static TParserStateActionItem actionTPS_InHyphenNum Word [ ] = {
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , NUMH WORD , SpecialHyphen } ,
{ p_isalnum , 0 , A_NEXT , TPS_InHyphenNum Word , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenNum WordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , NUMH WORD , SpecialHyphen }
} ;
static TParserStateActionItem actionTPS_InHyphenValueFirst [ ] = {
@ -1196,26 +1198,26 @@ static TParserStateActionItem actionTPS_InHyphenValueFirst[] = {
} ;
static TParserStateActionItem actionTPS_InHyphenValue [ ] = {
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , HYPHE NWORD, SpecialHyphen } ,
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , NUMH WORD , SpecialHyphen } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenValue , 0 , NULL } ,
{ p_iseqC , ' . ' , A_PUSH , TPS_InHyphenValueFirst , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenU WordFirst , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenU Word , 0 , NULL } ,
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , HYPHE NWORD, SpecialHyphen }
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenNum WordFirst , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenNum Word , 0 , NULL } ,
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , NUMH WORD , SpecialHyphen }
} ;
static TParserStateActionItem actionTPS_InHyphenValueExact [ ] = {
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , HYPHE NWORD, SpecialHyphen } ,
{ p_isEOF , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , NUMH WORD , SpecialHyphen } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenValueExact , 0 , NULL } ,
{ p_iseqC , ' . ' , A_PUSH , TPS_InHyphenValueFirst , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenU WordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , HYPHE NWORD, SpecialHyphen }
{ p_iseqC , ' - ' , A_PUSH , TPS_InHyphenNum WordFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO | A_CLRALL , TPS_InParseHyphen , NUMH WORD , SpecialHyphen }
} ;
static TParserStateActionItem actionTPS_InParseHyphen [ ] = {
{ p_isEOF , 0 , A_RERUN , TPS_Base , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHyphenLat WordPart , 0 , NULL } ,
{ p_isnonlatin , 0 , A_NEXT , TPS_InHyphenCyr WordPart , 0 , NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHyphenAscii WordPart , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenWordPart , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenUnsignedInt , 0 , NULL } ,
{ p_iseqC , ' - ' , A_PUSH , TPS_InParseHyphenHyphen , 0 , NULL } ,
{ NULL , 0 , A_RERUN , TPS_Base , 0 , NULL }
@ -1227,32 +1229,31 @@ static TParserStateActionItem actionTPS_InParseHyphenHyphen[] = {
{ NULL , 0 , A_POP , TPS_Null , 0 , NULL }
} ;
static TParserStateActionItem actionTPS_InHyphenCyrWordPart [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , CYRPARTHYPHENWORD , NULL } ,
{ p_isnonlatin , 0 , A_NEXT , TPS_InHyphenCyrWordPart , 0 , NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHyphenUWordPart , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenUWordPart , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_InParseHyphen , CYRPARTHYPHENWORD , NULL }
static TParserStateActionItem actionTPS_InHyphenWordPart [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , PARTHWORD , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenWordPart , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenNumWordPart , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_InParseHyphen , PARTHWORD , NULL }
} ;
static TParserStateActionItem actionTPS_InHyphenLat WordPart [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , LATPARTHYPHEN WORD, NULL } ,
{ p_islatin , 0 , A_NEXT , TPS_InHyphenLat WordPart , 0 , NULL } ,
{ p_isnonlatin , 0 , A_NEXT , TPS_InHyphenU WordPart , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenU WordPart , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_InParseHyphen , LATPARTHYPHEN WORD, NULL }
static TParserStateActionItem actionTPS_InHyphenAscii WordPart [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , ASCIIPARTH WORD, NULL } ,
{ p_isasclet , 0 , A_NEXT , TPS_InHyphenAscii WordPart , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenWordPart , 0 , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenNum WordPart , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_InParseHyphen , ASCIIPARTH WORD, NULL }
} ;
static TParserStateActionItem actionTPS_InHyphenU WordPart [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , PARTHYPHEN WORD , NULL } ,
{ p_isalnum , 0 , A_NEXT , TPS_InHyphenU WordPart , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_InParseHyphen , PARTHYPHEN WORD , NULL }
static TParserStateActionItem actionTPS_InHyphenNum WordPart [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , NUM PARTHWORD, NULL } ,
{ p_isalnum , 0 , A_NEXT , TPS_InHyphenNum WordPart , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_InParseHyphen , NUM PARTHWORD, NULL }
} ;
static TParserStateActionItem actionTPS_InHyphenUnsignedInt [ ] = {
{ p_isEOF , 0 , A_BINGO , TPS_Base , UNSIGNEDINT , NULL } ,
{ p_isdigit , 0 , A_NEXT , TPS_InHyphenUnsignedInt , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenU WordPart , 0 , NULL } ,
{ p_isalpha , 0 , A_NEXT , TPS_InHyphenNum WordPart , 0 , NULL } ,
{ p_iseqC , ' . ' , A_PUSH , TPS_InHDecimalPartFirst , 0 , NULL } ,
{ NULL , 0 , A_BINGO , TPS_InParseHyphen , UNSIGNEDINT , NULL }
} ;
@ -1284,14 +1285,14 @@ static TParserStateActionItem actionTPS_InHVersionPart[] = {
} ;
/*
* order should be the same as in typedef enum { } TParserState ! !
* order must be the same as in typedef enum { } TParserState ! !
*/
static const TParserStateAction Actions [ ] = {
{ TPS_Base , actionTPS_Base } ,
{ TPS_InUWord , actionTPS_InU Word } ,
{ TPS_InLatWord , actionTPS_InLat Word } ,
{ TPS_InCyr Word , actionTPS_InCyr Word } ,
{ TPS_InNumWord , actionTPS_InNum Word } ,
{ TPS_InAsciiWord , actionTPS_InAscii Word } ,
{ TPS_InWord , actionTPS_InWord } ,
{ TPS_InUnsignedInt , actionTPS_InUnsignedInt } ,
{ TPS_InSignedIntFirst , actionTPS_InSignedIntFirst } ,
{ TPS_InSignedInt , actionTPS_InSignedInt } ,
@ -1350,20 +1351,20 @@ static const TParserStateAction Actions[] = {
{ TPS_InProtocolFirst , actionTPS_InProtocolFirst } ,
{ TPS_InProtocolSecond , actionTPS_InProtocolSecond } ,
{ TPS_InProtocolEnd , actionTPS_InProtocolEnd } ,
{ TPS_InHyphenLatWordFirst , actionTPS_InHyphenLat WordFirst } ,
{ TPS_InHyphenLatWord , actionTPS_InHyphenLat Word } ,
{ TPS_InHyphenCyr WordFirst , actionTPS_InHyphenCyr WordFirst } ,
{ TPS_InHyphenCyr Word , actionTPS_InHyphenCyr Word } ,
{ TPS_InHyphenUWordFirst , actionTPS_InHyphenU WordFirst } ,
{ TPS_InHyphenUWord , actionTPS_InHyphenU Word } ,
{ TPS_InHyphenAsciiWordFirst , actionTPS_InHyphenAscii WordFirst } ,
{ TPS_InHyphenAsciiWord , actionTPS_InHyphenAscii Word } ,
{ TPS_InHyphenWordFirst , actionTPS_InHyphenWordFirst } ,
{ TPS_InHyphenWord , actionTPS_InHyphenWord } ,
{ TPS_InHyphenNumWordFirst , actionTPS_InHyphenNum WordFirst } ,
{ TPS_InHyphenNumWord , actionTPS_InHyphenNum Word } ,
{ TPS_InHyphenValueFirst , actionTPS_InHyphenValueFirst } ,
{ TPS_InHyphenValue , actionTPS_InHyphenValue } ,
{ TPS_InHyphenValueExact , actionTPS_InHyphenValueExact } ,
{ TPS_InParseHyphen , actionTPS_InParseHyphen } ,
{ TPS_InParseHyphenHyphen , actionTPS_InParseHyphenHyphen } ,
{ TPS_InHyphenCyr WordPart , actionTPS_InHyphenCyr WordPart } ,
{ TPS_InHyphenLatWordPart , actionTPS_InHyphenLat WordPart } ,
{ TPS_InHyphenUWordPart , actionTPS_InHyphenU WordPart } ,
{ TPS_InHyphenWordPart , actionTPS_InHyphenWordPart } ,
{ TPS_InHyphenAsciiWordPart , actionTPS_InHyphenAscii WordPart } ,
{ TPS_InHyphenNumWordPart , actionTPS_InHyphenNum WordPart } ,
{ TPS_InHyphenUnsignedInt , actionTPS_InHyphenUnsignedInt } ,
{ TPS_InHDecimalPartFirst , actionTPS_InHDecimalPartFirst } ,
{ TPS_InHDecimalPart , actionTPS_InHDecimalPart } ,
@ -1378,10 +1379,11 @@ TParserGet(TParser * prs)
{
TParserStateActionItem * item = NULL ;
Assert ( prs - > state ) ;
if ( prs - > state - > posbyte > = prs - > lenstr )
return false ;
Assert ( prs - > state ) ;
prs - > lexeme = prs - > str + prs - > state - > posbyte ;
prs - > state - > pushedAtAction = NULL ;
@ -1488,10 +1490,12 @@ TParserGet(TParser * prs)
prs - > state - > state = item - > tostate ;
/* check for go away */
if ( ( item - > flags & A_BINGO ) | | ( prs - > state - > posbyte > = prs - > lenstr & & ( item - > flags & A_RERUN ) = = 0 ) )
if ( ( item - > flags & A_BINGO ) | |
( prs - > state - > posbyte > = prs - > lenstr & &
( item - > flags & A_RERUN ) = = 0 ) )
break ;
/* go to begining of loop if we should rerun or we just restore state */
/* go to beginn ing of loop if we should rerun or we just restore state */
if ( item - > flags & ( A_RERUN | A_POP ) )
continue ;
@ -1557,16 +1561,15 @@ prsd_end(PG_FUNCTION_ARGS)
PG_RETURN_VOID ( ) ;
}
# define LEAVETOKEN(x) ( (x)==12 )
# define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
# define ENDPUNCTOKEN(x) ( (x)==12 )
# define LEAVETOKEN(x) ( (x)==SPACE )
# define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
# define ENDPUNCTOKEN(x) ( (x)==SPACE )
# define TS_IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
# define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
# define HTMLHLIDIGNORE(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
# define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) )
# define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || TS_IDIGNORE(x) )
# define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==HTMLENTITY )
# define HLIDIGNORE(x) ( (x)==URL_T || (x)==TAG_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
# define HTMLHLIDIGNORE(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
# define NONWORDTOKEN(x) ( (x)==SPACE || HLIDIGNORE(x) )
# define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
typedef struct
{