@ -34,9 +34,12 @@
/*
* longest - longest - preferred matching engine
*
* On success , returns match endpoint address . Returns NULL on no match .
* Internal errors also return NULL , with v - > err set .
*/
static chr * /* endpoint, or NULL */
longest ( struct vars * v , /* used only for debug and exec flags */
static chr *
longest ( struct vars * v ,
struct dfa * d ,
chr * start , /* where the match should start */
chr * stop , /* match must end at or before here */
@ -51,11 +54,15 @@ longest(struct vars * v, /* used only for debug and exec flags */
int i ;
struct colormap * cm = d - > cm ;
/* prevent "uninitialized variable" warnings */
if ( hitstopp ! = NULL )
* hitstopp = 0 ;
/* initialize */
css = initialize ( v , d , start ) ;
if ( css = = NULL )
return NULL ;
cp = start ;
if ( hitstopp ! = NULL )
* hitstopp = 0 ;
/* startup */
FDEBUG ( ( " +++ startup +++ \n " ) ) ;
@ -74,8 +81,14 @@ longest(struct vars * v, /* used only for debug and exec flags */
return NULL ;
css - > lastseen = cp ;
/* main loop */
/*
* This is the main text - scanning loop . It seems worth having two copies
* to avoid the overhead of REG_FTRACE tests here , even in REG_DEBUG
* builds , when you ' re not actively tracing .
*/
# ifdef REG_DEBUG
if ( v - > eflags & REG_FTRACE )
{
while ( cp < realstop )
{
FDEBUG ( ( " +++ at c%d +++ \n " , ( int ) ( css - d - > ssets ) ) ) ;
@ -92,7 +105,10 @@ longest(struct vars * v, /* used only for debug and exec flags */
ss - > lastseen = cp ;
css = ss ;
}
}
else
# endif
{
while ( cp < realstop )
{
co = GETCOLOR ( cm , * cp ) ;
@ -107,6 +123,10 @@ longest(struct vars * v, /* used only for debug and exec flags */
ss - > lastseen = cp ;
css = ss ;
}
}
if ( ISERR ( ) )
return NULL ;
/* shutdown */
FDEBUG ( ( " +++ shutdown at c%d +++ \n " , ( int ) ( css - d - > ssets ) ) ) ;
@ -117,6 +137,8 @@ longest(struct vars * v, /* used only for debug and exec flags */
co = d - > cnfa - > eos [ ( v - > eflags & REG_NOTEOL ) ? 0 : 1 ] ;
FDEBUG ( ( " color %ld \n " , ( long ) co ) ) ;
ss = miss ( v , d , css , co , cp , start ) ;
if ( ISERR ( ) )
return NULL ;
/* special case: match ended at eol? */
if ( ss ! = NULL & & ( ss - > flags & POSTSTATE ) )
return cp ;
@ -138,14 +160,17 @@ longest(struct vars * v, /* used only for debug and exec flags */
/*
* shortest - shortest - preferred matching engine
*
* On success , returns match endpoint address . Returns NULL on no match .
* Internal errors also return NULL , with v - > err set .
*/
static chr * /* endpoint, or NULL */
static chr *
shortest ( struct vars * v ,
struct dfa * d ,
chr * start , /* where the match should start */
chr * min , /* match must end at or after here */
chr * max , /* match must end at or before here */
chr * * coldp , /* store coldstart pointer here, if nonNULL */
chr * * coldp , /* store coldstart pointer here, if non- NULL */
int * hitstopp ) /* record whether hit v->stop, if non-NULL */
{
chr * cp ;
@ -156,11 +181,17 @@ shortest(struct vars * v,
struct sset * ss ;
struct colormap * cm = d - > cm ;
/* prevent "uninitialized variable" warnings */
if ( coldp ! = NULL )
* coldp = NULL ;
if ( hitstopp ! = NULL )
* hitstopp = 0 ;
/* initialize */
css = initialize ( v , d , start ) ;
if ( css = = NULL )
return NULL ;
cp = start ;
if ( hitstopp ! = NULL )
* hitstopp = 0 ;
/* startup */
FDEBUG ( ( " --- startup --- \n " ) ) ;
@ -180,8 +211,14 @@ shortest(struct vars * v,
css - > lastseen = cp ;
ss = css ;
/* main loop */
/*
* This is the main text - scanning loop . It seems worth having two copies
* to avoid the overhead of REG_FTRACE tests here , even in REG_DEBUG
* builds , when you ' re not actively tracing .
*/
# ifdef REG_DEBUG
if ( v - > eflags & REG_FTRACE )
{
while ( cp < realmax )
{
FDEBUG ( ( " --- at c%d --- \n " , ( int ) ( css - d - > ssets ) ) ) ;
@ -200,7 +237,10 @@ shortest(struct vars * v,
if ( ( ss - > flags & POSTSTATE ) & & cp > = realmin )
break ; /* NOTE BREAK OUT */
}
}
else
# endif
{
while ( cp < realmax )
{
co = GETCOLOR ( cm , * cp ) ;
@ -217,6 +257,7 @@ shortest(struct vars * v,
if ( ( ss - > flags & POSTSTATE ) & & cp > = realmin )
break ; /* NOTE BREAK OUT */
}
}
if ( ss = = NULL )
return NULL ;
@ -389,7 +430,7 @@ hash(unsigned *uv,
* initialize - hand - craft a cache entry for startup , otherwise get ready
*/
static struct sset *
initialize ( struct vars * v , /* used only for debug flags */
initialize ( struct vars * v ,
struct dfa * d ,
chr * start )
{
@ -402,6 +443,8 @@ initialize(struct vars * v, /* used only for debug flags */
else
{ /* no, must (re)build it */
ss = getvacant ( v , d , start , start ) ;
if ( ss = = NULL )
return NULL ;
for ( i = 0 ; i < d - > wordsper ; i + + )
ss - > states [ i ] = 0 ;
BSET ( ss - > states , d - > cnfa - > pre ) ;
@ -420,10 +463,20 @@ initialize(struct vars * v, /* used only for debug flags */
}
/*
* miss - handle a cache miss
* miss - handle a stateset cache miss
*
* css is the current stateset , co is the color of the current input character ,
* cp points to the character after that ( which is where we may need to test
* LACONs ) . start does not affect matching behavior but is needed for pickss '
* heuristics about which stateset cache entry to replace .
*
* Ordinarily , returns the address of the next stateset ( the one that is
* valid after consuming the input character ) . Returns NULL if no valid
* NFA states remain , ie we have a certain match failure .
* Internal errors also return NULL , with v - > err set .
*/
static struct sset * /* NULL if goes to empty set */
miss ( struct vars * v , /* used only for debug flags */
static struct sset *
miss ( struct vars * v ,
struct dfa * d ,
struct sset * css ,
pcolor co ,
@ -449,9 +502,23 @@ miss(struct vars * v, /* used only for debug flags */
}
FDEBUG ( ( " miss \n " ) ) ;
/* first, what set of states would we end up in? */
/*
* Checking for operation cancel in the inner text search loop seems
* unduly expensive . As a compromise , check during cache misses .
*/
if ( CANCEL_REQUESTED ( v - > re ) )
{
ERR ( REG_CANCEL ) ;
return NULL ;
}
/*
* What set of states would we end up in after consuming the co character ?
* We first consider PLAIN arcs that consume the character , and then look
* to see what LACON arcs could be traversed after consuming it .
*/
for ( i = 0 ; i < d - > wordsper ; i + + )
d - > work [ i ] = 0 ;
d - > work [ i ] = 0 ; /* build new stateset bitmap in d->work */
ispost = 0 ;
noprogress = 1 ;
gotstate = 0 ;
@ -468,22 +535,31 @@ miss(struct vars * v, /* used only for debug flags */
noprogress = 0 ;
FDEBUG ( ( " %d -> %d \n " , i , ca - > to ) ) ;
}
dolacons = ( gotstate ) ? ( cnfa - > flags & HASLACONS ) : 0 ;
if ( ! gotstate )
return NULL ; /* character cannot reach any new state */
dolacons = ( cnfa - > flags & HASLACONS ) ;
sawlacons = 0 ;
/* outer loop handles transitive closure of reachable-by-LACON states */
while ( dolacons )
{ /* transitive closure */
{
dolacons = 0 ;
for ( i = 0 ; i < d - > nstates ; i + + )
if ( ISBSET ( d - > work , i ) )
for ( ca = cnfa - > states [ i ] ; ca - > co ! = COLORLESS ; ca + + )
{
if ( ca - > co < cnfa - > ncolors )
continue ; /* NOTE CONTINUE */
sawlacons = 1 ;
continue ; /* not a LACON arc */
if ( ISBSET ( d - > work , ca - > to ) )
continue ; /* NOTE CONTINUE */
continue ; /* arc would be a no-op anyway */
sawlacons = 1 ; /* this LACON affects our result */
if ( ! lacon ( v , cnfa , cp , ca - > co ) )
continue ; /* NOTE CONTINUE */
{
if ( ISERR ( ) )
return NULL ;
continue ; /* LACON arc cannot be traversed */
}
if ( ISERR ( ) )
return NULL ;
BSET ( d - > work , ca - > to ) ;
dolacons = 1 ;
if ( ca - > to = = cnfa - > post )
@ -493,11 +569,9 @@ miss(struct vars * v, /* used only for debug flags */
FDEBUG ( ( " %d :> %d \n " , i , ca - > to ) ) ;
}
}
if ( ! gotstate )
return NULL ;
h = HASH ( d - > work , d - > wordsper ) ;
/* next, is that in the cache? */
/* Is this stateset already in the cache? */
for ( p = d - > ssets , i = d - > nssused ; i > 0 ; p + + , i - - )
if ( HIT ( h , d - > work , p , d - > wordsper ) )
{
@ -507,6 +581,8 @@ miss(struct vars * v, /* used only for debug flags */
if ( i = = 0 )
{ /* nope, need a new cache entry */
p = getvacant ( v , d , cp , start ) ;
if ( p = = NULL )
return NULL ;
assert ( p ! = css ) ;
for ( i = 0 ; i < d - > wordsper ; i + + )
p - > states [ i ] = d - > work [ i ] ;
@ -517,8 +593,15 @@ miss(struct vars * v, /* used only for debug flags */
/* lastseen to be dealt with by caller */
}
/*
* Link new stateset to old , unless a LACON affected the result , in which
* case we don ' t create the link . That forces future transitions across
* this same arc ( same prior stateset and character color ) to come through
* miss ( ) again , so that we can recheck the LACON ( s ) , which might or might
* not pass since context will be different .
*/
if ( ! sawlacons )
{ /* lookahead conds. always cache miss */
{
FDEBUG ( ( " c%d[%d]->c%d \n " ,
( int ) ( css - d - > ssets ) , co , ( int ) ( p - d - > ssets ) ) ) ;
css - > outs [ co ] = p ;
@ -562,11 +645,12 @@ lacon(struct vars * v,
/*
* getvacant - get a vacant state set
*
* This routine clears out the inarcs and outarcs , but does not otherwise
* clear the innards of the state set - - that ' s up to the caller .
*/
static struct sset *
getvacant ( struct vars * v , /* used only for debug flags */
getvacant ( struct vars * v ,
struct dfa * d ,
chr * cp ,
chr * start )
@ -578,6 +662,8 @@ getvacant(struct vars * v, /* used only for debug flags */
color co ;
ss = pickss ( v , d , cp , start ) ;
if ( ss = = NULL )
return NULL ;
assert ( ! ( ss - > flags & LOCKED ) ) ;
/* clear out its inarcs, including self-referential ones */
@ -635,7 +721,7 @@ getvacant(struct vars * v, /* used only for debug flags */
* pickss - pick the next stateset to be used
*/
static struct sset *
pickss ( struct vars * v , /* used only for debug flags */
pickss ( struct vars * v ,
struct dfa * d ,
chr * cp ,
chr * start )
@ -691,7 +777,6 @@ pickss(struct vars * v, /* used only for debug flags */
/* nobody's old enough?!? -- something's really wrong */
FDEBUG ( ( " cannot find victim to replace! \n " ) ) ;
assert ( NOTREACHED ) ;
ERR ( REG_ASSERT ) ;
return d - > ssets ;
return NULL ;
}