@ -233,13 +233,6 @@ static int cmp(const chr *, const chr *, size_t);
static int casecmp ( const chr * , const chr * , size_t ) ;
static int casecmp ( const chr * , const chr * , size_t ) ;
/* info we need during compilation about a known capturing subexpression */
struct subinfo
{
struct state * left ; /* left end of its sub-NFA */
struct state * right ; /* right end of its sub-NFA */
} ;
/* internal variables, bundled for easy passing around */
/* internal variables, bundled for easy passing around */
struct vars
struct vars
{
{
@ -252,10 +245,10 @@ struct vars
int nexttype ; /* type of next token */
int nexttype ; /* type of next token */
chr nextvalue ; /* value (if any) of next token */
chr nextvalue ; /* value (if any) of next token */
int lexcon ; /* lexical context type (see regc_lex.c) */
int lexcon ; /* lexical context type (see regc_lex.c) */
int nsubexp ; /* number of known capturing subexpressions */
int nsubexp ; /* subexpression count */
struct subinfo * subs ; /* info about known capturing subexpressions */
struct subre * * subs ; /* subRE pointer vector */
size_t nsubs ; /* allocated length of subs[] vector */
size_t nsubs ; /* length of vector */
struct subinfo sub10 [ 10 ] ; /* initial vector, enough for most */
struct subre * sub10 [ 10 ] ; /* initial vector, enough for most */
struct nfa * nfa ; /* the NFA */
struct nfa * nfa ; /* the NFA */
struct colormap * cm ; /* character color map */
struct colormap * cm ; /* character color map */
color nlcolor ; /* color of newline */
color nlcolor ; /* color of newline */
@ -375,7 +368,7 @@ pg_regcomp(regex_t *re,
v - > subs = v - > sub10 ;
v - > subs = v - > sub10 ;
v - > nsubs = 10 ;
v - > nsubs = 10 ;
for ( j = 0 ; j < v - > nsubs ; j + + )
for ( j = 0 ; j < v - > nsubs ; j + + )
v - > subs [ j ] . left = v - > subs [ j ] . right = NULL ;
v - > subs [ j ] = NULL ;
v - > nfa = NULL ;
v - > nfa = NULL ;
v - > cm = NULL ;
v - > cm = NULL ;
v - > nlcolor = COLORLESS ;
v - > nlcolor = COLORLESS ;
@ -511,13 +504,13 @@ pg_regcomp(regex_t *re,
}
}
/*
/*
* moresubs - enlarge capturing - subexpressions vector
* moresubs - enlarge subRE vector
*/
*/
static void
static void
moresubs ( struct vars * v ,
moresubs ( struct vars * v ,
int wanted ) /* want enough room for this one */
int wanted ) /* want enough room for this one */
{
{
struct subinfo * p ;
struct subre * * p ;
size_t n ;
size_t n ;
assert ( wanted > 0 & & ( size_t ) wanted > = v - > nsubs ) ;
assert ( wanted > 0 & & ( size_t ) wanted > = v - > nsubs ) ;
@ -525,13 +518,13 @@ moresubs(struct vars *v,
if ( v - > subs = = v - > sub10 )
if ( v - > subs = = v - > sub10 )
{
{
p = ( struct subinfo * ) MALLOC ( n * sizeof ( struct subinfo ) ) ;
p = ( struct subre * * ) MALLOC ( n * sizeof ( struct subre * ) ) ;
if ( p ! = NULL )
if ( p ! = NULL )
memcpy ( VS ( p ) , VS ( v - > subs ) ,
memcpy ( VS ( p ) , VS ( v - > subs ) ,
v - > nsubs * sizeof ( struct subinfo ) ) ;
v - > nsubs * sizeof ( struct subre * ) ) ;
}
}
else
else
p = ( struct subinfo * ) REALLOC ( v - > subs , n * sizeof ( struct subinfo ) ) ;
p = ( struct subre * * ) REALLOC ( v - > subs , n * sizeof ( struct subre * ) ) ;
if ( p = = NULL )
if ( p = = NULL )
{
{
ERR ( REG_ESPACE ) ;
ERR ( REG_ESPACE ) ;
@ -539,7 +532,7 @@ moresubs(struct vars *v,
}
}
v - > subs = p ;
v - > subs = p ;
for ( p = & v - > subs [ v - > nsubs ] ; v - > nsubs < n ; p + + , v - > nsubs + + )
for ( p = & v - > subs [ v - > nsubs ] ; v - > nsubs < n ; p + + , v - > nsubs + + )
p - > left = p - > right = NULL ;
* p = NULL ;
assert ( v - > nsubs = = n ) ;
assert ( v - > nsubs = = n ) ;
assert ( ( size_t ) wanted < v - > nsubs ) ;
assert ( ( size_t ) wanted < v - > nsubs ) ;
}
}
@ -988,6 +981,7 @@ parseqatom(struct vars *v,
s = newstate ( v - > nfa ) ;
s = newstate ( v - > nfa ) ;
s2 = newstate ( v - > nfa ) ;
s2 = newstate ( v - > nfa ) ;
NOERRN ( ) ;
NOERRN ( ) ;
/* We may not need these arcs, but keep things connected for now */
EMPTYARC ( lp , s ) ;
EMPTYARC ( lp , s ) ;
EMPTYARC ( s2 , rp ) ;
EMPTYARC ( s2 , rp ) ;
NOERRN ( ) ;
NOERRN ( ) ;
@ -997,10 +991,6 @@ parseqatom(struct vars *v,
NOERRN ( ) ;
NOERRN ( ) ;
if ( cap )
if ( cap )
{
{
/* save the sub-NFA's endpoints for future backrefs to use */
assert ( v - > subs [ subno ] . left = = NULL ) ;
v - > subs [ subno ] . left = s ;
v - > subs [ subno ] . right = s2 ;
if ( atom - > capno = = 0 )
if ( atom - > capno = = 0 )
{
{
/* normal case: just mark the atom as capturing */
/* normal case: just mark the atom as capturing */
@ -1016,13 +1006,15 @@ parseqatom(struct vars *v,
t - > child = atom ;
t - > child = atom ;
atom = t ;
atom = t ;
}
}
assert ( v - > subs [ subno ] = = NULL ) ;
v - > subs [ subno ] = atom ;
}
}
/* postpone everything else pending possible {0} */
/* postpone everything else pending possible {0} */
break ;
break ;
case BACKREF : /* the Feature From The Black Lagoon */
case BACKREF : /* the Feature From The Black Lagoon */
INSIST ( type ! = LACON , REG_ESUBREG ) ;
INSIST ( type ! = LACON , REG_ESUBREG ) ;
INSIST ( v - > nextvalue < v - > nsubs , REG_ESUBREG ) ;
INSIST ( v - > nextvalue < v - > nsubs , REG_ESUBREG ) ;
INSIST ( v - > subs [ v - > nextvalue ] . left ! = NULL , REG_ESUBREG ) ;
INSIST ( v - > subs [ v - > nextvalue ] ! = NULL , REG_ESUBREG ) ;
NOERRN ( ) ;
NOERRN ( ) ;
assert ( v - > nextvalue > 0 ) ;
assert ( v - > nextvalue > 0 ) ;
atom = subre ( v , ' b ' , BACKR , lp , rp ) ;
atom = subre ( v , ' b ' , BACKR , lp , rp ) ;
@ -1097,7 +1089,7 @@ parseqatom(struct vars *v,
if ( atom ! = NULL )
if ( atom ! = NULL )
freesubre ( v , atom ) ;
freesubre ( v , atom ) ;
if ( atomtype = = ' ( ' )
if ( atomtype = = ' ( ' )
v - > subs [ subno ] . left = v - > subs [ subno ] . right = NULL ;
v - > subs [ subno ] = NULL ;
delsub ( v - > nfa , lp , rp ) ;
delsub ( v - > nfa , lp , rp ) ;
EMPTYARC ( lp , rp ) ;
EMPTYARC ( lp , rp ) ;
return top ;
return top ;
@ -1130,30 +1122,48 @@ parseqatom(struct vars *v,
NOERRN ( ) ;
NOERRN ( ) ;
}
}
/*
* For what follows , we need the atom to have its own begin / end states
* that are distinct from lp / rp , so that we can wrap iteration structure
* around it . The parenthesized - atom case above already made suitable
* states ( and we don ' t want to modify a capturing subre , since it ' s
* already recorded in v - > subs [ ] ) . Otherwise , we need more states .
*/
if ( atom - > begin = = lp | | atom - > end = = rp )
{
s = newstate ( v - > nfa ) ;
s2 = newstate ( v - > nfa ) ;
NOERRN ( ) ;
moveouts ( v - > nfa , lp , s ) ;
moveins ( v - > nfa , rp , s2 ) ;
atom - > begin = s ;
atom - > end = s2 ;
}
else
{
/* The atom's OK, but we must temporarily disconnect it from lp/rp */
/* (this removes the EMPTY arcs we made above) */
delsub ( v - > nfa , lp , atom - > begin ) ;
delsub ( v - > nfa , atom - > end , rp ) ;
}
/*----------
/*----------
* Prepare a general - purpose state skeleton .
* Prepare a general - purpose state skeleton .
*
*
* In the no - backrefs case , we want this :
* In the no - backrefs case , we want this :
*
*
* [ lp ] - - - > [ s ] - - - prefix - - - > [ begin ] - - - atom - - - > [ end ] - - - rest - - - > [ rp ]
* [ lp ] - - - > [ s ] - - - prefix - - - > - - - atom - - - > - - - rest - - - > [ rp ]
*
*
* where prefix is some repetitions of atom . In the general case we need
* where prefix is some repetitions of atom , and " rest " is the remainder
* of the branch . In the general case we need :
*
*
* [ lp ] - - - > [ s ] - - - iterator - - - > [ s2 ] - - - rest - - - > [ rp ]
* [ lp ] - - - > [ s ] - - - iterator - - - > [ s2 ] - - - rest - - - > [ rp ]
*
*
* where the iterator wraps around [ begin ] - - - atom - - - > [ end ]
* where the iterator wraps around the atom .
*
*
* We make the s state here for both cases ; s2 is made below if needed
* We make the s state here for both cases ; s2 is made below if needed
* - - - - - - - - - -
* - - - - - - - - - -
*/
*/
s = newstate ( v - > nfa ) ; /* first, new endpoints for the atom */
s2 = newstate ( v - > nfa ) ;
NOERRN ( ) ;
moveouts ( v - > nfa , lp , s ) ;
moveins ( v - > nfa , rp , s2 ) ;
NOERRN ( ) ;
atom - > begin = s ;
atom - > end = s2 ;
s = newstate ( v - > nfa ) ; /* set up starting state */
s = newstate ( v - > nfa ) ; /* set up starting state */
NOERRN ( ) ;
NOERRN ( ) ;
EMPTYARC ( lp , s ) ;
EMPTYARC ( lp , s ) ;
@ -1190,14 +1200,14 @@ parseqatom(struct vars *v,
{
{
assert ( atom - > begin - > nouts = = 1 ) ; /* just the EMPTY */
assert ( atom - > begin - > nouts = = 1 ) ; /* just the EMPTY */
delsub ( v - > nfa , atom - > begin , atom - > end ) ;
delsub ( v - > nfa , atom - > begin , atom - > end ) ;
assert ( v - > subs [ subno ] . left ! = NULL ) ;
assert ( v - > subs [ subno ] ! = NULL ) ;
/*
/*
* And here ' s why the recursion got postponed : it must wait until the
* And here ' s why the recursion got postponed : it must wait until the
* skeleton is filled in , because it may hit a backref that wants to
* skeleton is filled in , because it may hit a backref that wants to
* copy the filled - in skeleton .
* copy the filled - in skeleton .
*/
*/
dupnfa ( v - > nfa , v - > subs [ subno ] . left , v - > subs [ subno ] . right ,
dupnfa ( v - > nfa , v - > subs [ subno ] - > begin , v - > subs [ subno ] - > end ,
atom - > begin , atom - > end ) ;
atom - > begin , atom - > end ) ;
NOERRN ( ) ;
NOERRN ( ) ;