@ -8,7 +8,7 @@
*
*
*
*
* IDENTIFICATION
* IDENTIFICATION
* $ PostgreSQL : pgsql / src / backend / utils / adt / regexp . c , v 1.60 .2 .3 2007 / 01 / 03 22 : 39 : 42 tgl Exp $
* $ PostgreSQL : pgsql / src / backend / utils / adt / regexp . c , v 1.60 .2 .4 2008 / 03 / 19 02 : 41 : 00 tgl Exp $
*
*
* Alistair Crooks added the code for the regex caching
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there ' s a good chance
* agc - cached the regular expressions used - there ' s a good chance
@ -192,36 +192,31 @@ RE_compile_and_cache(text *text_re, int cflags)
}
}
/*
/*
* RE_compile_and_execute - compile and execute a RE
* RE_execute - execute a RE
*
*
* Returns TRUE on match , FALSE on no match
* Returns TRUE on match , FALSE on no match
*
*
* text_re - - - the pattern , expressed as an * untoasted * TEXT object
* re - - - the compiled pattern as returned by RE_compile_and_cache
* dat - - - the data to match against ( need not be null - terminated )
* dat - - - the data to match against ( need not be null - terminated )
* dat_len - - - the length of the data string
* dat_len - - - the length of the data string
* cflags - - - compile options for the pattern
* nmatch , pmatch - - - optional return area for match details
* nmatch , pmatch - - - optional return area for match details
*
*
* Both pattern and data are given in the database encoding . We internally
* Data is given in the database encoding . We internally
* convert to array of pg_wchar which is what Spencer ' s regex package wants .
* convert to array of pg_wchar which is what Spencer ' s regex package wants .
*/
*/
static bool
static bool
RE_compile_and_ execute ( text * text_ re, char * dat , int dat_len ,
RE_execute ( regex_t * re , char * dat , int dat_len ,
int cflags , int nmatch , regmatch_t * pmatch )
int nmatch , regmatch_t * pmatch )
{
{
pg_wchar * data ;
pg_wchar * data ;
size_t data_len ;
size_t data_len ;
int regexec_result ;
int regexec_result ;
regex_t * re ;
char errMsg [ 100 ] ;
char errMsg [ 100 ] ;
/* Convert data string to wide characters */
/* Convert data string to wide characters */
data = ( pg_wchar * ) palloc ( ( dat_len + 1 ) * sizeof ( pg_wchar ) ) ;
data = ( pg_wchar * ) palloc ( ( dat_len + 1 ) * sizeof ( pg_wchar ) ) ;
data_len = pg_mb2wchar_with_len ( dat , data , dat_len ) ;
data_len = pg_mb2wchar_with_len ( dat , data , dat_len ) ;
/* Compile RE */
re = RE_compile_and_cache ( text_re , cflags ) ;
/* Perform RE match and return result */
/* Perform RE match and return result */
regexec_result = pg_regexec ( re ,
regexec_result = pg_regexec ( re ,
data ,
data ,
@ -246,6 +241,33 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
return ( regexec_result = = REG_OKAY ) ;
return ( regexec_result = = REG_OKAY ) ;
}
}
/*
 * RE_compile_and_execute - compile a pattern (via the RE cache) and run it
 *
 * Returns TRUE if the data matches the pattern, FALSE if it does not.
 *
 *	text_re --- the pattern, expressed as an *untoasted* TEXT object
 *	dat --- the data to match against (need not be null-terminated)
 *	dat_len --- the length of the data string
 *	cflags --- compile options for the pattern
 *	nmatch, pmatch --- optional return area for match details
 *
 * Pattern and data are both given in the database encoding.  They are
 * converted internally to arrays of pg_wchar, which is what Spencer's
 * regex package wants.
 */
static bool
RE_compile_and_execute(text *text_re, char *dat, int dat_len,
					   int cflags, int nmatch, regmatch_t *pmatch)
{
	/* Obtain the compiled RE and hand it straight to the matcher */
	return RE_execute(RE_compile_and_cache(text_re, cflags),
					  dat, dat_len, nmatch, pmatch);
}
/*
/*
* assign_regex_flavor - GUC hook to validate and set REGEX_FLAVOR
* assign_regex_flavor - GUC hook to validate and set REGEX_FLAVOR
@ -400,8 +422,13 @@ textregexsubstr(PG_FUNCTION_ARGS)
{
{
text * s = PG_GETARG_TEXT_P ( 0 ) ;
text * s = PG_GETARG_TEXT_P ( 0 ) ;
text * p = PG_GETARG_TEXT_P ( 1 ) ;
text * p = PG_GETARG_TEXT_P ( 1 ) ;
bool match ;
regex_t * re ;
regmatch_t pmatch [ 2 ] ;
regmatch_t pmatch [ 2 ] ;
int so ,
eo ;
/* Compile RE */
re = RE_compile_and_cache ( p , regex_flavor ) ;
/*
/*
* We pass two regmatch_t structs to get info about the overall match and
* We pass two regmatch_t structs to get info about the overall match and
@ -409,36 +436,39 @@ textregexsubstr(PG_FUNCTION_ARGS)
* is a parenthesized subexpression , we return what it matched ; else
* is a parenthesized subexpression , we return what it matched ; else
* return what the whole regexp matched .
* return what the whole regexp matched .
*/
*/
match = RE_compile_and_execute ( p ,
if ( ! RE_execute ( re ,
VARDATA ( s ) ,
VARDATA ( s ) , VARSIZE ( s ) - VARHDRSZ ,
VARSIZE ( s ) - VARHDRSZ ,
2 , pmatch ) )
regex_flavor ,
PG_RETURN_NULL ( ) ; /* definitely no match */
2 , pmatch ) ;
/* match? then return the substring matching the pattern */
if ( re - > re_nsub > 0 )
if ( match )
{
{
int so ,
/* has parenthesized subexpressions, use the first one */
eo ;
so = pmatch [ 1 ] . rm_so ;
so = pmatch [ 1 ] . rm_so ;
eo = pmatch [ 1 ] . rm_eo ;
eo = pmatch [ 1 ] . rm_eo ;
if ( so < 0 | | eo < 0 )
}
else
{
{
/* no parenthesized subexpression */
/* no parenthesized subexpression, use whole match */
so = pmatch [ 0 ] . rm_so ;
so = pmatch [ 0 ] . rm_so ;
eo = pmatch [ 0 ] . rm_eo ;
eo = pmatch [ 0 ] . rm_eo ;
}
}
/*
* It is possible to have a match to the whole pattern but no match
* for a subexpression ; for example ' foo ( bar ) ? ' is considered to match
* ' foo ' but there is no subexpression match . So this extra test for
* match failure is not redundant .
*/
if ( so < 0 | | eo < 0 )
PG_RETURN_NULL ( ) ;
return DirectFunctionCall3 ( text_substr ,
return DirectFunctionCall3 ( text_substr ,
PointerGetDatum ( s ) ,
PointerGetDatum ( s ) ,
Int32GetDatum ( so + 1 ) ,
Int32GetDatum ( so + 1 ) ,
Int32GetDatum ( eo - so ) ) ;
Int32GetDatum ( eo - so ) ) ;
}
}
PG_RETURN_NULL ( ) ;
}
/*
/*
* textregexreplace_noopt ( )
* textregexreplace_noopt ( )
* Return a string matched by a regular expression , with replacement .
* Return a string matched by a regular expression , with replacement .