@ -674,10 +674,8 @@ similar_escape_internal(text *pat_text, text *esc_text)
elen ;
bool afterescape = false ;
int nquotes = 0 ;
int charclass_depth = 0 ; /* Nesting level of character classes,
* encompassed by square brackets */
int charclass_start = 0 ; /* State of the character class start,
* for carets */
int bracket_depth = 0 ; /* square bracket nesting level */
int charclass_pos = 0 ; /* position inside a character class */
p = VARDATA_ANY ( pat_text ) ;
plen = VARSIZE_ANY_EXHDR ( pat_text ) ;
@ -736,6 +734,17 @@ similar_escape_internal(text *pat_text, text *esc_text)
* the relevant part separators in the above expansion . If the result
* of this function is used in a plain regexp match ( SIMILAR TO ) , the
* escape - double - quotes have no effect on the match behavior .
*
* While we don ' t fully validate character classes ( bracket expressions ) ,
* we do need to parse them well enough to know where they end .
* " charclass_pos " tracks where we are in a character class .
* Its value is uninteresting when bracket_depth is 0.
* But when bracket_depth > 0 , it will be
* 1 : right after the opening ' [ ' ( a following ' ^ ' will negate
* the class , while ' ] ' is a literal character )
* 2 : right after a ' ^ ' after the opening ' [ ' ( ' ] ' is still a literal
* character )
* 3 or more : further inside the character class ( ' ] ' ends the class )
* - - - - - - - - - -
*/
@ -807,7 +816,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
/* fast path */
if ( afterescape )
{
if ( pchar = = ' " ' & & charclass _depth < 1 ) /* escape-double-quote? */
if ( pchar = = ' " ' & & bracket _depth < 1 ) /* escape-double-quote? */
{
/* emit appropriate part separator, per notes above */
if ( nquotes = = 0 )
@ -848,6 +857,12 @@ similar_escape_internal(text *pat_text, text *esc_text)
*/
* r + + = ' \\ ' ;
* r + + = pchar ;
/*
* If we encounter an escaped character in a character class ,
* we are no longer at the beginning .
*/
charclass_pos = 3 ;
}
afterescape = false ;
}
@ -856,41 +871,69 @@ similar_escape_internal(text *pat_text, text *esc_text)
/* SQL escape character; do not send to output */
afterescape = true ;
}
else if ( charclass _depth > 0 )
else if ( bracket _depth > 0 )
{
/* inside a character class */
if ( pchar = = ' \\ ' )
{
/*
* If we ' re here , backslash is not the SQL escape character ,
* so treat it as a literal class element , which requires
* doubling it . ( This matches our behavior for backslashes
* outside character classes . )
*/
* r + + = ' \\ ' ;
}
* r + + = pchar ;
/*
* Ignore a closing bracket at the start of a character class .
* Such a bracket is taken literally rather than closing the
* class . " charclass_start " is 1 right at the beginning of a
* class and 2 after an initial caret .
*/
if ( pchar = = ' ] ' & & charclass_start > 2 )
charclass_depth - - ;
/* parse the character class well enough to identify ending ']' */
if ( pchar = = ' ] ' & & charclass_pos > 2 )
{
/* found the real end of a bracket pair */
bracket_depth - - ;
/* don't reset charclass_pos, this may be an inner bracket */
}
else if ( pchar = = ' [ ' )
charclass_depth + + ;
{
/* start of a nested bracket pair */
bracket_depth + + ;
/*
* If there is a caret right after the opening bracket , it negates
* the character class , but a following closing bracket should
* still be treated as a normal character . That holds only for
* the first caret , so only the values 1 and 2 mean that closing
* brackets should be taken literally .
*/
if ( pchar = = ' ^ ' )
charclass_start + + ;
/*
* We are no longer at the beginning of a character class .
* ( The nested bracket pair is a collating element , not a
* character class in its own right . )
*/
charclass_pos = 3 ;
}
else if ( pchar = = ' ^ ' )
{
/*
* A caret right after the opening bracket negates the
* character class . In that case , the following will
* increment charclass_pos from 1 to 2 , so that a following
* ' ] ' is still a literal character and does not end the
* character class . If we are further inside a character
* class , charclass_pos might get incremented past 3 , which is
* fine .
*/
charclass_pos + + ;
}
else
charclass_start = 3 ; /* definitely past the start */
{
/*
* Anything else ( including a backslash or leading ' ] ' ) is an
* element of the character class , so we are no longer at the
* beginning of the class .
*/
charclass_pos = 3 ;
}
}
else if ( pchar = = ' [ ' )
{
/* start of a character class */
* r + + = pchar ;
charclass_depth + + ;
charclass_start = 1 ;
bracket_depth = 1 ;
charclass_po s = 1 ;
}
else if ( pchar = = ' % ' )
{