Fix regexp_matches() handling of zero-length matches.

We'd find the same match twice if it was of zero length and not immediately
adjacent to the previous match.  replace_text_regexp() got similar cases
right, so adjust this search logic to match that.  Note that even though
the regexp_split_to_xxx() functions share this code, they did not display
equivalent misbehavior, because the second match would be considered
degenerate and ignored.

Jeevan Chalke, with some cosmetic changes by me.
REL8_4_STABLE
Tom Lane 13 years ago
parent 21c2d4cd62
commit b2bdb7b76f
  1. src/backend/utils/adt/regexp.c (13 lines changed)
  2. src/backend/utils/adt/varlena.c (5 lines changed)
  3. src/test/regress/expected/strings.out (58 lines changed)
  4. src/test/regress/sql/strings.sql (7 lines changed)

@@ -937,14 +937,13 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
  break;
  /*
- * Advance search position. Normally we start just after the end of
- * the previous match, but always advance at least one character (the
- * special case can occur if the pattern matches zero characters just
- * after the prior match or at the end of the string).
+ * Advance search position. Normally we start the next search at the
+ * end of the previous match; but if the match was of zero length, we
+ * have to advance by one character, or we'd just find the same match
+ * again.
  */
- if (start_search < pmatch[0].rm_eo)
- start_search = pmatch[0].rm_eo;
- else
+ start_search = prev_match_end;
+ if (pmatch[0].rm_so == pmatch[0].rm_eo)
  start_search++;
  if (start_search > wide_len)
  break;

@@ -2624,7 +2624,10 @@ replace_text_regexp(text *src_text, void *regexp,
  break;
  /*
- * Search from next character when the matching text is zero width.
+ * Advance search position. Normally we start the next search at the
+ * end of the previous match; but if the match was of zero length, we
+ * have to advance by one character, or we'd just find the same match
+ * again.
  */
  search_start = data_pos;
  if (pmatch[0].rm_so == pmatch[0].rm_eo)

@@ -347,6 +347,64 @@ SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
  {barbeque}
  (1 row)
+ -- start/end-of-line matches are of zero length
+ SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '^', 'mg');
+ regexp_matches
+ ----------------
+ {""}
+ {""}
+ {""}
+ {""}
+ (4 rows)
+ SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '$', 'mg');
+ regexp_matches
+ ----------------
+ {""}
+ {""}
+ {""}
+ {""}
+ (4 rows)
+ SELECT regexp_matches('1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '^.?', 'mg');
+ regexp_matches
+ ----------------
+ {1}
+ {2}
+ {3}
+ {4}
+ {""}
+ (5 rows)
+ SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '.?$', 'mg');
+ regexp_matches
+ ----------------
+ {""}
+ {1}
+ {""}
+ {2}
+ {""}
+ {3}
+ {""}
+ {4}
+ {""}
+ {""}
+ (10 rows)
+ SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4', '.?$', 'mg');
+ regexp_matches
+ ----------------
+ {""}
+ {1}
+ {""}
+ {2}
+ {""}
+ {3}
+ {""}
+ {4}
+ {""}
+ (9 rows)
  -- give me errors
  SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'gz');
  ERROR: invalid regexp option: "z"

@@ -137,6 +137,13 @@ SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
  -- no capture groups
  SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
+ -- start/end-of-line matches are of zero length
+ SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '^', 'mg');
+ SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '$', 'mg');
+ SELECT regexp_matches('1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '^.?', 'mg');
+ SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '.?$', 'mg');
+ SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4', '.?$', 'mg');
  -- give me errors
  SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'gz');
  SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);

Loading…
Cancel
Save