regex - Update internal regex to latest version

Updated from the OpenBSD GitHub repository, using the code in this directory:
https://github.com/openbsd/src/tree/master/lib/libc/regex

This build will not function without its child commit, which introduces
ClamAV-specific modifications. The two commits have been kept separate to
make future upgrades easier.
pull/570/head
Mickey Sola 3 years ago committed by Micah Snyder
parent d1746ba0a5
commit 87cdd70037
  1. 40
      libclamav/regex/cclass.h
  2. 6
      libclamav/regex/cname.h
  3. 248
      libclamav/regex/engine.c
  4. 369
      libclamav/regex/regcomp.c
  5. 35
      libclamav/regex/regerror.c
  6. 52
      libclamav/regex/regex2.h
  7. 32
      libclamav/regex/regexec.c
  8. 28
      libclamav/regex/regfree.c
  9. 16
      libclamav/regex/utils.h

@ -1,6 +1,6 @@
/* $OpenBSD: cclass.h,v 1.7 2020/12/30 08:54:42 tb Exp $ */
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
@ -36,33 +36,25 @@
*/
/* character-class table */
static struct cclass {
static const struct cclass {
const char *name;
const char *chars;
const char *multis;
} cclasses[] = {
{ "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789", ""} ,
{ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
""} ,
{ "blank", " \t", ""} ,
0123456789" },
{ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" },
{ "blank", " \t" },
{ "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
\25\26\27\30\31\32\33\34\35\36\37\177", ""} ,
{ "digit", "0123456789", ""} ,
\25\26\27\30\31\32\33\34\35\36\37\177" },
{ "digit", "0123456789" },
{ "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
""} ,
{ "lower", "abcdefghijklmnopqrstuvwxyz",
""} ,
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" },
{ "lower", "abcdefghijklmnopqrstuvwxyz" },
{ "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
""} ,
{ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
""} ,
{ "space", "\t\n\v\f\r ", ""} ,
{ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
""} ,
{ "xdigit", "0123456789ABCDEFabcdef",
""} ,
{ NULL, 0, "" }
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ " },
{ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" },
{ "space", "\t\n\v\f\r " },
{ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ" },
{ "xdigit", "0123456789ABCDEFabcdef" },
{ NULL, 0 }
};

@ -1,6 +1,6 @@
/* $OpenBSD: cname.h,v 1.6 2020/12/30 08:53:30 tb Exp $ */
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
@ -36,7 +36,7 @@
*/
/* character-name table */
static struct cname {
static const struct cname {
const char *name;
char code;
} cnames[] = {

@ -1,6 +1,6 @@
/* $OpenBSD: engine.c,v 1.26 2020/12/28 21:41:55 millert Exp $ */
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
@ -72,11 +72,11 @@ struct match {
struct re_guts *g;
int eflags;
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
char *offp; /* offsets work from here */
char *beginp; /* start of string -- virtual NUL precedes */
char *endp; /* end of string -- virtual NUL here */
char *coldp; /* can be no match starting before here */
char **lastpos; /* [nplus+1] */
const char *offp; /* offsets work from here */
const char *beginp; /* start of string -- virtual NUL precedes */
const char *endp; /* end of string -- virtual NUL here */
const char *coldp; /* can be no match starting before here */
const char **lastpos; /* [nplus+1] */
STATEVARS;
states st; /* current states */
states fresh; /* states for a fresh start */
@ -84,11 +84,15 @@ struct match {
states empty; /* empty set of states */
};
static int matcher(struct re_guts *, char *, size_t, regmatch_t[], int);
static char *dissect(struct match *, char *, char *, sopno, sopno);
static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int);
static char *fast(struct match *, char *, char *, sopno, sopno);
static char *slow(struct match *, char *, char *, sopno, sopno);
static int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int);
static const char *dissect(struct match *, const char *, const char *, sopno,
sopno);
static const char *backref(struct match *, const char *, const char *, sopno,
sopno, sopno, int);
static const char *fast(struct match *, const char *, const char *, sopno,
sopno);
static const char *slow(struct match *, const char *, const char *, sopno,
sopno);
static states step(struct re_guts *, sopno, sopno, states, int, states);
#define MAX_RECURSION 100
#define BOL (OUT+1)
@ -97,17 +101,19 @@ static states step(struct re_guts *, sopno, sopno, states, int, states);
#define NOTHING (BOL+3)
#define BOW (BOL+4)
#define EOW (BOL+5)
/* update nonchars[] array below when adding fake chars here */
#define CODEMAX (BOL+5) /* highest code used */
#define NONCHAR(c) ((c) > CHAR_MAX)
#define NNONCHAR (CODEMAX-CHAR_MAX)
#ifdef REDEBUG
static void print(struct match *, char *, states, int, FILE *);
static void print(struct match *, const char *, states, int, FILE *);
#endif
#ifdef REDEBUG
static void at(struct match *, char *, char *, char *, sopno, sopno);
static void at(struct match *, const char *, const char *, const char *,
sopno, sopno);
#endif
#ifdef REDEBUG
static char *pchar(int);
static const char *pchar(int);
#endif
#ifdef REDEBUG
@ -125,18 +131,18 @@ static int nope = 0;
- matcher - the actual matching engine
*/
static int /* 0 success, REG_NOMATCH failure */
matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
int eflags)
matcher(struct re_guts *g, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
{
char *endp;
size_t i;
const char *endp;
int i;
struct match mv;
struct match *m = &mv;
char *dp;
const char *dp;
const sopno gf = g->firststate+1; /* +1 for OEND */
const sopno gl = g->laststate;
char *start;
char *stop;
const char *start;
const char *stop;
/* simplify the situation where possible */
if (g->cflags&REG_NOSUB)
@ -155,7 +161,7 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
if (g->must != NULL) {
for (dp = start; dp < stop; dp++)
if (*dp == g->must[0] && stop - dp >= g->mlen &&
memcmp(dp, g->must, (size_t)g->mlen) == 0)
memcmp(dp, g->must, g->mlen) == 0)
break;
if (dp == stop) /* we didn't find g->must */
return(REG_NOMATCH);
@ -203,10 +209,9 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
/* oh my, he wants the subexpressions... */
if (m->pmatch == NULL)
m->pmatch = (regmatch_t *)cli_malloc((m->g->nsub + 1) *
sizeof(regmatch_t));
m->pmatch = reallocarray(NULL, m->g->nsub + 1,
sizeof(regmatch_t));
if (m->pmatch == NULL) {
free(m->lastpos);
STATETEARDOWN(m);
return(REG_ESPACE);
}
@ -217,8 +222,8 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
dp = dissect(m, m->coldp, endp, gf, gl);
} else {
if (g->nplus > 0 && m->lastpos == NULL)
m->lastpos = (char **)cli_malloc((g->nplus+1) *
sizeof(char *));
m->lastpos = reallocarray(NULL,
g->nplus+1, sizeof(char *));
if (g->nplus > 0 && m->lastpos == NULL) {
free(m->pmatch);
STATETEARDOWN(m);
@ -277,10 +282,8 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
}
}
if (m->pmatch != NULL)
free((char *)m->pmatch);
if (m->lastpos != NULL)
free((char *)m->lastpos);
free(m->pmatch);
free(m->lastpos);
STATETEARDOWN(m);
return(0);
}
@ -288,22 +291,23 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
/*
- dissect - figure out what matched what, no back references
*/
static char * /* == stop (success) always */
dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
static const char * /* == stop (success) always */
dissect(struct match *m, const char *start, const char *stop, sopno startst,
sopno stopst)
{
int i;
sopno ss; /* start sop of current subRE */
sopno es; /* end sop of current subRE */
char *sp; /* start of string matched by it */
char *stp; /* string matched by it cannot pass here */
char *rest; /* start of rest of string */
char *tail; /* string unmatched by rest of RE */
sopno ssub; /* start sop of subsubRE */
sopno esub; /* end sop of subsubRE */
char *ssp; /* start of string matched by subsubRE */
char *sep; /* end of string matched by subsubRE */
char *oldssp; /* previous ssp */
char *dp;
sopno ss; /* start sop of current subRE */
sopno es; /* end sop of current subRE */
const char *sp; /* start of string matched by it */
const char *stp; /* string matched by it cannot pass here */
const char *rest; /* start of rest of string */
const char *tail; /* string unmatched by rest of RE */
sopno ssub; /* start sop of subsubRE */
sopno esub; /* end sop of subsubRE */
const char *ssp; /* start of string matched by subsubRE */
const char *sep; /* end of string matched by subsubRE */
const char *oldssp; /* previous ssp */
const char *dp;
AT("diss", start, stop, startst, stopst);
sp = start;
@ -448,12 +452,12 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
break;
case OLPAREN:
i = OPND(m->g->strip[ss]);
assert(0 < i && (size_t)i <= m->g->nsub);
assert(0 < i && i <= m->g->nsub);
m->pmatch[i].rm_so = sp - m->offp;
break;
case ORPAREN:
i = OPND(m->g->strip[ss]);
assert(0 < i && (size_t)i <= m->g->nsub);
assert(0 < i && i <= m->g->nsub);
m->pmatch[i].rm_eo = sp - m->offp;
break;
default: /* uh oh */
@ -469,17 +473,17 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
/*
- backref - figure out what matched what, figuring in back references
*/
static char * /* == stop (success) or NULL (failure) */
backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
sopno lev, int rec) /* PLUS nesting level */
static const char * /* == stop (success) or NULL (failure) */
backref(struct match *m, const char *start, const char *stop, sopno startst,
sopno stopst, sopno lev, int rec) /* PLUS nesting level */
{
int i;
sopno ss; /* start sop of current subRE */
char *sp; /* start of string matched by it */
const char *sp; /* start of string matched by it */
sopno ssub; /* start sop of subsubRE */
sopno esub; /* end sop of subsubRE */
char *ssp; /* start of string matched by subsubRE */
char *dp;
const char *ssp;/* start of string matched by subsubRE */
const char *dp;
size_t len;
int hard;
sop s;
@ -508,9 +512,9 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
return(NULL);
break;
case OBOL:
if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
(sp < m->endp && *(sp-1) == '\n' &&
(m->g->cflags&REG_NEWLINE)) )
if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
(sp > m->offp && sp < m->endp &&
*(sp-1) == '\n' && (m->g->cflags&REG_NEWLINE)))
{ /* yes */ }
else
return(NULL);
@ -524,12 +528,9 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
return(NULL);
break;
case OBOW:
if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
(sp < m->endp && *(sp-1) == '\n' &&
(m->g->cflags&REG_NEWLINE)) ||
(sp > m->beginp &&
!ISWORD(*(sp-1))) ) &&
(sp < m->endp && ISWORD(*sp)) )
if (sp < m->endp && ISWORD(*sp) &&
((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
(sp > m->offp && !ISWORD(*(sp-1)))))
{ /* yes */ }
else
return(NULL);
@ -572,14 +573,14 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
switch (OP(s)) {
case OBACK_: /* the vilest depths */
i = OPND(s);
assert(0 < i && (size_t)i <= m->g->nsub);
assert(0 < i && i <= m->g->nsub);
if (m->pmatch[i].rm_eo == -1)
return(NULL);
assert(m->pmatch[i].rm_so != -1);
len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
if (len == 0 && rec++ > MAX_RECURSION)
return(NULL);
assert((size_t)(stop - m->beginp) >= len);
assert(stop - m->beginp >= len);
if (sp > stop - len)
return(NULL); /* not enough left to match */
ssp = m->offp + m->pmatch[i].rm_so;
@ -635,7 +636,7 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
break;
case OLPAREN: /* must undo assignment if rest fails */
i = OPND(s);
assert(0 < i && (size_t)i <= m->g->nsub);
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_so;
m->pmatch[i].rm_so = sp - m->offp;
dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
@ -646,7 +647,7 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
break;
case ORPAREN: /* must undo assignment if rest fails */
i = OPND(s);
assert(0 < i && (size_t)i <= m->g->nsub);
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_eo;
m->pmatch[i].rm_eo = sp - m->offp;
dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
@ -663,24 +664,30 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
/* "can't happen" */
assert(nope);
/* NOTREACHED */
return(NULL);
return NULL;
}
/*
- fast - step through the string at top speed
*/
static char * /* where tentative match ended, or NULL */
fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
static const char * /* where tentative match ended, or NULL */
fast(struct match *m, const char *start, const char *stop, sopno startst,
sopno stopst)
{
states st = m->st;
states fresh = m->fresh;
states tmp = m->tmp;
char *p = start;
int c = (start == m->beginp) ? OUT : *(start-1);
int lastc; /* previous c */
const char *p = start;
int c;
int lastc; /* previous c */
int flagch;
int i;
char *coldp; /* last p after which no match was underway */
const char *coldp; /* last p after which no match was underway */
if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
c = OUT;
else
c = *(start-1);
CLEAR(st);
SET1(st, startst);
@ -698,31 +705,30 @@ fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
/* is there an EOL and/or BOL between lastc and c? */
flagch = '\0';
i = 0;
if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
(lastc == OUT && !(m->eflags&REG_NOTBOL))) {
flagch = BOL;
i = m->g->nbol;
}
if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
flagch = (flagch == BOL) ? BOLEOL : EOL;
i += m->g->neol;
}
if (i != 0) {
for (; i > 0; i--)
st = step(m->g, startst, stopst, st, flagch, st);
st = step(m->g, startst, stopst,
st, flagch, st);
SP("boleol", st, c);
}
/* how about a word boundary? */
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
(c != OUT && ISWORD(c)) ) {
if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
(c != OUT && ISWORD(c)))
flagch = BOW;
}
if ( (lastc != OUT && ISWORD(lastc)) &&
(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
if ((lastc != OUT && ISWORD(lastc)) &&
(flagch == EOL || (c != OUT && !ISWORD(c))))
flagch = EOW;
}
if (flagch == BOW || flagch == EOW) {
st = step(m->g, startst, stopst, st, flagch, st);
SP("boweow", st, c);
@ -753,18 +759,24 @@ fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
/*
- slow - step through the string more deliberately
*/
static char * /* where it ended */
slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
static const char * /* where it ended */
slow(struct match *m, const char *start, const char *stop, sopno startst,
sopno stopst)
{
states st = m->st;
states empty = m->empty;
states tmp = m->tmp;
char *p = start;
int c = (start == m->beginp) ? OUT : *(start-1);
int lastc; /* previous c */
const char *p = start;
int c;
int lastc; /* previous c */
int flagch;
int i;
char *matchp; /* last p at which a match ended */
const char *matchp; /* last p at which a match ended */
if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
c = OUT;
else
c = *(start-1);
AT("slow", start, stop, startst, stopst);
CLEAR(st);
@ -780,31 +792,30 @@ slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
/* is there an EOL and/or BOL between lastc and c? */
flagch = '\0';
i = 0;
if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
(lastc == OUT && !(m->eflags&REG_NOTBOL))) {
flagch = BOL;
i = m->g->nbol;
}
if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
(c == OUT && !(m->eflags&REG_NOTEOL))) {
flagch = (flagch == BOL) ? BOLEOL : EOL;
i += m->g->neol;
}
if (i != 0) {
for (; i > 0; i--)
st = step(m->g, startst, stopst, st, flagch, st);
st = step(m->g, startst, stopst,
st, flagch, st);
SP("sboleol", st, c);
}
/* how about a word boundary? */
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
(c != OUT && ISWORD(c)) ) {
if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
(c != OUT && ISWORD(c)))
flagch = BOW;
}
if ( (lastc != OUT && ISWORD(lastc)) &&
(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
if ((lastc != OUT && ISWORD(lastc)) &&
(flagch == EOL || (c != OUT && !ISWORD(c))))
flagch = EOW;
}
if (flagch == BOW || flagch == EOW) {
st = step(m->g, startst, stopst, st, flagch, st);
SP("sboweow", st, c);
@ -921,10 +932,10 @@ step(struct re_guts *g,
case OOR1: /* done a branch, find the O_CH */
if (ISSTATEIN(aft, here)) {
for (look = 1;
OP(s = g->strip[pc+look]) != O_CH;
look += OPND(s))
OP(s = g->strip[pc+look]) != O_CH;
look += OPND(s))
assert(OP(s) == OOR2);
FWD(aft, aft, look);
FWD(aft, aft, look + 1);
}
break;
case OOR2: /* propagate OCH_'s marking */
@ -951,7 +962,7 @@ step(struct re_guts *g,
- print - print a set of states
*/
static void
print(struct match *m, char *caption, states st, int ch, FILE *d)
print(struct match *m, const char *caption, states st, int ch, FILE *d)
{
struct re_guts *g = m->g;
int i;
@ -961,13 +972,13 @@ print(struct match *m, char *caption, states st, int ch, FILE *d)
return;
(void)fprintf(d, "%s", caption);
if (ch != '\0')
(void)fprintf(d, " %s", pchar(ch));
for (i = 0; i < g->nstates; i++)
(void)fprintf(d, " %s", pchar(ch));
for (i = 0; i < g->nstates; i++) {
if (ISSET(st, i)) {
(void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
first = 0;
}
}
(void)fprintf(d, "\n");
}
@ -975,8 +986,8 @@ print(struct match *m, char *caption, states st, int ch, FILE *d)
- at - print current situation
*/
static void
at(struct match *m, char *title, char *start, char *stop, sopno startst,
sopno stopst)
at(struct match *m, const char *title, const char *start, const char *stop,
sopno startst, sopno stopst)
{
if (!(m->eflags&REG_TRACE))
return;
@ -988,6 +999,12 @@ at(struct match *m, char *title, char *start, char *stop, sopno startst,
#ifndef PCHARDONE
#define PCHARDONE /* never again */
/*
* Printable names for the out-of-band "fake character" codes, indexed by
* (code - OUT). Keep this table in step with the fake-char #defines
* (OUT .. CODEMAX) whenever one is added.
*/
static const char *nonchars[] =
{ "OUT", "BOL", "EOL", "BOLEOL", "NOTHING", "BOW", "EOW" };
/*
* PNONCHAR - name of fake-char code c, or "invalid" when (c) - OUT is not
* below the number of entries in nonchars[]. Note that c is evaluated twice.
*/
#define PNONCHAR(c) \
((c) - OUT < (sizeof(nonchars)/sizeof(nonchars[0])) \
? nonchars[(c) - OUT] : "invalid")
/*
- pchar - make a character printable
*
@ -996,12 +1013,17 @@ at(struct match *m, char *title, char *start, char *stop, sopno startst,
* a matching debug.o, and this is convenient. It all disappears in
* the non-debug compilation anyway, so it doesn't matter much.
*/
static char * /* -> representation */
static const char * /* -> representation */
pchar(int ch)
{
static char pbuf[10];
if (isprint(ch) || ch == ' ')
if (NONCHAR(ch)) {
if (ch - OUT < (sizeof(nonchars)/sizeof(nonchars[0])))
return nonchars[ch - OUT];
return "invalid";
}
if (isprint((unsigned char)ch) || ch == ' ')
(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
else
(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);

@ -1,6 +1,5 @@
/* $OpenBSD: regcomp.c,v 1.43 2021/01/03 17:07:57 tb Exp $ */
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
@ -41,10 +40,7 @@
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include "clamav.h"
#include "others.h"
#include "regex.h"
#include <regex.h>
#include "utils.h"
#include "regex2.h"
@ -57,8 +53,8 @@
* other clumsinesses
*/
struct parse {
char *next; /* next character in RE */
char *end; /* end of string (-> NUL normally) */
const char *next; /* next character in RE */
const char *end; /* end of string (-> NUL normally) */
int error; /* has an error been seen? */
sop *strip; /* malloced strip */
sopno ssize; /* malloced strip size (allocated) */
@ -85,25 +81,20 @@ static char p_b_coll_elem(struct parse *, int);
static char othercase(int);
static void bothcases(struct parse *, int);
static void ordinary(struct parse *, int);
static void backslash(struct parse *, int);
static void nonnewline(struct parse *);
static void repeat(struct parse *, sopno, int, int);
static int seterr(struct parse *, int);
static void seterr(struct parse *, int);
static cset *allocset(struct parse *);
static void freeset(struct parse *, cset *);
static int freezeset(struct parse *, cset *);
static int firstch(struct parse *, cset *);
static int nch(struct parse *, cset *);
static void mcadd(struct parse *, cset *, const char *);
static void mcinvert(struct parse *, cset *);
static void mccase(struct parse *, cset *);
static int isinsets(struct re_guts *, int);
static int samesets(struct re_guts *, int, int);
static void categorize(struct parse *, struct re_guts *);
static sopno dupl(struct parse *, sopno, sopno);
static void doemit(struct parse *, sop, size_t);
static void doinsert(struct parse *, sop, size_t, sopno);
static void dofwd(struct parse *, sopno, sop);
static void enlarge(struct parse *, sopno);
static int enlarge(struct parse *, sopno);
static void stripsnug(struct parse *, struct re_guts *);
static void findmust(struct parse *, struct re_guts *);
static sopno pluscount(struct parse *, struct re_guts *);
@ -116,10 +107,10 @@ static char nuls[10]; /* place to point scanner in event of error */
*/
#define PEEK() (*p->next)
#define PEEK2() (*(p->next+1))
#define MORE() (p->next < p->end)
#define MORE2() (p->next+1 < p->end)
#define MORE() (p->end - p->next > 0)
#define MORE2() (p->end - p->next > 1)
#define SEE(c) (MORE() && PEEK() == (c))
#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
#define SEETWO(a, b) (MORE2() && PEEK() == (a) && PEEK2() == (b))
#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
#define NEXT() (p->next++)
@ -127,10 +118,7 @@ static char nuls[10]; /* place to point scanner in event of error */
#define NEXTn(n) (p->next += (n))
#define GETNEXT() (*p->next++)
#define SETERROR(e) seterr(p, (e))
#define REQUIRE(co, e) (void)((co) || SETERROR(e))
#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
#define REQUIRE(co, e) do { if (!(co)) SETERROR(e); } while (0)
#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
@ -147,17 +135,16 @@ static int never = 0; /* for use in asserts; shuts lint up */
#endif
/*
- cli_regcomp_real - interface for parser and compilation
- regcomp - interface for parser and compilation
*/
int /* 0 success, otherwise REG_something */
cli_regcomp_real(regex_t *preg, const char *pattern, int cflags)
regcomp(regex_t *preg, const char *pattern, int cflags)
{
struct parse pa;
struct re_guts *g;
struct parse *p = &pa;
int i;
size_t len;
size_t maxlen;
#ifdef REDEBUG
# define GOODFLAGS(f) (f)
#else
@ -173,41 +160,23 @@ cli_regcomp_real(regex_t *preg, const char *pattern, int cflags)
return(REG_INVARG);
len = preg->re_endp - pattern;
} else
len = strlen((const char *)pattern);
len = strlen((char *)pattern);
/* do the mallocs early so failure handling is easy */
g = (struct re_guts *)cli_malloc(sizeof(struct re_guts) +
(NC-1)*sizeof(cat_t));
g = malloc(sizeof(struct re_guts));
if (g == NULL)
return(REG_ESPACE);
/* Patch for bb11264 submitted by the Debian team: */
/*
* Limit the pattern space to avoid a 32-bit overflow on buffer
* extension. Also avoid any signed overflow in case of conversion
* so make the real limit based on a 31-bit overflow.
*
* Likely not applicable on 64-bit systems but handle the case
* generically (who are we to stop people from using ~715MB+
* patterns?).
*/
maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3;
if (len >= maxlen) {
free((char *)g);
return(REG_ESPACE);
}
p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
assert((size_t)p->ssize >= len);
p->strip = (sop *)cli_calloc(p->ssize, sizeof(sop));
p->strip = reallocarray(NULL, p->ssize, sizeof(sop));
p->slen = 0;
if (p->strip == NULL) {
free((char *)g);
free(g);
return(REG_ESPACE);
}
/* set things up */
p->g = g;
p->next = (char *)pattern; /* convenience; we do not modify it */
p->next = pattern;
p->end = p->next + len;
p->error = 0;
p->ncsalloc = 0;
@ -226,9 +195,6 @@ cli_regcomp_real(regex_t *preg, const char *pattern, int cflags)
g->must = NULL;
g->mlen = 0;
g->nsub = 0;
g->ncategories = 1; /* category 0 is "everything else" */
g->categories = &g->catspace[-(CHAR_MIN)];
(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
g->backrefs = 0;
/* do it */
@ -244,7 +210,6 @@ cli_regcomp_real(regex_t *preg, const char *pattern, int cflags)
g->laststate = THERE();
/* tidy up loose ends and fill things in */
categorize(p, g);
stripsnug(p, g);
findmust(p, g);
g->nplus = pluscount(p, g);
@ -253,14 +218,14 @@ cli_regcomp_real(regex_t *preg, const char *pattern, int cflags)
preg->re_g = g;
preg->re_magic = MAGIC1;
#ifndef REDEBUG
/* not debugging, so can't rely on the assert() in cli_regexec() */
if (g->iflags&REGEX_BAD)
/* not debugging, so can't rely on the assert() in regexec() */
if (g->iflags&BAD)
SETERROR(REG_ASSERT);
#endif
/* win or lose, we're done */
if (p->error != 0) /* lose */
cli_regfree(preg);
regfree(preg);
return(p->error);
}
@ -271,8 +236,8 @@ static void
p_ere(struct parse *p, int stop) /* character this ERE should end at */
{
char c;
sopno prevback = 0;
sopno prevfwd = 0;
sopno prevback;
sopno prevfwd;
sopno conc;
int first = 1; /* is this the first alternative? */
@ -339,20 +304,8 @@ p_ere_exp(struct parse *p)
assert(p->pend[subno] != 0);
}
EMIT(ORPAREN, subno);
MUSTEAT(')', REG_EPAREN);
break;
#ifndef POSIX_MISTAKE
case ')': /* happens only if no current unmatched ( */
/*
* You may ask, why the ifndef? Because I didn't notice
* this until slightly too late for 1003.2, and none of the
* other 1003.2 regular-expression reviewers noticed it at
* all. So an unmatched ) is legal POSIX, at least until
* we can get it fixed.
*/
SETERROR(REG_EPAREN);
REQUIRE(MORE() && GETNEXT() == ')', REG_EPAREN);
break;
#endif
case '^':
EMIT(OBOL, 0);
p->g->iflags |= USEBOL;
@ -384,12 +337,14 @@ p_ere_exp(struct parse *p)
case '\\':
REQUIRE(MORE(), REG_EESCAPE);
c = GETNEXT();
ordinary(p, c);
backslash(p, c);
break;
case '{': /* okay as ordinary except if digit follows */
REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
/* FALLTHROUGH */
default:
if (p->error != 0)
return;
ordinary(p, c);
break;
}
@ -432,7 +387,7 @@ p_ere_exp(struct parse *p)
count2 = p_count(p);
REQUIRE(count <= count2, REG_BADBR);
} else /* single number with comma */
count2 = REGEX_INFINITY;
count2 = INFINITY;
} else /* just a single number */
count2 = count;
repeat(p, pos, count, count2);
@ -536,6 +491,12 @@ p_simp_re(struct parse *p,
case '[':
p_bracket(p);
break;
case BACKSL|'<':
EMIT(OBOW, 0);
break;
case BACKSL|'>':
EMIT(OEOW, 0);
break;
case BACKSL|'{':
SETERROR(REG_BADRPT);
break;
@ -571,7 +532,7 @@ p_simp_re(struct parse *p,
i = (c&~BACKSL) - '0';
assert(i < NPAREN);
if (p->pend[i] != 0) {
assert((size_t)i <= p->g->nsub);
assert(i <= p->g->nsub);
EMIT(OBACK_, i);
assert(p->pbegin[i] != 0);
assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
@ -586,6 +547,8 @@ p_simp_re(struct parse *p,
REQUIRE(starordinary, REG_BADRPT);
/* FALLTHROUGH */
default:
if (p->error != 0)
return(0); /* Definitely not $... */
ordinary(p, (char)c);
break;
}
@ -603,7 +566,7 @@ p_simp_re(struct parse *p,
count2 = p_count(p);
REQUIRE(count <= count2, REG_BADBR);
} else /* single number with comma */
count2 = REGEX_INFINITY;
count2 = INFINITY;
} else /* just a single number */
count2 = count;
repeat(p, pos, count, count2);
@ -650,15 +613,17 @@ p_bracket(struct parse *p)
int invert = 0;
/* Dept of Truly Sickening Special-Case Kludges */
if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
EMIT(OBOW, 0);
NEXTn(6);
return;
}
if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
EMIT(OEOW, 0);
NEXTn(6);
return;
if (p->end - p->next > 5) {
if (strncmp(p->next, "[:<:]]", 6) == 0) {
EMIT(OBOW, 0);
NEXTn(6);
return;
}
if (strncmp(p->next, "[:>:]]", 6) == 0) {
EMIT(OEOW, 0);
NEXTn(6);
return;
}
}
if ((cs = allocset(p)) == NULL) {
@ -676,7 +641,7 @@ p_bracket(struct parse *p)
p_b_term(p, cs);
if (EAT('-'))
CHadd(cs, '-');
MUSTEAT(']', REG_EBRACK);
REQUIRE(MORE() && GETNEXT() == ']', REG_EBRACK);
if (p->error != 0) { /* don't mess things up further */
freeset(p, cs);
@ -693,8 +658,6 @@ p_bracket(struct parse *p)
if (ci != i)
CHadd(cs, ci);
}
if (cs->multis != NULL)
mccase(p, cs);
}
if (invert) {
int i;
@ -706,12 +669,8 @@ p_bracket(struct parse *p)
CHadd(cs, i);
if (p->g->cflags&REG_NEWLINE)
CHsub(cs, '\n');
if (cs->multis != NULL)
mcinvert(p, cs);
}
assert(cs->multis == NULL); /* xxx */
if (nch(p, cs) == 1) { /* optimize singleton sets */
ordinary(p, firstch(p, cs));
freeset(p, cs);
@ -788,13 +747,13 @@ p_b_term(struct parse *p, cset *cs)
static void
p_b_cclass(struct parse *p, cset *cs)
{
char *sp = p->next;
struct cclass *cp;
const char *sp = p->next;
const struct cclass *cp;
size_t len;
const char *u;
char c;
while (MORE() && isalpha(PEEK()))
while (MORE() && isalpha((uch)PEEK()))
NEXT();
len = p->next - sp;
for (cp = cclasses; cp->name != NULL; cp++)
@ -809,8 +768,6 @@ p_b_cclass(struct parse *p, cset *cs)
u = cp->chars;
while ((c = *u++) != '\0')
CHadd(cs, c);
for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
MCadd(p, cs, u);
}
/*
@ -852,9 +809,9 @@ static char /* value of collating element */
p_b_coll_elem(struct parse *p,
int endc) /* name ended by endc,']' */
{
char *sp = p->next;
struct cname *cp;
int len;
const char *sp = p->next;
const struct cname *cp;
size_t len;
while (MORE() && !SEETWO(endc, ']'))
NEXT();
@ -864,7 +821,7 @@ p_b_coll_elem(struct parse *p,
}
len = p->next - sp;
for (cp = cnames; cp->name != NULL; cp++)
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
return(cp->code); /* known name */
if (len == 1)
return(*sp); /* single character */
@ -896,8 +853,8 @@ othercase(int ch)
static void
bothcases(struct parse *p, int ch)
{
char *oldnext = p->next;
char *oldend = p->end;
const char *oldnext = p->next;
const char *oldend = p->end;
char bracket[3];
ch = (uch)ch;
@ -919,14 +876,28 @@ bothcases(struct parse *p, int ch)
static void
ordinary(struct parse *p, int ch)
{
cat_t *cap = p->g->categories;
if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
bothcases(p, ch);
else {
else
EMIT(OCHAR, (uch)ch);
if (cap[ch] == 0)
cap[ch] = p->g->ncategories++;
}
/*
- backslash - emit the strip code for a backslash-escaped character
*
* ch is the character that followed the backslash (p_ere_exp passes the
* character it has just consumed). "\<" and "\>" are "extra magic": they
* emit OBOW and OEOW, the same operators p_bracket emits for "[[:<:]]" and
* "[[:>:]]". Any other character is not special here and is simply handed
* on to ordinary().
*/
static void
backslash(struct parse *p, int ch)
{
switch (ch) {
case '<':	/* "\<" */
EMIT(OBOW, 0);
break;
case '>':	/* "\>" */
EMIT(OEOW, 0);
break;
default:	/* nothing magic: treat as an ordinary character */
ordinary(p, ch);
break;
}
}
@ -938,16 +909,12 @@ ordinary(struct parse *p, int ch)
static void
nonnewline(struct parse *p)
{
char *oldnext = p->next;
char *oldend = p->end;
char bracket[4];
const char *oldnext = p->next;
const char *oldend = p->end;
static const char bracket[4] = { '^', '\n', ']', '\0' };
p->next = bracket;
p->end = bracket+3;
bracket[0] = '^';
bracket[1] = '\n';
bracket[2] = ']';
bracket[3] = '\0';
p_bracket(p);
assert(p->next == bracket+3);
p->next = oldnext;
@ -961,13 +928,13 @@ static void
repeat(struct parse *p,
sopno start, /* operand from here to end of strip */
int from, /* repeated from this number */
int to) /* to this number of times (maybe REGEX_INFINITY) */
int to) /* to this number of times (maybe INFINITY) */
{
sopno finish = HERE();
# define N 2
# define INF 3
# define REP(f, t) ((f)*8 + (t))
# define MAP(n) (((n) <= 1) ? (n) : ((n) == REGEX_INFINITY) ? INF : N)
# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
sopno copy;
if (p->error != 0) /* head off possible runaway recursion */
@ -1027,14 +994,13 @@ repeat(struct parse *p,
/*
- seterr - set an error condition
*/
static int /* useless but makes type checking happy */
static void
seterr(struct parse *p, int e)
{
if (p->error == 0) /* keep earliest error condition */
p->error = e;
p->next = nuls; /* try to bring things to a halt */
p->end = nuls;
return(0); /* make the return value well-defined */
}
/*
@ -1056,16 +1022,16 @@ allocset(struct parse *p)
p->ncsalloc += CHAR_BIT;
nc = p->ncsalloc;
assert(nc % CHAR_BIT == 0);
nbytes = nc / CHAR_BIT * css;
ptr = (cset *)cli_realloc((char *)p->g->sets, nc * sizeof(cset));
ptr = reallocarray(p->g->sets, nc, sizeof(cset));
if (ptr == NULL)
goto nomem;
p->g->sets = ptr;
ptr = (uch *)cli_realloc((char *)p->g->setbits, nbytes);
ptr = reallocarray(p->g->setbits, nc / CHAR_BIT, css);
if (ptr == NULL)
goto nomem;
nbytes = (nc / CHAR_BIT) * css;
p->g->setbits = ptr;
for (i = 0; i < no; i++)
@ -1081,8 +1047,6 @@ allocset(struct parse *p)
cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
cs->mask = 1 << ((no) % CHAR_BIT);
cs->hash = 0;
cs->smultis = 0;
cs->multis = NULL;
return(cs);
nomem:
@ -1102,7 +1066,7 @@ nomem:
static void
freeset(struct parse *p, cset *cs)
{
size_t i;
int i;
cset *top = &p->g->sets[p->g->ncsets];
size_t css = (size_t)p->g->csetsize;
@ -1125,7 +1089,7 @@ static int /* set number */
freezeset(struct parse *p, cset *cs)
{
uch h = cs->hash;
size_t i;
int i;
cset *top = &p->g->sets[p->g->ncsets];
cset *cs2;
size_t css = (size_t)p->g->csetsize;
@ -1135,7 +1099,7 @@ freezeset(struct parse *p, cset *cs)
if (cs2->hash == h && cs2 != cs) {
/* maybe */
for (i = 0; i < css; i++)
if (!!CHIN(cs2, i) != !!CHIN(cs, i))
if (CHIN(cs2, i) != CHIN(cs, i))
break; /* no */
if (i == css)
break; /* yes */
@ -1155,7 +1119,7 @@ freezeset(struct parse *p, cset *cs)
static int /* character; there is no "none" value */
firstch(struct parse *p, cset *cs)
{
size_t i;
int i;
size_t css = (size_t)p->g->csetsize;
for (i = 0; i < css; i++)
@ -1171,7 +1135,7 @@ firstch(struct parse *p, cset *cs)
static int
nch(struct parse *p, cset *cs)
{
size_t i;
int i;
size_t css = (size_t)p->g->csetsize;
int n = 0;
@ -1181,117 +1145,6 @@ nch(struct parse *p, cset *cs)
return(n);
}
/*
 - mcadd - add a collating element to a cset
 *
 * cs->multis holds a list of NUL-terminated strings closed by one extra
 * NUL ("ab\0cd\0ef\0\0"); cs->smultis is the size of that buffer in bytes,
 * or 0 while the list is still empty.  The new element cp is appended and
 * the list is re-terminated.
 *
 * On allocation failure the list is released, cs->multis/cs->smultis are
 * reset to the empty state and REG_ESPACE is recorded through SETERROR().
 */
static void
mcadd(struct parse *p, cset *cs, const char *cp)
{
	size_t cplen = strlen(cp);
	/*
	 * An empty list has no terminator yet, so pretend one byte is already
	 * there; this keeps "oldend - 1" inside the buffer on the first add
	 * (it used to point one byte before the allocation).
	 */
	size_t oldend = (cs->smultis != 0) ? cs->smultis : 1;
	size_t newsize = oldend + cplen + 1;
	void *np;

	np = cli_realloc(cs->multis, newsize);
	if (np == NULL) {
		free(cs->multis);
		cs->multis = NULL;
		cs->smultis = 0;	/* keep size and pointer in sync */
		SETERROR(REG_ESPACE);
		return;
	}
	cs->multis = np;
	cs->smultis = newsize;

	/* overwrite the old list terminator with cp (and its own NUL) ... */
	cli_strlcpy(cs->multis + oldend - 1, cp, cplen + 1);
	/* ... then put the list terminator back at the very end */
	cs->multis[newsize - 1] = '\0';
}
/*
 - mcinvert - invert the list of collating elements in a cset
 *
 * Inverting the list would require knowing every possible collating
 * element, so the real work is deferred.  For now this only asserts that
 * the set carries no multi-character elements at all.
 */
/* ARGSUSED */
static void
mcinvert(struct parse *p, cset *cs)
{
	assert(cs->multis == NULL);	/* xxx */
	UNUSEDPARAM(p);
}
/*
 - mccase - add case counterparts of the list of collating elements in a cset
 *
 * Doing this properly would require knowing every possible collating
 * element, so the real work is deferred.  For now this only asserts that
 * the set carries no multi-character elements at all.
 */
/* ARGSUSED */
static void
mccase(struct parse *p, cset *cs)
{
	assert(cs->multis == NULL);	/* xxx */
	UNUSEDPARAM(p);
}
/*
- isinsets - is this character in any sets?
*/
static int /* predicate */
isinsets(struct re_guts *g, int c)
{
uch *col;
int i;
int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
unsigned uc = (uch)c;
for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
if (col[uc] != 0)
return(1);
return(0);
}
/*
- samesets - are these two characters in exactly the same sets?
*/
static int /* predicate */
samesets(struct re_guts *g, int c1, int c2)
{
uch *col;
int i;
int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
unsigned uc1 = (uch)c1;
unsigned uc2 = (uch)c2;
for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
if (col[uc1] != col[uc2])
return(0);
return(1);
}
/*
 - categorize - sort out character categories
 *
 * Every character that has no category yet (entry 0) but belongs to at
 * least one set gets a fresh category number taken from g->ncategories.
 * Each later uncategorized character that is a member of exactly the same
 * sets receives the same number.  Does nothing once a parse error has been
 * recorded.
 */
static void
categorize(struct parse *p, struct re_guts *g)
{
	cat_t *table = g->categories;	/* indexed from CHAR_MIN to CHAR_MAX */
	cat_t fresh;
	int ch;
	int later;

	/* avoid making error situations worse */
	if (p->error != 0)
		return;

	for (ch = CHAR_MIN; ch <= CHAR_MAX; ch++) {
		/* skip characters already categorized or not in any set */
		if (table[ch] != 0 || !isinsets(g, ch))
			continue;

		fresh = g->ncategories++;
		table[ch] = fresh;

		/* share the new category with every equivalent character */
		for (later = ch + 1; later <= CHAR_MAX; later++) {
			if (table[later] == 0 && samesets(g, ch, later))
				table[later] = fresh;
		}
	}
}
/*
- dupl - emit a duplicate of a bunch of sops
*/
@ -1306,10 +1159,9 @@ dupl(struct parse *p,
assert(finish >= start);
if (len == 0)
return(ret);
enlarge(p, p->ssize + len); /* this many unexpected additions */
assert(p->ssize >= p->slen + len);
(void) memmove((char *)(p->strip + p->slen),
(char *)(p->strip + start), (size_t)len*sizeof(sop));
if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
return(ret);
(void) memcpy(p->strip + p->slen, p->strip + start, len * sizeof(sop));
p->slen += len;
return(ret);
}
@ -1333,8 +1185,8 @@ doemit(struct parse *p, sop op, size_t opnd)
/* deal with undersized strip */
if (p->slen >= p->ssize)
enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
assert(p->slen < p->ssize);
if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */
return;
/* finally, it's all reduced to the easy case */
p->strip[p->slen++] = SOP(op, opnd);
@ -1392,21 +1244,22 @@ dofwd(struct parse *p, sopno pos, sop value)
/*
- enlarge - enlarge the strip
*/
static void
static int
enlarge(struct parse *p, sopno size)
{
sop *sp;
if (p->ssize >= size)
return;
return 1;
sp = (sop *)cli_realloc(p->strip, size*sizeof(sop));
sp = reallocarray(p->strip, size, sizeof(sop));
if (sp == NULL) {
SETERROR(REG_ESPACE);
return;
return 0;
}
p->strip = sp;
p->ssize = size;
return 1;
}
/*
@ -1416,7 +1269,7 @@ static void
stripsnug(struct parse *p, struct re_guts *g)
{
g->nstates = p->slen;
g->strip = (sop *)cli_realloc((char *)p->strip, p->slen * sizeof(sop));
g->strip = reallocarray(p->strip, p->slen, sizeof(sop));
if (g->strip == NULL) {
SETERROR(REG_ESPACE);
g->strip = p->strip;
@ -1436,8 +1289,8 @@ static void
findmust(struct parse *p, struct re_guts *g)
{
sop *scan;
sop *start = NULL; /* start initialized in the default case, after that */
sop *newstart = NULL; /* newstart was initialized in the OCHAR case */
sop *start; /* start initialized in the default case, after that */
sop *newstart; /* newstart was initialized in the OCHAR case */
sopno newlen;
sop s;
char *cp;
@ -1471,7 +1324,7 @@ findmust(struct parse *p, struct re_guts *g)
/* assert() interferes w debug printouts */
if (OP(s) != O_QUEST && OP(s) != O_CH &&
OP(s) != OOR2) {
g->iflags |= REGEX_BAD;
g->iflags |= BAD;
return;
}
} while (OP(s) != O_QUEST && OP(s) != O_CH);
@ -1488,13 +1341,9 @@ findmust(struct parse *p, struct re_guts *g)
if (g->mlen == 0) /* there isn't one */
return;
if (start == NULL) { /* something went wrong */
g->mlen = 0;
return;
}
/* turn it into a character string */
g->must = cli_malloc((size_t)g->mlen + 1);
g->must = malloc((size_t)g->mlen + 1);
if (g->must == NULL) { /* argh; just forget it */
g->mlen = 0;
return;
@ -1508,7 +1357,7 @@ findmust(struct parse *p, struct re_guts *g)
*cp++ = (char)OPND(s);
}
assert(cp == g->must + g->mlen);
*cp++ = '\0'; /* just on general principles */
*cp = '\0'; /* just on general principles */
}
/*
@ -1540,6 +1389,6 @@ pluscount(struct parse *p, struct re_guts *g)
}
} while (OP(s) != OEND);
if (plusnest != 0)
g->iflags |= REGEX_BAD;
g->iflags |= BAD;
return(maxnest);
}

@ -1,6 +1,5 @@
/* $OpenBSD: regerror.c,v 1.15 2020/12/30 08:56:38 tb Exp $ */
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
@ -41,21 +40,18 @@
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include "clamav.h"
#include "others.h"
#include "regex.h"
#include <regex.h>
#include "utils.h"
static const char *regatoi(const regex_t *, char *, int);
static struct rerr {
static const struct rerr {
int code;
const char *name;
const char *explain;
} rerrs[] = {
{ REG_NOMATCH, "REG_NOMATCH", "cli_regexec() failed to match" },
{ REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" },
{ REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
@ -75,14 +71,13 @@ static struct rerr {
};
/*
- cli_regerror - the interface to error numbers
= extern size_t cli_regerror(int, const regex_t *, char *, size_t);
- regerror - the interface to error numbers
= extern size_t regerror(int, const regex_t *, char *, size_t);
*/
/* ARGSUSED */
size_t
cli_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
struct rerr *r;
const struct rerr *r;
size_t len;
int target = errcode &~ REG_ITOA;
const char *s;
@ -98,7 +93,7 @@ cli_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
if (errcode&REG_ITOA) {
if (r->code != 0) {
assert(strlen(r->name) < sizeof(convbuf));
(void) cli_strlcpy(convbuf, r->name, sizeof convbuf);
(void) strlcpy(convbuf, r->name, sizeof convbuf);
} else
(void)snprintf(convbuf, sizeof convbuf,
"REG_0x%x", target);
@ -107,12 +102,12 @@ cli_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
s = r->explain;
}
len = strlen(s) + 1;
if (errbuf_size > 0) {
cli_strlcpy(errbuf, s, errbuf_size);
}
if (errbuf_size != 0)
len = strlcpy(errbuf, s, errbuf_size);
else
len = strlen(s);
return(len);
return len + 1;
}
/*
@ -121,7 +116,7 @@ cli_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
static const char *
regatoi(const regex_t *preg, char *localbuf, int localbufsize)
{
struct rerr *r;
const struct rerr *r;
for (r = rerrs; r->code != 0; r++)
if (strcmp(r->name, preg->re_endp) == 0)

@ -1,6 +1,6 @@
/* $OpenBSD: regex2.h,v 1.12 2021/01/03 17:07:58 tb Exp $ */
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
@ -106,52 +106,56 @@ typedef struct {
uch *ptr; /* -> uch [csetsize] */
uch mask; /* bit within array */
uch hash; /* hash code */
size_t smultis;
char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
} cset;
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (uch)(c))
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (uch)(c))
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* cli_regcomp() internal fns */
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
#define MCin(p, cs, cp) mcin(p, cs, cp)
/* stuff for character categories */
typedef unsigned char cat_t;
/* CHadd - set this cset's mask bit for character c and add c to its hash */
static inline void
CHadd(cset *cs, char c)
{
	uch *slot = &cs->ptr[(uch)c];

	*slot |= cs->mask;
	cs->hash += c;
}
/* CHsub - clear this cset's mask bit for character c and subtract c from its hash */
static inline void
CHsub(cset *cs, char c)
{
	uch *slot = &cs->ptr[(uch)c];

	*slot &= ~cs->mask;
	cs->hash -= c;
}
static inline int
CHIN(const cset *cs, char c)
{
return (cs->ptr[(uch)c] & cs->mask) != 0;
}
/*
* main compiled-expression structure
*/
struct re_guts {
int magic;
# define MAGIC2 ((('R'^0200)<<8)|'E')
sop *strip; /* malloced area for strip */
cset *sets; /* -> cset [ncsets] */
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
int magic;
int csetsize; /* number of bits in a cset vector */
int ncsets; /* number of csets in use */
int cflags; /* copy of cli_regcomp() cflags argument */
cset *sets; /* -> cset [ncsets] */
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
int cflags; /* copy of regcomp() cflags argument */
sopno nstates; /* = number of sops */
sopno firststate; /* the initial OEND (normally 0) */
sopno laststate; /* the final OEND */
int iflags; /* internal flags */
# define USEBOL 01 /* used ^ */
# define USEEOL 02 /* used $ */
# define REGEX_BAD 04 /* something wrong */
# define BAD 04 /* something wrong */
int nbol; /* number of ^ used */
int neol; /* number of $ used */
int ncategories; /* how many character categories */
cat_t *categories; /* ->catspace[-CHAR_MIN] */
char *must; /* match must contain this string */
int mlen; /* length of must */
int backrefs; /* does it use back references? */
size_t nsub; /* copy of re_nsub */
int backrefs; /* does it use back references? */
sopno nplus; /* how deep does it nest +s? */
/* catspace must be last */
cat_t catspace[1]; /* actually [NC] */
};
/* misc utilities */
#define OUT (CHAR_MAX+1) /* a non-character value */
#define ISWORD(c) (isalnum((c)&0xff) || (c) == '_')
#define ISWORD(c) (isalnum(c) || (c) == '_')

@ -1,6 +1,5 @@
/* $OpenBSD: regexec.c,v 1.14 2018/07/11 12:38:46 martijn Exp $ */
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
@ -36,7 +35,7 @@
*/
/*
* the outer shell of cli_regexec()
* the outer shell of regexec()
*
* This file includes engine.c *twice*, after muchos fiddling with the
* macros that code uses. This lets the same code operate on two different
@ -48,17 +47,14 @@
#include <string.h>
#include <limits.h>
#include <ctype.h>
#include "clamav.h"
#include "others.h"
#include "regex.h"
#include <regex.h>
#include "utils.h"
#include "regex2.h"
/* macros for manipulating states, small version */
#define states long
#define states1 long /* for later use in cli_regexec() decision */
#define states1 states /* for later use in regexec() decision */
#define CLEAR(v) ((v) = 0)
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
@ -110,10 +106,11 @@
#define SET0(v, n) ((v)[n] = 0)
#define SET1(v, n) ((v)[n] = 1)
#define ISSET(v, n) ((v)[n])
#define ASSIGN(d, s) memmove(d, s, m->g->nstates)
#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
#define STATEVARS long vn; char *space
#define STATESETUP(m, nv) { (m)->space = cli_malloc((nv)*(m)->g->nstates); \
#define STATESETUP(m, nv) { (m)->space = reallocarray(NULL, \
(m)->g->nstates, (nv)); \
if ((m)->space == NULL) return(REG_ESPACE); \
(m)->vn = 0; }
#define STATETEARDOWN(m) { free((m)->space); }
@ -133,17 +130,18 @@
#include "engine.c"
/*
- cli_regexec - interface for matching
- regexec - interface for matching
*
* We put this here so we can exploit knowledge of the state representation
* when choosing which matcher to call. Also, by this point the matchers
* have been prototyped.
*/
int /* 0 success, REG_NOMATCH failure */
cli_regexec(const regex_t *preg, const char *string, size_t nmatch,
regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
{
struct re_guts *g = preg->re_g;
#ifdef REDEBUG
# define GOODFLAGS(f) (f)
#else
@ -152,13 +150,13 @@ cli_regexec(const regex_t *preg, const char *string, size_t nmatch,
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
return(REG_BADPAT);
assert(!(g->iflags&REGEX_BAD));
if (g->iflags&REGEX_BAD) /* backstop for no-debug case */
assert(!(g->iflags&BAD));
if (g->iflags&BAD) /* backstop for no-debug case */
return(REG_BADPAT);
eflags = GOODFLAGS(eflags);
if ((unsigned long)(g->nstates) <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
return(smatcher(g, string, nmatch, pmatch, eflags));
else
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
return(lmatcher(g, string, nmatch, pmatch, eflags));
}

@ -1,6 +1,5 @@
/* $OpenBSD: regfree.c,v 1.11 2015/12/28 22:27:03 mmcc Exp $ */
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
@ -38,19 +37,17 @@
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include "clamav.h"
#include "others.h"
#include "regex.h"
#include <regex.h>
#include <limits.h>
#include "utils.h"
#include "regex2.h"
/*
- cli_regfree - free everything
- regfree - free everything
*/
void
cli_regfree(regex_t *preg)
regfree(regex_t *preg)
{
struct re_guts *g;
@ -63,13 +60,10 @@ cli_regfree(regex_t *preg)
preg->re_magic = 0; /* mark it invalid */
g->magic = 0; /* mark it invalid */
if (g->strip != NULL)
free((char *)g->strip);
if (g->sets != NULL)
free((char *)g->sets);
if (g->setbits != NULL)
free((char *)g->setbits);
if (g->must != NULL)
free(g->must);
free((char *)g);
free(g->strip);
free(g->sets);
free(g->setbits);
free(g->must);
free(g);
}
DEF_WEAK(regfree);

@ -1,6 +1,6 @@
/* $OpenBSD: utils.h,v 1.4 2003/06/02 20:18:36 millert Exp $ */
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
@ -36,20 +36,12 @@
*/
/* utility definitions */
#ifdef _POSIX2_RE_DUP_MAX
#define DUPMAX _POSIX2_RE_DUP_MAX
#else
#define DUPMAX 255
#endif
#define REGEX_INFINITY (DUPMAX + 1)
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
#define INFINITY (DUPMAX + 1)
#define NC (CHAR_MAX - CHAR_MIN + 1)
typedef unsigned char uch;
/* switch off assertions (if not already off) if no REDEBUG */
#ifdef CL_DEBUG
#define REDEBUG
#endif
#ifndef REDEBUG
#ifndef NDEBUG
#define NDEBUG /* no assertions please */

Loading…
Cancel
Save