add regcomp(), regexec() impl. from OpenBSD's libc.

This code is licensed under the 3-clause BSD.
This will be used instead of system provided regexec()/regcomp() to
have consistent behaviour across platforms.


git-svn: trunk@3225
remotes/push_mirror/metadata
Török Edvin 18 years ago
parent 584aadd53b
commit 53ff1b0490
  1. 7
      ChangeLog
  2. 3
      clamav-config.h.in
  3. 3
      configure
  4. 2
      configure.in
  5. 5
      libclamav/Makefile.am
  6. 56
      libclamav/Makefile.in
  7. 18
      libclamav/phishcheck.c
  8. 4
      libclamav/phishcheck.h
  9. 68
      libclamav/regex/cclass.h
  10. 139
      libclamav/regex/cname.h
  11. 1020
      libclamav/regex/engine.c
  12. 1519
      libclamav/regex/regcomp.c
  13. 132
      libclamav/regex/regerror.c
  14. 102
      libclamav/regex/regex.h
  15. 157
      libclamav/regex/regex2.h
  16. 162
      libclamav/regex/regexec.c
  17. 73
      libclamav/regex/regfree.c
  18. 52
      libclamav/regex/strlcpy.c
  19. 59
      libclamav/regex/utils.h
  20. 11
      libclamav/regex_list.c

@ -1,3 +1,10 @@
Mon Sep 17 21:06:59 EEST 2007(edwin)
------------------------------------
* libclamav/regex/: add regcomp(), regexec() impl. from OpenBSD's libc.
This code is licensed under the 3-clause BSD.
This will be used instead of system provided regexec()/regcomp() to
have consistent behaviour across platforms.
Mon Sep 17 17:12:27 BST 2007 (njh)
----------------------------------
* libclamav/mbox.c: Bugs 665/667

@ -207,9 +207,6 @@
/* Define to 1 if you have the `recvmsg' function. */
#undef HAVE_RECVMSG
/* Define to 1 if you have the <regex.h> header file. */
#undef HAVE_REGEX_H
/* have resolv.h */
#undef HAVE_RESOLV_H

3
configure vendored

@ -19993,8 +19993,7 @@ fi
for ac_header in stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h regex.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h
for ac_header in stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h
do
as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then

@ -40,7 +40,7 @@ AC_DEFINE(SCANBUFF, 131072, [scan buffer size])
AC_DEFINE(FILEBUFF, 8192, [file i/o buffer size])
AC_HEADER_STDC
AC_CHECK_HEADERS(stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h regex.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h)
AC_CHECK_HEADERS(stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h)
AC_CHECK_HEADER(syslog.h,AC_DEFINE(USE_SYSLOG,1,[use syslog]),)
AC_TYPE_OFF_T

@ -110,6 +110,11 @@ libclamav_la_SOURCES = \
is_tar.h \
tnef.c \
tnef.h \
regex/strlcpy.c \
regex/regcomp.c \
regex/regerror.c \
regex/regexec.c \
regex/regfree.c \
unrar/unrar15.c \
unrar/unrar20.h \
unrar/unrarcmd.h \

@ -58,7 +58,7 @@ host_triplet = @host@
target_triplet = @target@
subdir = libclamav
DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in
$(srcdir)/Makefile.in COPYING
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
$(top_srcdir)/configure.in
@ -84,10 +84,11 @@ am_libclamav_la_OBJECTS = matcher-ac.lo matcher-bm.lo matcher.lo \
pe.lo upx.lo htmlnorm.lo chmunpack.lo rebuildpe.lo petite.lo \
wwunpack.lo unsp.lo aspack.lo packlibs.lo fsg.lo mew.lo \
upack.lo line.lo untar.lo unzip.lo special.lo binhex.lo \
is_tar.lo tnef.lo unrar15.lo unrarvm.lo unrar.lo \
unrarfilter.lo unrarppm.lo unrar20.lo unrarcmd.lo unarj.lo \
LZMADecode.lo bzlib.lo infblock.lo nulsft.lo pdf.lo spin.lo \
yc.lo elf.lo sis.lo uuencode.lo pst.lo phishcheck.lo \
is_tar.lo tnef.lo strlcpy.lo regcomp.lo regerror.lo regexec.lo \
regfree.lo unrar15.lo unrarvm.lo unrar.lo unrarfilter.lo \
unrarppm.lo unrar20.lo unrarcmd.lo unarj.lo LZMADecode.lo \
bzlib.lo infblock.lo nulsft.lo pdf.lo spin.lo yc.lo elf.lo \
sis.lo uuencode.lo pst.lo phishcheck.lo \
phish_domaincheck_db.lo phish_whitelist.lo regex_list.lo \
sha256.lo mspack.lo cab.lo entconv.lo hashtab.lo dconf.lo \
lockdb.lo
@ -325,6 +326,11 @@ libclamav_la_SOURCES = \
is_tar.h \
tnef.c \
tnef.h \
regex/strlcpy.c \
regex/regcomp.c \
regex/regerror.c \
regex/regexec.c \
regex/regfree.c \
unrar/unrar15.c \
unrar/unrar20.h \
unrar/unrarcmd.h \
@ -505,7 +511,11 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pst.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readdb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rebuildpe.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regcomp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regerror.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex_list.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regexec.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regfree.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rtf.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scanners.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256.Plo@am__quote@
@ -514,6 +524,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/special.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spin.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strlcpy.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/table.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/text.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tnef.Plo@am__quote@
@ -556,6 +567,41 @@ distclean-compile:
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
strlcpy.lo: regex/strlcpy.c
@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT strlcpy.lo -MD -MP -MF "$(DEPDIR)/strlcpy.Tpo" -c -o strlcpy.lo `test -f 'regex/strlcpy.c' || echo '$(srcdir)/'`regex/strlcpy.c; \
@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/strlcpy.Tpo" "$(DEPDIR)/strlcpy.Plo"; else rm -f "$(DEPDIR)/strlcpy.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='regex/strlcpy.c' object='strlcpy.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o strlcpy.lo `test -f 'regex/strlcpy.c' || echo '$(srcdir)/'`regex/strlcpy.c
regcomp.lo: regex/regcomp.c
@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regcomp.lo -MD -MP -MF "$(DEPDIR)/regcomp.Tpo" -c -o regcomp.lo `test -f 'regex/regcomp.c' || echo '$(srcdir)/'`regex/regcomp.c; \
@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/regcomp.Tpo" "$(DEPDIR)/regcomp.Plo"; else rm -f "$(DEPDIR)/regcomp.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='regex/regcomp.c' object='regcomp.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regcomp.lo `test -f 'regex/regcomp.c' || echo '$(srcdir)/'`regex/regcomp.c
regerror.lo: regex/regerror.c
@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regerror.lo -MD -MP -MF "$(DEPDIR)/regerror.Tpo" -c -o regerror.lo `test -f 'regex/regerror.c' || echo '$(srcdir)/'`regex/regerror.c; \
@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/regerror.Tpo" "$(DEPDIR)/regerror.Plo"; else rm -f "$(DEPDIR)/regerror.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='regex/regerror.c' object='regerror.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regerror.lo `test -f 'regex/regerror.c' || echo '$(srcdir)/'`regex/regerror.c
regexec.lo: regex/regexec.c
@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regexec.lo -MD -MP -MF "$(DEPDIR)/regexec.Tpo" -c -o regexec.lo `test -f 'regex/regexec.c' || echo '$(srcdir)/'`regex/regexec.c; \
@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/regexec.Tpo" "$(DEPDIR)/regexec.Plo"; else rm -f "$(DEPDIR)/regexec.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='regex/regexec.c' object='regexec.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regexec.lo `test -f 'regex/regexec.c' || echo '$(srcdir)/'`regex/regexec.c
regfree.lo: regex/regfree.c
@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regfree.lo -MD -MP -MF "$(DEPDIR)/regfree.Tpo" -c -o regfree.lo `test -f 'regex/regfree.c' || echo '$(srcdir)/'`regex/regfree.c; \
@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/regfree.Tpo" "$(DEPDIR)/regfree.Plo"; else rm -f "$(DEPDIR)/regfree.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='regex/regfree.c' object='regfree.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regfree.lo `test -f 'regex/regfree.c' || echo '$(srcdir)/'`regex/regfree.c
unrar15.lo: unrar/unrar15.c
@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unrar15.lo -MD -MP -MF "$(DEPDIR)/unrar15.Tpo" -c -o unrar15.lo `test -f 'unrar/unrar15.c' || echo '$(srcdir)/'`unrar/unrar15.c; \
@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/unrar15.Tpo" "$(DEPDIR)/unrar15.Plo"; else rm -f "$(DEPDIR)/unrar15.Tpo"; exit 1; fi

@ -353,17 +353,17 @@ static int build_regex(regex_t* preg,const char* regex,int nosub)
{
int rc;
cli_dbgmsg("Phishcheck: Compiling regex: %s\n",regex);
rc = regcomp(preg,regex,REG_EXTENDED|REG_ICASE|(nosub ? REG_NOSUB :0));
rc = cli_regcomp(preg,regex,REG_EXTENDED|REG_ICASE|(nosub ? REG_NOSUB :0));
if(rc) {
#ifdef C_WINDOWS
cli_errmsg("Phishcheck: Error in compiling regex, disabling phishing checks\n");
#else
size_t buflen = regerror(rc,preg,NULL,0);
size_t buflen = cli_regerror(rc,preg,NULL,0);
char *errbuf = cli_malloc(buflen);
if(errbuf) {
regerror(rc,preg,errbuf,buflen);
cli_regerror(rc,preg,errbuf,buflen);
cli_errmsg("Phishcheck: Error in compiling regex:%s\nDisabling phishing checks\n",errbuf);
free(errbuf);
} else
@ -446,7 +446,7 @@ static int get_host(const struct phishcheck* s,struct string* dest,const char* U
static int isCountryCode(const struct phishcheck* s,const char* str)
{
return str ? !regexec(&s->preg_cctld,str,0,NULL,0) : 0;
return str ? !cli_regexec(&s->preg_cctld,str,0,NULL,0) : 0;
}
static int isTLD(const struct phishcheck* pchk,const char* str,int len)
@ -461,7 +461,7 @@ static int isTLD(const struct phishcheck* pchk,const char* str,int len)
return CL_EMEM;
strncpy(s,str,len);
s[len]='\0';
rc = !regexec(&pchk->preg_tld,s,0,NULL,0);
rc = !cli_regexec(&pchk->preg_tld,s,0,NULL,0);
free(s);
return rc ? 1 : 0;
}
@ -880,7 +880,7 @@ static char hex2int(const unsigned char* src)
static void free_regex(regex_t* p)
{
if(p) {
regfree(p);
cli_regfree(p);
}
}
@ -977,12 +977,12 @@ void phishing_done(struct cl_engine* engine)
*/
static int isURL(const struct phishcheck* pchk,const char* URL)
{
return URL ? !regexec(&pchk->preg,URL,0,NULL,0) : 0;
return URL ? !cli_regexec(&pchk->preg,URL,0,NULL,0) : 0;
}
static int isNumericURL(const struct phishcheck* pchk,const char* URL)
{
return URL ? !regexec(&pchk->preg_numeric,URL,0,NULL,0) : 0;
return URL ? !cli_regexec(&pchk->preg_numeric,URL,0,NULL,0) : 0;
}
/* Cleans up @urls
@ -1013,7 +1013,7 @@ static int url_get_host(const struct phishcheck* pchk, struct url_check* url,str
string_free(host);
return CL_PHISH_TEXTURL;
}
if(url->flags&CHECK_CLOAKING && !regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) {
if(url->flags&CHECK_CLOAKING && !cli_regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) {
/* uses a regex here, so that we don't accidentally block 0xacab.net style hosts */
string_free(host);
return CL_PHISH_HEX_URL;

@ -20,9 +20,7 @@
#ifndef _PHISH_CHECK_H
#define _PHISH_CHECK_H
#ifdef HAVE_REGEX_H
#include <regex.h>
#endif
#include "regex/regex.h"
#define CL_PHISH_BASE 100
enum phish_status {CL_PHISH_NODECISION=0,CL_PHISH_CLEAN=CL_PHISH_BASE, CL_PHISH_CLEANUP_OK,CL_PHISH_HOST_OK, CL_PHISH_DOMAIN_OK,

@ -0,0 +1,68 @@
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)cclass.h 8.3 (Berkeley) 3/20/94
*/
/*
 * Character-class table.
 *
 * Each entry pairs a class name (e.g. "alnum", "digit") with an explicit,
 * NUL-terminated list of the characters belonging to that class.  The
 * "multis" member is the empty string in every entry of this table.
 * The table is terminated by an entry whose name is NULL.
 */
static struct cclass {
	const char *name;	/* class name; NULL marks the end of the table */
	const char *chars;	/* member characters of the class */
	const char *multis;	/* always "" in this table */
} cclasses[] = {
	{
		"alnum",
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789",
		""
	},
	{
		"alpha",
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz",
		""
	},
	{
		"blank",
		" \t",
		""
	},
	{
		"cntrl",
		"\007\b\t\n\v\f\r"
		"\1\2\3\4\5\6"
		"\16\17\20\21\22\23\24"
		"\25\26\27\30\31\32\33\34\35\36\37"
		"\177",
		""
	},
	{
		"digit",
		"0123456789",
		""
	},
	{
		"graph",
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789"
		"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
		""
	},
	{
		"lower",
		"abcdefghijklmnopqrstuvwxyz",
		""
	},
	{
		/* same members as "graph" plus the space character */
		"print",
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789"
		"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
		" ",
		""
	},
	{
		"punct",
		"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
		""
	},
	{
		"space",
		"\t\n\v\f\r ",
		""
	},
	{
		"upper",
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
		""
	},
	{
		"xdigit",
		"0123456789ABCDEFabcdef",
		""
	},
	/* end-of-table marker */
	{ NULL, NULL, "" }
};

@ -0,0 +1,139 @@
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)cname.h 8.3 (Berkeley) 3/20/94
*/
/*
 * character-name table
 *
 * Maps a symbolic character name to the single character it stands for.
 * Several characters are listed under more than one name (for example
 * both "BEL" and "alert" carry the code '\007').  The table is terminated
 * by an entry whose name is NULL.
 */
static struct cname {
const char *name; /* symbolic name; NULL marks the end of the table */
char code; /* the character that the name denotes */
} cnames[] = {
/* control characters, codes 000 through 037 (octal) */
{ "NUL", '\0' },
{ "SOH", '\001' },
{ "STX", '\002' },
{ "ETX", '\003' },
{ "EOT", '\004' },
{ "ENQ", '\005' },
{ "ACK", '\006' },
{ "BEL", '\007' },
{ "alert", '\007' },
{ "BS", '\010' },
{ "backspace", '\b' },
{ "HT", '\011' },
{ "tab", '\t' },
{ "LF", '\012' },
{ "newline", '\n' },
{ "VT", '\013' },
{ "vertical-tab", '\v' },
{ "FF", '\014' },
{ "form-feed", '\f' },
{ "CR", '\015' },
{ "carriage-return", '\r' },
{ "SO", '\016' },
{ "SI", '\017' },
{ "DLE", '\020' },
{ "DC1", '\021' },
{ "DC2", '\022' },
{ "DC3", '\023' },
{ "DC4", '\024' },
{ "NAK", '\025' },
{ "SYN", '\026' },
{ "ETB", '\027' },
{ "CAN", '\030' },
{ "EM", '\031' },
{ "SUB", '\032' },
{ "ESC", '\033' },
{ "IS4", '\034' },
{ "FS", '\034' },
{ "IS3", '\035' },
{ "GS", '\035' },
{ "IS2", '\036' },
{ "RS", '\036' },
{ "IS1", '\037' },
{ "US", '\037' },
/* space, punctuation and symbols preceding the digits */
{ "space", ' ' },
{ "exclamation-mark", '!' },
{ "quotation-mark", '"' },
{ "number-sign", '#' },
{ "dollar-sign", '$' },
{ "percent-sign", '%' },
{ "ampersand", '&' },
{ "apostrophe", '\'' },
{ "left-parenthesis", '(' },
{ "right-parenthesis", ')' },
{ "asterisk", '*' },
{ "plus-sign", '+' },
{ "comma", ',' },
{ "hyphen", '-' },
{ "hyphen-minus", '-' },
{ "period", '.' },
{ "full-stop", '.' },
{ "slash", '/' },
{ "solidus", '/' },
/* decimal digits */
{ "zero", '0' },
{ "one", '1' },
{ "two", '2' },
{ "three", '3' },
{ "four", '4' },
{ "five", '5' },
{ "six", '6' },
{ "seven", '7' },
{ "eight", '8' },
{ "nine", '9' },
/* remaining punctuation and symbols; letters have no entries */
{ "colon", ':' },
{ "semicolon", ';' },
{ "less-than-sign", '<' },
{ "equals-sign", '=' },
{ "greater-than-sign", '>' },
{ "question-mark", '?' },
{ "commercial-at", '@' },
{ "left-square-bracket", '[' },
{ "backslash", '\\' },
{ "reverse-solidus", '\\' },
{ "right-square-bracket", ']' },
{ "circumflex", '^' },
{ "circumflex-accent", '^' },
{ "underscore", '_' },
{ "low-line", '_' },
{ "grave-accent", '`' },
{ "left-brace", '{' },
{ "left-curly-bracket", '{' },
{ "vertical-line", '|' },
{ "right-brace", '}' },
{ "right-curly-bracket", '}' },
{ "tilde", '~' },
{ "DEL", '\177' },
/* end-of-table marker */
{ NULL, 0 }
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,132 @@
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regerror.c 8.4 (Berkeley) 3/20/94
*/
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include "others.h"
#include "regex.h"
#include "utils.h"
static const char *regatoi(const regex_t *, char *, int);
/*
 * Table of known error codes.  Each entry holds the numeric code, its
 * symbolic name as a string, and a human-readable explanation.
 *
 * The last entry has code 0.  The lookup loops in this file stop on it,
 * and its "explain" text is what cli_regerror() reports for a code that
 * is not listed above it.
 */
static struct rerr {
int code; /* REG_* value; 0 in the terminating entry */
const char *name; /* the macro name spelled out, "" in the terminating entry */
const char *explain; /* message text handed back to the caller */
} rerrs[] = {
{ REG_NOMATCH, "REG_NOMATCH", "cli_regexec() failed to match" },
{ REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
{ REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
{ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
{ REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
{ REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
{ REG_EBRACE, "REG_EBRACE", "braces not balanced" },
{ REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
{ REG_ERANGE, "REG_ERANGE", "invalid character range" },
{ REG_ESPACE, "REG_ESPACE", "out of memory" },
{ REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
{ REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
{ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
{ REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
/* terminator and fallback for unlisted codes */
{ 0, "", "*** unknown regexp error code ***" }
};
/*
 - cli_regerror - the interface to error numbers
 = extern size_t cli_regerror(int, const regex_t *, char *, size_t);
 */
/*
 * Produce the text belonging to an error code.
 *
 * errcode      - a REG_* error code.  Two special forms are recognised:
 *                  - errcode == REG_ATOI: preg->re_endp is taken as an
 *                    error name and the result is the matching numeric
 *                    code printed in decimal (see regatoi()).
 *                  - REG_ITOA bit set: the result is the symbolic name of
 *                    the code (or "REG_0x<hex>" for an unlisted code)
 *                    instead of the explanation text.
 * preg         - only consulted for the REG_ATOI form.
 * errbuf       - destination buffer; written only if errbuf_size > 0.
 * errbuf_size  - capacity of errbuf, passed on to cli_strlcpy().
 *
 * Returns the length of the complete message plus one for the
 * terminating NUL, regardless of how much was copied into errbuf.
 */
/* ARGSUSED */
size_t
cli_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
	const int code = errcode &~ REG_ITOA;	/* code with the ITOA flag stripped */
	const char *msg;
	char scratch[50];
	size_t needed;

	if (errcode == REG_ATOI) {
		msg = regatoi(preg, scratch, sizeof scratch);
	} else {
		struct rerr *entry = rerrs;

		/* stops either on the matching entry or on the code-0 terminator */
		while (entry->code != 0 && entry->code != code)
			entry++;

		if (!(errcode&REG_ITOA)) {
			/* the terminator's text doubles as the "unknown code" message */
			msg = entry->explain;
		} else {
			if (entry->code == 0) {
				(void)snprintf(scratch, sizeof scratch,
				    "REG_0x%x", code);
			} else {
				assert(strlen(entry->name) < sizeof(scratch));
				(void) cli_strlcpy(scratch, entry->name, sizeof scratch);
			}
			msg = scratch;
		}
	}

	needed = strlen(msg) + 1;
	if (errbuf_size > 0) {
		cli_strlcpy(errbuf, msg, errbuf_size);
	}

	return(needed);
}
/*
 - regatoi - internal routine to implement REG_ATOI
 */
/*
 * Convert an error name to its numeric code, as text.
 *
 * preg          - supplies the name to look up in preg->re_endp.
 * localbuf      - scratch buffer that receives the decimal code.
 * localbufsize  - capacity of localbuf.
 *
 * Returns localbuf holding the decimal value of the matching entry of
 * rerrs[], or the constant string "0" when the name is not listed.  A
 * NULL preg or a NULL preg->re_endp is handled the same way as an
 * unknown name.
 */
static const char *
regatoi(const regex_t *preg, char *localbuf, int localbufsize)
{
	struct rerr *r;

	/*
	 * Without a name there is nothing to look up; report "unknown"
	 * rather than handing a NULL pointer to strcmp().
	 */
	if (preg == NULL || preg->re_endp == NULL)
		return("0");

	for (r = rerrs; r->code != 0; r++)
		if (strcmp(r->name, preg->re_endp) == 0)
			break;

	/* reached the code-0 terminator: the name is not in the table */
	if (r->code == 0)
		return("0");

	(void)snprintf(localbuf, localbufsize, "%d", r->code);
	return(localbuf);
}

@ -0,0 +1,102 @@
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992 Henry Spencer.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer of the University of Toronto.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regex.h 8.1 (Berkeley) 6/2/93
*/
#ifndef _REGEX_H_
#define _REGEX_H_
#include <sys/types.h>
/* types */
/* Offset type used by regmatch_t for match positions; an alias of off_t. */
typedef off_t regoff_t;
/*
 * Regular-expression object handed to cli_regcomp(), cli_regexec(),
 * cli_regerror() and cli_regfree().
 */
typedef struct {
/* NOTE(review): presumably a validity marker set by cli_regcomp() -- confirm in regcomp.c */
int re_magic;
size_t re_nsub; /* number of parenthesized subexpressions */
/* also read by cli_regerror() as the error name when errcode is REG_ATOI */
const char *re_endp; /* end pointer for REG_PEND */
struct re_guts *re_g; /* none of your business :-) */
} regex_t;
/*
 * One match span; cli_regexec() takes an array of these as its
 * regmatch_t [] parameter.
 */
typedef struct {
regoff_t rm_so; /* start of match */
regoff_t rm_eo; /* end of match */
} regmatch_t;
/* cli_regcomp() flags */
/* Written in octal; every non-zero value is a distinct bit, so they can be ORed. */
#define REG_BASIC 0000
#define REG_EXTENDED 0001
#define REG_ICASE 0002
#define REG_NOSUB 0004
#define REG_NEWLINE 0010
#define REG_NOSPEC 0020
#define REG_PEND 0040
#define REG_DUMP 0200
/* cli_regerror() flags */
/*
 * REG_NOMATCH through REG_INVARG are error codes (consecutive decimal
 * values 1..16) that cli_regerror() translates into text.
 */
#define REG_NOMATCH 1
#define REG_BADPAT 2
#define REG_ECOLLATE 3
#define REG_ECTYPE 4
#define REG_EESCAPE 5
#define REG_ESUBREG 6
#define REG_EBRACK 7
#define REG_EPAREN 8
#define REG_EBRACE 9
#define REG_BADBR 10
#define REG_ERANGE 11
#define REG_ESPACE 12
#define REG_BADRPT 13
#define REG_EMPTY 14
#define REG_ASSERT 15
#define REG_INVARG 16
/*
 * REG_ATOI is a request code that cli_regerror() compares against the
 * whole errcode; REG_ITOA is a single bit that cli_regerror() tests for
 * and masks off before looking the code up.
 */
#define REG_ATOI 255 /* convert name to number (!) */
#define REG_ITOA 0400 /* convert number to name (!) */
/* cli_regexec() flags */
/* Octal bit values, separate from the cli_regcomp() flag set above. */
#define REG_NOTBOL 00001
#define REG_NOTEOL 00002
#define REG_STARTEND 00004
#define REG_TRACE 00400 /* tracing of execution */
#define REG_LARGE 01000 /* force large representation */
#define REG_BACKR 02000 /* force use of backref code */
/*
 * Public entry points.  They carry a cli_ prefix so that they do not
 * clash with a regcomp()/regexec() family provided by the system.
 */
/* Compile a pattern; the int argument takes the cli_regcomp() flags above. */
int cli_regcomp(regex_t *, const char *, int);
/*
 * Turn an error code into text.  Copies into the buffer only when the
 * size is non-zero and returns the full message length plus one.
 */
size_t cli_regerror(int, const regex_t *, char *, size_t);
/* Match a string; the final int argument takes the cli_regexec() flags above. */
int cli_regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
/* NOTE(review): presumably releases what cli_regcomp() allocated -- confirm in regfree.c */
void cli_regfree(regex_t *);
/* Size-bounded string copy helper; used by cli_regerror(). */
size_t cli_strlcpy(char *dst, const char *src, size_t siz);
#endif /* !_REGEX_H_ */

@ -0,0 +1,157 @@
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
*/
/*
 * internals of regex_t
 */
/*
 * Magic number: 'r' with bit 0200 toggled, shifted left by 8 bits, ORed
 * with 'e'.
 * NOTE(review): presumably the value stored in regex_t.re_magic -- confirm
 * in regcomp.c.
 */
#define MAGIC1 ((('r'^0200)<<8) | 'e')
/*
* The internal representation is a *strip*, a sequence of
* operators ending with an endmarker. (Some terminology etc. is a
* historical relic of earlier versions which used multiple strips.)
* Certain oddities in the representation are there to permit running
* the machinery backwards; in particular, any deviation from sequential
* flow must be marked at both its source and its destination. Some
* fine points:
*
* - OPLUS_ and O_PLUS are *inside* the loop they create.
* - OQUEST_ and O_QUEST are *outside* the bypass they create.
* - OCH_ and O_CH are *outside* the multi-way branch they create, while
* OOR1 and OOR2 are respectively the end and the beginning of one of
* the branches. Note that there is an implicit OOR2 following OCH_
* and an implicit OOR1 preceding O_CH.
*
* In state representations, an operator's bit is on to signify a state
* immediately *preceding* "execution" of that operator.
*/
/*
 * A strip operator packs an opcode and an operand into one word:
 * OPRMASK (0xf8000000) selects the five opcode bits 27..31 and OPDMASK
 * (0x07ffffff) selects the 27 operand bits below them.  OPSHIFT is the
 * bit position at which the opcode field starts.
 */
typedef unsigned long sop; /* strip operator */
typedef long sopno;
#define OPRMASK 0xf8000000LU
#define OPDMASK 0x07ffffffLU
#define OPSHIFT ((unsigned)27)
/* OP() keeps the opcode field in place (unshifted), so it compares directly with the O* constants. */
#define OP(n) ((n)&OPRMASK)
/* OPND() extracts the operand field. */
#define OPND(n) ((n)&OPDMASK)
/* SOP() combines an (already shifted) opcode with an operand; the operand is not masked here. */
#define SOP(op, opnd) ((op)|(opnd))
/*
 * Opcode constants.  Each is a small integer (1 through 20) shifted into
 * the opcode field by OPSHIFT.  The trailing comment on every line gives,
 * in order, the operator's meaning and what its operand holds ("-" means
 * no operand).
 */
/* operators meaning operand */
/* (back, fwd are offsets) */
#define OEND (1LU<<OPSHIFT) /* endmarker - */
#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */
#define OBOL (3LU<<OPSHIFT) /* left anchor - */
#define OEOL (4LU<<OPSHIFT) /* right anchor - */
#define OANY (5LU<<OPSHIFT) /* . - */
#define OANYOF (6LU<<OPSHIFT) /* [...] set number */
#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */
#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */
#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */
#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */
#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */
#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */
#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */
#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */
#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */
#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */
#define OBOW (19LU<<OPSHIFT) /* begin word - */
#define OEOW (20LU<<OPSHIFT) /* end word - */
/*
* Structure for [] character-set representation. Character sets are
* done as bit vectors, grouped 8 to a byte vector for compactness.
* The individual set therefore has both a pointer to the byte vector
* and a mask to pick out the relevant bit of each byte. A hash code
* simplifies testing whether two sets could be identical.
*
* This will get trickier for multicharacter collating elements. As
* preliminary hooks for dealing with such things, we also carry along
* a string of multi-character elements, and decide the size of the
* vectors at run time.
*/
/*
 * One [] character set.  Membership of character c is the bit selected
 * by `mask` inside byte ptr[c]; see the CHadd/CHsub/CHIN macros.
 */
typedef struct {
uch *ptr; /* -> uch [csetsize] */
uch mask; /* bit within array */
uch hash; /* hash code */
size_t smultis; /* size of multis -- presumably in bytes; TODO confirm in regcomp.c */
char *multis; /* -> char[smultis] ab\0cd\0ef\0\0 */
} cset;
/*
 * note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1
 *
 * "Unsafe" here means the macros expand their arguments more than once
 * (cs up to three times, c twice in CHadd/CHsub), so neither argument may
 * have side effects.  CHIN yields the masked bit itself (0 or the set's
 * mask value), so test it for zero/non-zero only.
 * CHadd/CHsub also adjust the set's hash by the uncast value of c.
 */
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
/* the MC* macros simply forward to mcadd/mcsub/mcin, which are not defined in this header */
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* cli_regcomp() internal fns */
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
#define MCin(p, cs, cp) mcin(p, cs, cp)
/* stuff for character categories */
typedef unsigned char cat_t;
/*
 * main compiled-expression structure
 *
 * This is what a regex_t's re_g member points to.  cli_regexec() refuses
 * to run unless `magic` equals MAGIC2 and the BAD bit of `iflags` is
 * clear, and uses `nstates` to choose between the small and the large
 * matcher.  cli_regfree() releases strip, sets, setbits and must, then
 * the structure itself.
 */
struct re_guts {
int magic;
# define MAGIC2 ((('R'^0200)<<8)|'E')
sop *strip; /* malloced area for strip */
int csetsize; /* number of bits in a cset vector */
int ncsets; /* number of csets in use */
cset *sets; /* -> cset [ncsets] */
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
int cflags; /* copy of cli_regcomp() cflags argument */
sopno nstates; /* = number of sops */
sopno firststate; /* the initial OEND (normally 0) */
sopno laststate; /* the final OEND */
int iflags; /* internal flags */
# define USEBOL 01 /* used ^ */
# define USEEOL 02 /* used $ */
# define BAD 04 /* something wrong */
int nbol; /* number of ^ used */
int neol; /* number of $ used */
int ncategories; /* how many character categories */
cat_t *categories; /* ->catspace[-CHAR_MIN] */
char *must; /* match must contain this string */
int mlen; /* length of must */
size_t nsub; /* copy of re_nsub */
int backrefs; /* does it use back references? */
sopno nplus; /* how deep does it nest +s? */
/* catspace must be last */
/* NOTE(review): declared with one element but used as NC elements; presumably the allocator over-allocates the struct -- confirm in regcomp.c */
cat_t catspace[1]; /* actually [NC] */
};
/* misc utilities */
#define OUT (CHAR_MAX+1) /* a non-character value */
/*
 * ISWORD(c) is non-zero when c is a "word" character: alphanumeric or '_'.
 * The value handed to isalnum() is cast to unsigned char because the
 * <ctype.h> functions are only defined for EOF and values representable
 * as unsigned char; a plain (signed) char holding a byte >= 0x80 would
 * otherwise be passed as a negative int, which is undefined behaviour.
 * Note that c is evaluated twice, so it must not have side effects.
 */
#define ISWORD(c) (isalnum((unsigned char)(c)) || (c) == '_')

@ -0,0 +1,162 @@
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regexec.c 8.3 (Berkeley) 3/20/94
*/
/*
* the outer shell of cli_regexec()
*
* This file includes engine.c *twice*, after muchos fiddling with the
* macros that code uses. This lets the same code operate on two different
* representations for state sets.
*/
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <ctype.h>
#include "others.h"
#include "regex.h"
#include "utils.h"
#include "regex2.h"
/*
 * macros for manipulating states, small version
 *
 * In this variant a whole state set is a single long with one bit per
 * state, so every operation is a shift or mask on that word and no
 * per-match storage is needed (STATESETUP/STATETEARDOWN expand to
 * nothing).  cli_regexec() only selects the matcher built from these
 * macros when the pattern's state count fits in the bits of states1.
 */
#define states long
#define states1 states /* for later use in cli_regexec() decision */
#define CLEAR(v) ((v) = 0)
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
#define ASSIGN(d, s) ((d) = (s))
#define EQ(a, b) ((a) == (b))
#define STATEVARS long dummy /* dummy version */
#define STATESETUP(m, n) /* nothing */
#define STATETEARDOWN(m) /* nothing */
#define SETUP(v) ((v) = 0)
/* a onestate is a one-bit mask here: INIT selects bit n, INC moves to the next bit */
#define onestate long
#define INIT(o, n) ((o) = (unsigned long)1 << (n))
#define INC(o) ((o) <<= 1)
#define ISSTATEIN(v, o) (((v) & (o)) != 0)
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
/* `here` is not defined in this file -- presumably a onestate local in engine.c; confirm there */
#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
/* function names */
#define SNAMES /* engine.c looks after details */
#include "engine.c"
/*
 * now undo things
 *
 * Every small-version macro is removed so the large version can redefine
 * it before engine.c is included a second time.  states1 is deliberately
 * left defined: cli_regexec() uses sizeof(states1) to decide whether a
 * pattern fits the small representation.
 */
#undef states
#undef CLEAR
#undef SET0
#undef SET1
#undef ISSET
#undef ASSIGN
#undef EQ
#undef STATEVARS
#undef STATESETUP
#undef STATETEARDOWN
#undef SETUP
#undef onestate
#undef INIT
#undef INC
#undef ISSTATEIN
#undef FWD
#undef BACK
#undef ISSETBACK
#undef SNAMES
/*
 * macros for manipulating states, large version
 *
 * Here a state set is a char array with one byte per state
 * (m->g->nstates bytes), so sets are cleared/copied/compared with
 * memset/memmove/memcmp.  The macros reference a variable named `m`
 * directly, so they only work where such a variable is in scope.
 */
#define states char *
#define CLEAR(v) memset(v, 0, m->g->nstates)
#define SET0(v, n) ((v)[n] = 0)
#define SET1(v, n) ((v)[n] = 1)
#define ISSET(v, n) ((v)[n])
#define ASSIGN(d, s) memmove(d, s, m->g->nstates)
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
#define STATEVARS long vn; char *space
/*
 * STATESETUP allocates room for nv state vectors in one cli_malloc() call
 * and, on allocation failure, makes the function it is expanded in return
 * REG_ESPACE.  SETUP hands out the next nstates-sized slice of that area;
 * STATETEARDOWN frees the whole area.
 */
#define STATESETUP(m, nv) { (m)->space = cli_malloc((nv)*(m)->g->nstates); \
if ((m)->space == NULL) return(REG_ESPACE); \
(m)->vn = 0; }
#define STATETEARDOWN(m) { free((m)->space); }
#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
/* a onestate is a plain index into the byte array in this variant */
#define onestate long
#define INIT(o, n) ((o) = (n))
#define INC(o) ((o)++)
#define ISSTATEIN(v, o) ((v)[o])
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
/* `here` is not defined in this file -- presumably a onestate local in engine.c; confirm there */
#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
#define ISSETBACK(v, n) ((v)[here - (n)])
/* function names */
#define LNAMES /* flag */
#include "engine.c"
/*
 - cli_regexec - interface for matching
 *
 * We put this here so we can exploit knowledge of the state representation
 * when choosing which matcher to call.  Also, by this point the matchers
 * have been prototyped.
 *
 * preg    compiled expression; must carry the MAGIC1/MAGIC2 signatures
 * string  subject text handed to the matcher
 * nmatch  number of entries available in pmatch
 * pmatch  receives match offsets (passed straight through to the matcher)
 * eflags  execution flags; outside REDEBUG builds only REG_NOTBOL,
 *         REG_NOTEOL and REG_STARTEND are honoured, everything else is
 *         masked off
 *
 * Returns the matcher's result (0 on success, REG_NOMATCH on failure), or
 * REG_BADPAT when preg does not look like a valid compiled expression.
 */
int				/* 0 success, REG_NOMATCH failure */
cli_regexec(const regex_t *preg, const char *string, size_t nmatch,
    regmatch_t pmatch[], int eflags)
{
	struct re_guts *g;
#ifdef REDEBUG
#	define	GOODFLAGS(f)	(f)
#else
#	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
#endif

	if (preg->re_magic != MAGIC1)
		return(REG_BADPAT);

	/*
	 * Guard against a missing guts pointer before looking at its magic,
	 * the same way cli_regfree() does.
	 */
	g = preg->re_g;
	if (g == NULL || g->magic != MAGIC2)
		return(REG_BADPAT);

	assert(!(g->iflags&BAD));
	if (g->iflags&BAD)		/* backstop for no-debug case */
		return(REG_BADPAT);
	eflags = GOODFLAGS(eflags);

	/*
	 * The small matcher keeps a state set in one `states1` word, so it is
	 * only usable when there is a bit for every state; REG_LARGE forces
	 * the large matcher regardless.
	 */
	if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
		return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
	else
		return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
}

@ -0,0 +1,73 @@
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regfree.c 8.3 (Berkeley) 3/20/94
*/
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include "others.h"
#include "regex.h"
#include "utils.h"
#include "regex2.h"
/*
 - cli_regfree - free everything
 *
 * Releases all storage owned by a compiled expression.  The call is a
 * silent no-op when preg does not carry the MAGIC1 signature or when its
 * guts pointer is NULL or lacks the MAGIC2 signature, so calling it twice
 * on the same regex_t is harmless.  The regex_t object itself is not
 * freed; that remains the caller's job.
 */
void
cli_regfree(regex_t *preg)
{
	struct re_guts *g;

	if (preg->re_magic != MAGIC1)	/* oops */
		return;			/* nice to complain, but hard */

	g = preg->re_g;
	if (g == NULL || g->magic != MAGIC2)	/* oops again */
		return;

	preg->re_magic = 0;		/* mark it invalid */
	preg->re_g = NULL;		/* don't leave a dangling pointer behind */
	g->magic = 0;			/* mark it invalid */

	/* free(NULL) is a no-op, so the members need no individual guards */
	free(g->strip);
	free(g->sets);
	free(g->setbits);
	free(g->must);
	free(g);
}

@ -0,0 +1,52 @@
/*
* This code is derived from OpenBSD's libc, original license follows:
*
* Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/types.h>
#include <string.h>
#include "regex.h"
/*
 * Bounded string copy: places at most siz-1 characters of src into dst
 * and always NUL-terminates dst, except when siz is 0, in which case dst
 * is left untouched.
 *
 * The return value is strlen(src), i.e. the length the copy would have
 * had with unlimited room; a result >= siz tells the caller that the
 * copy was truncated.
 */
size_t
cli_strlcpy(char *dst, const char *src, size_t siz)
{
	size_t pos = 0;

	if (siz != 0) {
		const size_t room = siz - 1;	/* keep one byte for the NUL */

		while (pos < room && src[pos] != '\0') {
			dst[pos] = src[pos];
			pos++;
		}
		dst[pos] = '\0';
	}

	/* walk whatever is left of src so the full length can be reported */
	while (src[pos] != '\0')
		pos++;

	return pos;
}

@ -0,0 +1,59 @@
/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)utils.h 8.3 (Berkeley) 3/20/94
*/
/* utility definitions */
/*
 * DUPMAX mirrors _POSIX2_RE_DUP_MAX.  <limits.h> only exposes that macro
 * when POSIX feature macros are in effect (and not at all on non-POSIX
 * platforms), so fall back to 255, the value POSIX assigns to it, rather
 * than failing to compile.
 */
#ifdef _POSIX2_RE_DUP_MAX
#define	DUPMAX		_POSIX2_RE_DUP_MAX
#else
#define	DUPMAX		255
#endif
/*
 * NOTE(review): INFINITY is also a standard <math.h> macro; do not include
 * <math.h> in a translation unit that uses this header.
 */
#define	INFINITY	(DUPMAX + 1)
#define	NC		(CHAR_MAX - CHAR_MIN + 1)	/* number of distinct char values */
typedef unsigned char uch;
/*
 * switch off assertions (if not already off) if no REDEBUG
 *
 * A CL_DEBUG build turns REDEBUG on.  Without REDEBUG, NDEBUG is defined
 * before <assert.h> is included, so assert() expands to nothing in the
 * regex code.
 */
#ifdef CL_DEBUG
#define REDEBUG
#endif
#ifndef REDEBUG
#ifndef NDEBUG
#define NDEBUG /* no assertions please */
#endif
#endif
#include <assert.h>
/* for old systems with bcopy() but no memmove() */
/* note the swapped argument order: bcopy() takes the source first */
#ifdef USEBCOPY
#define memmove(d, s, c) bcopy(s, d, c)
#endif

@ -52,9 +52,7 @@
#include <limits.h>
#include <sys/types.h>
#ifdef HAVE_REGEX_H
#include <regex.h>
#endif
#include "regex/regex.h"
#include "clamav.h"
@ -357,7 +355,6 @@ static struct tree_node* stack_pop(struct node_stack* stack)
}
/* Initialization & loading */
/* Initializes @matcher, allocating necessary substructures */
int init_regex_list(struct regex_matcher* matcher)
{
@ -1194,7 +1191,7 @@ static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,co
preg=cli_malloc(sizeof(*preg));
if(!preg)
return CL_EMEM;
rc = regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL));
rc = cli_regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL));
leaf->preg=preg;
if(rc)
return rc;
@ -1275,7 +1272,7 @@ static int match_node(struct tree_node* node,const unsigned char* c,size_t len,c
const struct leaf_info* leaf = node->u.leaf;
/*isleaf = 1;*/
if(leaf->preg) {
rc = !regexec(leaf->preg,(const char*)c,0,NULL,0);
rc = !cli_regexec(leaf->preg,(const char*)c,0,NULL,0);
}
else {
massert(*c==node->c && "We know this has to match[2]");
@ -1394,7 +1391,7 @@ static void destroy_tree_internal(struct regex_matcher* matcher,struct tree_node
stack_push_once(&matcher->node_stack,(struct tree_node*)node->u.leaf);/* cast to make compiler happy, and to not make another stack implementation for storing void* */
stack_push_once(&matcher->node_stack,node);
if(leaf->preg) {
regfree(leaf->preg);
cli_regfree(leaf->preg);
free(leaf->preg);
leaf->preg=NULL;
}

Loading…
Cancel
Save