From 4e9ab8ed2fe38e8bb9529ce82a67e726915e2d5a Mon Sep 17 00:00:00 2001 From: Tomasz Kojm Date: Wed, 15 Nov 2006 15:26:54 +0000 Subject: [PATCH] pattern matcher accuracy improvements git-svn: trunk@2505 --- clamav-devel/ChangeLog | 7 ++ clamav-devel/libclamav/clamav.h | 3 +- clamav-devel/libclamav/filetypes.c | 32 +++-- clamav-devel/libclamav/matcher-ac.c | 176 +++++++++++++++++++++++----- clamav-devel/libclamav/matcher-ac.h | 13 +- clamav-devel/libclamav/matcher-bm.c | 15 ++- clamav-devel/libclamav/matcher-bm.h | 2 +- clamav-devel/libclamav/matcher.c | 128 +++++++------------- clamav-devel/libclamav/matcher.h | 2 +- clamav-devel/libclamav/regex_list.c | 18 ++- clamav-devel/libclamav/scanners.c | 4 +- 11 files changed, 243 insertions(+), 157 deletions(-) diff --git a/clamav-devel/ChangeLog b/clamav-devel/ChangeLog index 3f7fcdb5a..a137a8d94 100644 --- a/clamav-devel/ChangeLog +++ b/clamav-devel/ChangeLog @@ -1,3 +1,10 @@ +Wed Nov 15 16:18:09 CET 2006 (tk) +--------------------------------- + * libclamav: + the AC matcher now keeps track of partial matches to improve + the accuracy of signatures with range wildcards + + add cli_ac_initdata() and cli_ac_freedata() + + fix some signedness warnings + Tue Nov 14 13:49:58 GMT 2006 (trog) ----------------------------------- * libclamav/ole2_extract.c: Fix Solaris endian issue.
(bb#89) diff --git a/clamav-devel/libclamav/clamav.h b/clamav-devel/libclamav/clamav.h index 0d4f64722..dcc54d732 100644 --- a/clamav-devel/libclamav/clamav.h +++ b/clamav-devel/libclamav/clamav.h @@ -104,7 +104,8 @@ extern "C" /* internal structures */ struct cli_bm_patt { - char *pattern, *virname, *offset; + unsigned char *pattern; + char *virname, *offset; const char *viralias; unsigned int length; unsigned short target; diff --git a/clamav-devel/libclamav/filetypes.c b/clamav-devel/libclamav/filetypes.c index 92b39d9ed..1e4ebdced 100644 --- a/clamav-devel/libclamav/filetypes.c +++ b/clamav-devel/libclamav/filetypes.c @@ -26,6 +26,9 @@ #include #include #include +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif #include "clamav.h" #include "filetypes.h" @@ -233,13 +236,11 @@ int is_tar(unsigned char *buf, unsigned int nbytes); cli_file_t cli_filetype2(int desc, const struct cl_engine *engine) { - char smallbuff[MAGIC_BUFFER_SIZE + 1], *decoded; - unsigned char *bigbuff; + unsigned char smallbuff[MAGIC_BUFFER_SIZE + 1], *decoded, *bigbuff; int bread, sret; cli_file_t ret = CL_TYPE_UNKNOWN_DATA; struct cli_matcher *root; - int *partcnt; - unsigned long int *partoff; + struct cli_ac_data mdata; memset(smallbuff, 0, sizeof(smallbuff)); @@ -251,33 +252,28 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine) if(!root) return ret; - if((partcnt = (int *) cli_calloc(root->ac_partsigs + 1, sizeof(int))) == NULL) { - cli_warnmsg("cli_filetype2(): unable to cli_calloc(%d, %d)\n", root->ac_partsigs + 1, sizeof(int)); + if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN)) return ret; - } - if((partoff = (unsigned long int *) cli_calloc(root->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) { - cli_dbgmsg("cli_filetype2(): unable to cli_calloc(%d, %d)\n", root->ac_partsigs + 1, sizeof(unsigned long int)); - free(partcnt); - return ret; - } + sret = cli_ac_scanbuff(smallbuff, bread, NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL); + +
cli_ac_freedata(&mdata); - sret = cli_ac_scanbuff(smallbuff, bread, NULL, engine->root[0], partcnt, 1, 0, partoff, 0, -1, NULL); if(sret >= CL_TYPENO) { ret = sret; } else { - memset(partcnt, 0, (root->ac_partsigs + 1) * sizeof(int)); - memset(partoff, 0, (root->ac_partsigs + 1) * sizeof(unsigned long int)); + if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN)) + return ret; + decoded = cli_utf16toascii(smallbuff, bread); if(decoded) { - sret = cli_ac_scanbuff(decoded, strlen(decoded), NULL, engine->root[0], partcnt, 1, 0, partoff, 0, -1, NULL); + sret = cli_ac_scanbuff(decoded, strlen(decoded), NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL); free(decoded); if(sret == CL_TYPE_HTML) ret = CL_TYPE_HTML_UTF16; } + cli_ac_freedata(&mdata); } - free(partcnt); - free(partoff); } if(ret == CL_TYPE_UNKNOWN_DATA || ret == CL_TYPE_UNKNOWN_TEXT) { diff --git a/clamav-devel/libclamav/matcher-ac.c b/clamav-devel/libclamav/matcher-ac.c index 7215ea895..5140f7040 100644 --- a/clamav-devel/libclamav/matcher-ac.c +++ b/clamav-devel/libclamav/matcher-ac.c @@ -63,14 +63,14 @@ int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern) if(!next) { next = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node)); if(!next) { - cli_dbgmsg("Unable to allocate AC node (%d)\n", sizeof(struct cli_ac_node)); + cli_errmsg("cli_ac_addpatt(): Unable to allocate AC node (%u bytes)\n", sizeof(struct cli_ac_node)); return CL_EMEM; } root->ac_nodes++; root->ac_nodetable = (struct cli_ac_node **) cli_realloc(root->ac_nodetable, (root->ac_nodes) * sizeof(struct cli_ac_node *)); if(root->ac_nodetable == NULL) { - cli_dbgmsg("Unable to realloc nodetable (%d)\n", (root->ac_nodes) * sizeof(struct cli_matcher *)); + cli_errmsg("cli_ac_addpatt(): Unable to realloc nodetable (%u bytes)\n", (root->ac_nodes) * sizeof(struct cli_matcher *)); return CL_EMEM; } root->ac_nodetable[root->ac_nodes - 1] = next; @@ -95,7 +95,7 @@ static int cli_enqueue(struct nodelist **bfs, 
struct cli_ac_node *n) new = (struct nodelist *) cli_calloc(1, sizeof(struct nodelist)); if (new == NULL) { - cli_dbgmsg("Unable to allocate node list (%d)\n", sizeof(struct nodelist)); + cli_errmsg("cli_enqueue(): Unable to allocate node list (%u bytes)\n", sizeof(struct nodelist)); return CL_EMEM; } @@ -176,7 +176,7 @@ int cli_ac_buildtrie(struct cli_matcher *root) return CL_EMALFDB; if(!root->ac_root) { - cli_dbgmsg("AC pattern matcher not initialised\n"); + cli_dbgmsg("cli_ac_buildtrie(): AC pattern matcher is not initialised\n"); return CL_SUCCESS; } @@ -228,7 +228,7 @@ void cli_ac_free(struct cli_matcher *root) free(root->ac_root); } -inline static int cli_findpos(const char *buffer, unsigned int depth, unsigned int offset, unsigned int length, const struct cli_ac_patt *pattern) +inline static int cli_findpos(const unsigned char *buffer, unsigned int depth, unsigned int offset, unsigned int length, const struct cli_ac_patt *pattern) { unsigned int bufferpos = offset + depth; unsigned int postfixend = offset + length; @@ -260,7 +260,7 @@ inline static int cli_findpos(const char *buffer, unsigned int depth, unsigned i return 0; alt++; - } else if(pattern->pattern[i] != CLI_IGN && (char) pattern->pattern[i] != buffer[bufferpos]) + } else if(pattern->pattern[i] != CLI_IGN && (unsigned char) pattern->pattern[i] != buffer[bufferpos]) return 0; bufferpos++; @@ -288,7 +288,7 @@ inline static int cli_findpos(const char *buffer, unsigned int depth, unsigned i return 0; alt++; - } else if(pattern->prefix[i] != CLI_IGN && (char) pattern->prefix[i] != buffer[bufferpos]) + } else if(pattern->prefix[i] != CLI_IGN && (unsigned char) pattern->prefix[i] != buffer[bufferpos]) return 0; bufferpos++; @@ -298,27 +298,119 @@ inline static int cli_findpos(const char *buffer, unsigned int depth, unsigned i return 1; } -int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, int *partcnt, unsigned short otfrec, 
unsigned long int offset, unsigned long int *partoff, unsigned short ftype, int fd, struct cli_matched_type **ftoffset) +int cli_ac_initdata(struct cli_ac_data *data, unsigned int partsigs, unsigned int tracklen) +{ + unsigned int i, j; + + + if(!data) { + cli_errmsg("cli_ac_init(): data == NULL\n"); + return CL_ENULLARG; + } + + data->partsigs = partsigs; + + if(!partsigs) + return CL_SUCCESS; + + data->partcnt = (unsigned int *) cli_calloc(partsigs, sizeof(unsigned int)); + + if(!data->partcnt) { + cli_errmsg("cli_ac_init(): unable to cli_calloc(%u, %u)\n", partsigs, sizeof(unsigned int)); + return CL_EMEM; + } + + data->offcnt = (unsigned int *) cli_calloc(partsigs, sizeof(unsigned int)); + + if(!data->offcnt) { + cli_errmsg("cli_ac_init(): unable to cli_calloc(%u, %u)\n", partsigs, sizeof(unsigned int)); + free(data->partcnt); + return CL_EMEM; + } + + data->maxshift = (int *) cli_malloc(partsigs * sizeof(int)); + + if(!data->maxshift) { + cli_errmsg("cli_ac_init(): unable to cli_malloc(%u)\n", partsigs * sizeof(int)); + free(data->partcnt); + free(data->offcnt); + return CL_EMEM; + } + + memset(data->maxshift, -1, partsigs * sizeof(int)); + + data->partoff = (unsigned int **) cli_calloc(partsigs, sizeof(unsigned int *)); + + if(!data->partoff) { + cli_errmsg("cli_ac_init(): unable to cli_calloc(%u, %u)\n", partsigs, sizeof(unsigned int)); + free(data->partcnt); + free(data->offcnt); + free(data->maxshift); + return CL_EMEM; + } + + /* The number of multipart signatures is rather small so we already + * allocate the memory for all parts here instead of using a runtime + * allocation in cli_ac_scanbuff() + */ + + for(i = 0; i < partsigs; i++) { + data->partoff[i] = (unsigned int *) cli_calloc(tracklen, sizeof(unsigned int)); + + if(!data->partoff[i]) { + for(j = 0; j < i; j++) + free(data->partoff[j]); + + free(data->partoff); + free(data->partcnt); + free(data->offcnt); + free(data->maxshift); + cli_errmsg("cli_ac_init(): unable to cli_calloc(%u, %u)\n", 
tracklen, sizeof(unsigned int)); + return CL_EMEM; + } + } + + return CL_SUCCESS; +} + +void cli_ac_freedata(struct cli_ac_data *data) +{ + unsigned int i; + + + if(data && data->partsigs) { + free(data->partcnt); + free(data->offcnt); + free(data->maxshift); + + for(i = 0; i < data->partsigs; i++) + free(data->partoff[i]); + + free(data->partoff); + } +} + +int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, unsigned short ftype, int fd, struct cli_matched_type **ftoffset) { struct cli_ac_node *current; struct cli_ac_patt *pt; - int type = CL_CLEAN, dist, t; - unsigned int i, position; + int type = CL_CLEAN, t, j; + unsigned int i, position, idx, found, curroff; struct cli_matched_type *tnode; if(!root->ac_root) return CL_CLEAN; - if(!partcnt || !partoff) { - cli_dbgmsg("cli_ac_scanbuff(): partcnt == NULL || partoff == NULL\n"); + if(!mdata) { + cli_errmsg("cli_ac_scanbuff(): mdata == NULL\n"); return CL_ENULLARG; } current = root->ac_root; for(i = 0; i < length; i++) { - current = current->trans[(unsigned char) buffer[i] & 0xff]; + current = current->trans[buffer[i] & 0xff]; if(current->islast) { position = i - ac_depth + 1; @@ -326,46 +418,67 @@ int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char pt = current->list; while(pt) { if(cli_findpos(buffer, ac_depth, position, length, pt)) { + curroff = offset + position - pt->prefix_length; + if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) { if(ftype == CL_TYPE_UNKNOWN_TEXT) t = type; else t = ftype; - if((fd == -1 && !t) || !cli_validatesig(t, pt->offset, offset + position - pt->prefix_length, fd, pt->virname)) { + if((fd == -1 && !t) || !cli_validatesig(t, pt->offset, curroff, fd, pt->virname)) { pt = pt->next; continue; } } if(pt->sigid) { /* it's a partial signature */ - if(partcnt[pt->sigid] + 1 == pt->partno) { - dist = 
1; - if(pt->maxdist) - if((offset + i - pt->prefix_length) - partoff[pt->sigid] > pt->maxdist) - dist = 0; - if(dist && pt->mindist) - if((offset + i - pt->prefix_length) - partoff[pt->sigid] < pt->mindist) - dist = 0; + if(mdata->partcnt[pt->sigid - 1] + 1 == pt->partno) { + idx = mdata->offcnt[pt->sigid - 1]; + if(idx < AC_DEFAULT_TRACKLEN) { + mdata->partoff[pt->sigid - 1][idx] = curroff + pt->length; + + if(mdata->maxshift[pt->sigid - 1] == -1 || ((int) (mdata->partoff[pt->sigid - 1][idx] - mdata->partoff[pt->sigid - 1][0]) <= mdata->maxshift[pt->sigid - 1])) + mdata->offcnt[pt->sigid - 1]++; + } - if(dist) { - partoff[pt->sigid] = offset + i + pt->length; + } else if(mdata->partcnt[pt->sigid - 1] + 2 == pt->partno) { + found = 0; + for(j = mdata->offcnt[pt->sigid - 1] - 1; j >= 0; j--) { + found = 1; + if(pt->maxdist) + if(curroff - mdata->partoff[pt->sigid - 1][j] > pt->maxdist) + found = 0; - if(++partcnt[pt->sigid] == pt->parts) { /* the last one */ + if(found && pt->mindist) + if(curroff - mdata->partoff[pt->sigid - 1][j] < pt->mindist) + found = 0; + + if(found) + break; + } + + if(found) { + mdata->maxshift[pt->sigid - 1] = mdata->partoff[pt->sigid - 1][j] + pt->maxdist - curroff; + + mdata->partoff[pt->sigid - 1][0] = curroff + pt->length; + mdata->offcnt[pt->sigid - 1] = 1; + + if(++mdata->partcnt[pt->sigid - 1] + 1 == pt->parts) { if(pt->type) { if(otfrec) { if(pt->type > type || pt->type >= CL_TYPE_SFX) { - cli_dbgmsg("Matched signature for file type %s at %d\n", pt->virname, offset + position - pt->prefix_length); + cli_dbgmsg("Matched signature for file type %s\n", pt->virname); type = pt->type; if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < SFX_MAX_TESTS) && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) { if(!(tnode = cli_calloc(1, sizeof(struct cli_matched_type)))) { - cli_errmsg("Can't alloc memory for new type node\n"); + cli_errmsg("cli_ac_scanbuff(): Can't allocate memory for new type node\n"); return CL_EMEM; } tnode->type = type; - 
tnode->offset = offset + position - pt->prefix_length; + tnode->offset = -1; /* we don't remember the offset of the first part */ if(*ftoffset) tnode->cnt = (*ftoffset)->cnt + 1; @@ -374,7 +487,6 @@ int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char tnode->next = *ftoffset; *ftoffset = tnode; - } } } @@ -392,15 +504,15 @@ int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char if(pt->type) { if(otfrec) { if(pt->type > type || pt->type >= CL_TYPE_SFX) { - cli_dbgmsg("Matched signature for file type %s at %d\n", pt->virname, offset + position - pt->prefix_length); + cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, curroff); type = pt->type; if(ftoffset && (!*ftoffset ||(*ftoffset)->cnt < SFX_MAX_TESTS) && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) { if(!(tnode = cli_calloc(1, sizeof(struct cli_matched_type)))) { - cli_errmsg("Can't alloc memory for new type node\n"); + cli_errmsg("cli_ac_scanbuff(): Can't allocate memory for new type node\n"); return CL_EMEM; } tnode->type = type; - tnode->offset = offset + position - pt->prefix_length; + tnode->offset = curroff; if(*ftoffset) tnode->cnt = (*ftoffset)->cnt + 1; diff --git a/clamav-devel/libclamav/matcher-ac.h b/clamav-devel/libclamav/matcher-ac.h index 03e5df7ff..da1f20fa0 100644 --- a/clamav-devel/libclamav/matcher-ac.h +++ b/clamav-devel/libclamav/matcher-ac.h @@ -25,9 +25,20 @@ #include "filetypes.h" #define AC_DEFAULT_DEPTH 2 +#define AC_DEFAULT_TRACKLEN 8 + +struct cli_ac_data { + unsigned int partsigs; + unsigned int *partcnt; + unsigned int **partoff; + unsigned int *offcnt; + int *maxshift; +}; int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern); -int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, int *partcnt, unsigned short otfrec, unsigned long int offset, unsigned long int *partoff, unsigned short ftype, int fd, struct 
cli_matched_type **ftoffset); +int cli_ac_initdata(struct cli_ac_data *data, unsigned int partsigs, unsigned int histlen); +void cli_ac_freedata(struct cli_ac_data *data); +int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, unsigned short ftype, int fd, struct cli_matched_type **ftoffset); int cli_ac_buildtrie(struct cli_matcher *root); void cli_ac_free(struct cli_matcher *root); void cli_ac_setdepth(unsigned int depth); diff --git a/clamav-devel/libclamav/matcher-bm.c b/clamav-devel/libclamav/matcher-bm.c index 313c37f38..c6539b06d 100644 --- a/clamav-devel/libclamav/matcher-bm.c +++ b/clamav-devel/libclamav/matcher-bm.c @@ -32,15 +32,14 @@ /* #define BM_TEST_OFFSET 5 */ #define BM_BLOCK_SIZE 3 -#define HASH(a,b,c) 211 * (unsigned char) a + 37 * (unsigned char) b + (unsigned char) c -#define DHASH(a,b,c) 211 * a + 37 * b + c +#define HASH(a,b,c) (211 * a + 37 * b + c) int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern) { int i; uint16_t idx; - const char *pt = pattern->pattern; + const unsigned char *pt = pattern->pattern; struct cli_bm_patt *prev, *next = NULL; @@ -80,7 +79,7 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern) int cli_bm_init(struct cli_matcher *root) { unsigned int i; - unsigned int size = DHASH(256, 256, 256); + unsigned int size = HASH(256, 256, 256); cli_dbgmsg("in cli_bm_init()\n"); @@ -104,7 +103,7 @@ void cli_bm_free(struct cli_matcher *root) { struct cli_bm_patt *b1, *b2; unsigned int i; - unsigned int size = DHASH(256, 256, 256); + unsigned int size = HASH(256, 256, 256); if(root->bm_shift) @@ -129,14 +128,14 @@ void cli_bm_free(struct cli_matcher *root) } } -int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd) +int 
cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd) { unsigned int i, j, shift, off, found = 0; int idxtest; uint16_t idx; struct cli_bm_patt *p; - const char *bp; - char prefix; + const unsigned char *bp; + unsigned char prefix; if(!root->bm_shift) diff --git a/clamav-devel/libclamav/matcher-bm.h b/clamav-devel/libclamav/matcher-bm.h index 4794db15b..17aba3ccd 100644 --- a/clamav-devel/libclamav/matcher-bm.h +++ b/clamav-devel/libclamav/matcher-bm.h @@ -26,7 +26,7 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern); int cli_bm_init(struct cli_matcher *root); -int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd); +int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd); void cli_bm_free(struct cli_matcher *root); #endif diff --git a/clamav-devel/libclamav/matcher.c b/clamav-devel/libclamav/matcher.c index 7b7df7bda..8ac7ea027 100644 --- a/clamav-devel/libclamav/matcher.c +++ b/clamav-devel/libclamav/matcher.c @@ -52,10 +52,10 @@ extern short cli_debug_flag; #endif -int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype) +int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype) { - int ret = CL_CLEAN, i, tid = 0, *partcnt; - unsigned long int *partoff; + int ret = CL_CLEAN, i, tid = 0; + struct cli_ac_data mdata; struct cli_matcher *groot, *troot = NULL; #ifdef HAVE_NCORE void *streamhandle; @@ -209,43 +209,26 @@ int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, if(troot) { - if((partcnt = 
(int *) cli_calloc(troot->ac_partsigs + 1, sizeof(int))) == NULL) { - cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", troot->ac_partsigs + 1, sizeof(int)); - return CL_EMEM; - } - - if((partoff = (unsigned long int *) cli_calloc(troot->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) { - cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", troot->ac_partsigs + 1, sizeof(unsigned long int)); - free(partcnt); - return CL_EMEM; - } + if((ret = cli_ac_initdata(&mdata, troot->ac_partsigs, AC_DEFAULT_TRACKLEN))) + return ret; if(troot->ac_only || (ret = cli_bm_scanbuff(buffer, length, virname, troot, 0, ftype, -1)) != CL_VIRUS) - ret = cli_ac_scanbuff(buffer, length, virname, troot, partcnt, 0, 0, partoff, ftype, -1, NULL); + ret = cli_ac_scanbuff(buffer, length, virname, troot, &mdata, 0, 0, ftype, -1, NULL); - free(partcnt); - free(partoff); + cli_ac_freedata(&mdata); if(ret == CL_VIRUS) return ret; } - if((partcnt = (int *) cli_calloc(groot->ac_partsigs + 1, sizeof(int))) == NULL) { - cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", groot->ac_partsigs + 1, sizeof(int)); - return CL_EMEM; - } - - if((partoff = (unsigned long int *) cli_calloc(groot->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) { - cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", groot->ac_partsigs + 1, sizeof(unsigned long int)); - free(partcnt); - return CL_EMEM; - } + if((ret = cli_ac_initdata(&mdata, groot->ac_partsigs, AC_DEFAULT_TRACKLEN))) + return ret; if(groot->ac_only || (ret = cli_bm_scanbuff(buffer, length, virname, groot, 0, ftype, -1)) != CL_VIRUS) - ret = cli_ac_scanbuff(buffer, length, virname, groot, partcnt, 0, 0, partoff, ftype, -1, NULL); + ret = cli_ac_scanbuff(buffer, length, virname, groot, &mdata, 0, 0, ftype, -1, NULL); + + cli_ac_freedata(&mdata); - free(partcnt); - free(partoff); return ret; } @@ -408,10 +391,11 @@ int cli_validatesig(unsigned short ftype, const char *offstr, unsigned long int int cli_scandesc(int 
desc, cli_ctx *ctx, unsigned short otfrec, unsigned short ftype, struct cli_matched_type **ftoffset) { - char *buffer, *buff, *endbl, *pt; - int ret = CL_CLEAN, *gpartcnt = NULL, *tpartcnt = NULL, type = CL_CLEAN, i, tid = 0, bytes; + unsigned char *buffer, *buff, *endbl, *upt; + int ret = CL_CLEAN, type = CL_CLEAN, i, tid = 0, bytes; unsigned int buffersize, length, maxpatlen, shift = 0; - unsigned long int *gpartoff = NULL, *tpartoff = NULL, offset = 0; + unsigned long int offset = 0; + struct cli_ac_data gdata, tdata; MD5_CTX md5ctx; unsigned char digest[16]; struct cli_md5_node *md5_node; @@ -423,6 +407,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f uint32_t datamask[2] = { 0xffffffff, 0xffffffff }; int count, hret; off_t origoff; + char *pt; #endif @@ -644,42 +629,17 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f /* prepare the buffer */ buffersize = maxpatlen + SCANBUFF; - if(!(buffer = (char *) cli_calloc(buffersize, sizeof(char)))) { + if(!(buffer = (unsigned char *) cli_calloc(buffersize, sizeof(unsigned char)))) { cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d)\n", buffersize); return CL_EMEM; } - if((gpartcnt = (int *) cli_calloc(groot->ac_partsigs + 1, sizeof(int))) == NULL) { - cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d, %d)\n", groot->ac_partsigs + 1, sizeof(int)); - free(buffer); - return CL_EMEM; - } - - if((gpartoff = (unsigned long int *) cli_calloc(groot->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) { - cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d, %d)\n", groot->ac_partsigs + 1, sizeof(unsigned long int)); - free(buffer); - free(gpartcnt); - return CL_EMEM; - } + if((ret = cli_ac_initdata(&gdata, groot->ac_partsigs, AC_DEFAULT_TRACKLEN))) + return ret; if(troot) { - - if((tpartcnt = (int *) cli_calloc(troot->ac_partsigs + 1, sizeof(int))) == NULL) { - cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d, %d)\n", troot->ac_partsigs + 1, 
sizeof(int)); - free(buffer); - free(gpartcnt); - free(gpartoff); - return CL_EMEM; - } - - if((tpartoff = (unsigned long int *) cli_calloc(troot->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) { - cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d, %d)\n", troot->ac_partsigs + 1, sizeof(unsigned long int)); - free(buffer); - free(gpartcnt); - free(gpartoff); - free(tpartcnt); - return CL_EMEM; - } + if((ret = cli_ac_initdata(&tdata, troot->ac_partsigs, AC_DEFAULT_TRACKLEN))) + return ret; } if(ctx->engine->md5_hlist) @@ -689,29 +649,27 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f buff = buffer; buff += maxpatlen; /* pointer to read data block */ endbl = buff + SCANBUFF - maxpatlen; /* pointer to the last block - * length of maxpatlen - */ + * length of maxpatlen + */ - pt = buff; + upt = buff; while((bytes = cli_readn(desc, buff + shift, SCANBUFF - shift)) > 0) { if(ctx->scanned) *ctx->scanned += bytes / CL_COUNT_PRECISION; length = shift + bytes; - if(pt == buffer) + if(upt == buffer) length += maxpatlen; if(troot) { - if(troot->ac_only || (ret = cli_bm_scanbuff(pt, length, ctx->virname, troot, offset, ftype, desc)) != CL_VIRUS) - ret = cli_ac_scanbuff(pt, length, ctx->virname, troot, tpartcnt, otfrec, offset, tpartoff, ftype, desc, ftoffset); + if(troot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, troot, offset, ftype, desc)) != CL_VIRUS) + ret = cli_ac_scanbuff(upt, length, ctx->virname, troot, &tdata, otfrec, offset, ftype, desc, ftoffset); if(ret == CL_VIRUS) { free(buffer); - free(gpartcnt); - free(gpartoff); - free(tpartcnt); - free(tpartoff); + cli_ac_freedata(&gdata); + cli_ac_freedata(&tdata); lseek(desc, 0, SEEK_SET); if(cli_checkfp(desc, ctx->engine)) @@ -721,17 +679,14 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f } } - if(groot->ac_only || (ret = cli_bm_scanbuff(pt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS) - ret = 
cli_ac_scanbuff(pt, length, ctx->virname, groot, gpartcnt, otfrec, offset, gpartoff, ftype, desc, ftoffset); + if(groot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS) + ret = cli_ac_scanbuff(upt, length, ctx->virname, groot, &gdata, otfrec, offset, ftype, desc, ftoffset); if(ret == CL_VIRUS) { free(buffer); - free(gpartcnt); - free(gpartoff); - if(troot) { - free(tpartcnt); - free(tpartoff); - } + cli_ac_freedata(&gdata); + if(troot) + cli_ac_freedata(&tdata); lseek(desc, 0, SEEK_SET); if(cli_checkfp(desc, ctx->engine)) return CL_CLEAN; @@ -750,8 +705,8 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f memmove(buffer, endbl, maxpatlen); offset += SCANBUFF; - if(pt == buff) { - pt = buffer; + if(upt == buff) { + upt = buffer; offset -= maxpatlen; } @@ -764,12 +719,9 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f } free(buffer); - free(gpartcnt); - free(gpartoff); - if(troot) { - free(tpartcnt); - free(tpartoff); - } + cli_ac_freedata(&gdata); + if(troot) + cli_ac_freedata(&tdata); if(ctx->engine->md5_hlist) { MD5_Final(digest, &md5ctx); diff --git a/clamav-devel/libclamav/matcher.h b/clamav-devel/libclamav/matcher.h index 359a1550c..c55f1ddd0 100644 --- a/clamav-devel/libclamav/matcher.h +++ b/clamav-devel/libclamav/matcher.h @@ -28,7 +28,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short ftype, struct cli_matched_type **ftoffset); -int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype); +int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype); int cli_validatesig(unsigned short ftype, const char *offstr, unsigned long int fileoff, int desc, const char *virname); diff --git a/clamav-devel/libclamav/regex_list.c 
b/clamav-devel/libclamav/regex_list.c index f7eec6411..b23be9dc5 100644 --- a/clamav-devel/libclamav/regex_list.c +++ b/clamav-devel/libclamav/regex_list.c @@ -19,6 +19,9 @@ * MA 02110-1301, USA. * * $Log: regex_list.c,v $ + * Revision 1.15 2006/11/15 15:26:54 tkojm + * pattern matcher accuracy improvements + * * Revision 1.14 2006/11/05 18:16:56 acab * Patch for bug 52 from Edvin * @@ -350,8 +353,8 @@ int regex_list_match(struct regex_matcher* matcher,const char* real_url,const ch size_t buffer_len = (hostOnly && !is_whitelist) ? real_len : real_len + display_len + 1; char* buffer = cli_malloc(buffer_len+1); size_t i; - int partcnt,rc = 0; - unsigned long int partoff; + int rc = 0; + struct cli_ac_data mdata; if(!buffer) return CL_EMEM; @@ -364,13 +367,18 @@ int regex_list_match(struct regex_matcher* matcher,const char* real_url,const ch } cli_dbgmsg("Looking up in regex_list: %s\n", buffer); - if(hostOnly) + if(hostOnly) { + if((rc = cli_ac_initdata(&mdata, 0, AC_DEFAULT_TRACKLEN))) + return rc; + rc = 0; + for(i = 0; i < matcher->root_hosts_cnt; i++) { - if(( rc = cli_ac_scanbuff((unsigned char*)buffer,buffer_len,info, &matcher->root_hosts[i] ,&partcnt,0,0,&partoff,0,-1,NULL) )) + if(( rc = cli_ac_scanbuff((unsigned char*)buffer,buffer_len,info, &matcher->root_hosts[i] ,&mdata,0,0,0,-1,NULL) )) break; } - else + } else rc = 0; + if(!rc && !hostOnly) rc = match_node(matcher->root_regex,(unsigned char*)buffer,buffer_len,info) == MATCH_SUCCESS ? 
CL_VIRUS : CL_SUCCESS; free(buffer); diff --git a/clamav-devel/libclamav/scanners.c b/clamav-devel/libclamav/scanners.c index 4be2d13c2..4957477d1 100644 --- a/clamav-devel/libclamav/scanners.c +++ b/clamav-devel/libclamav/scanners.c @@ -956,7 +956,7 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx) if(ctx->scanned) *ctx->scanned += data_len / CL_COUNT_PRECISION; - if(cli_scanbuff((char *) data, data_len, ctx->virname, ctx->engine, CL_TYPE_MSOLE2) == CL_VIRUS) { + if(cli_scanbuff(data, data_len, ctx->virname, ctx->engine, CL_TYPE_MSOLE2) == CL_VIRUS) { free(data); ret = CL_VIRUS; break; @@ -1003,7 +1003,7 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx) } else { if(ctx->scanned) *ctx->scanned += vba_project->length[i] / CL_COUNT_PRECISION; - if(cli_scanbuff((char *) data, vba_project->length[i], ctx->virname, ctx->engine, CL_TYPE_MSOLE2) == CL_VIRUS) { + if(cli_scanbuff(data, vba_project->length[i], ctx->virname, ctx->engine, CL_TYPE_MSOLE2) == CL_VIRUS) { free(data); ret = CL_VIRUS; break;