pattern matcher accuracy improvements

git-svn: trunk@2505
remotes/push_mirror/metadata
Tomasz Kojm 19 years ago
parent f8c58685bd
commit 4e9ab8ed2f
  1. 7
      clamav-devel/ChangeLog
  2. 3
      clamav-devel/libclamav/clamav.h
  3. 32
      clamav-devel/libclamav/filetypes.c
  4. 176
      clamav-devel/libclamav/matcher-ac.c
  5. 13
      clamav-devel/libclamav/matcher-ac.h
  6. 15
      clamav-devel/libclamav/matcher-bm.c
  7. 2
      clamav-devel/libclamav/matcher-bm.h
  8. 128
      clamav-devel/libclamav/matcher.c
  9. 2
      clamav-devel/libclamav/matcher.h
  10. 18
      clamav-devel/libclamav/regex_list.c
  11. 4
      clamav-devel/libclamav/scanners.c

@ -1,3 +1,10 @@
Wed Nov 15 16:18:09 CET 2006 (tk)
---------------------------------
* libclamav: + the AC matcher now keeps track of partial matches to improve
the accuracy of signatures with range wildcards
+ add cli_ac_initdata() and cli_ac_freedata()
+ fix some signedness warnings
Tue Nov 14 13:49:58 GMT 2006 (trog)
-----------------------------------
* libclamav/ole2_extract.c: Fix Solaris endian issue. (bb#89)

@ -104,7 +104,8 @@ extern "C"
/* internal structures */
struct cli_bm_patt {
char *pattern, *virname, *offset;
unsigned char *pattern;
char *virname, *offset;
const char *viralias;
unsigned int length;
unsigned short target;

@ -26,6 +26,9 @@
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "clamav.h"
#include "filetypes.h"
@ -233,13 +236,11 @@ int is_tar(unsigned char *buf, unsigned int nbytes);
cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
{
char smallbuff[MAGIC_BUFFER_SIZE + 1], *decoded;
unsigned char *bigbuff;
unsigned char smallbuff[MAGIC_BUFFER_SIZE + 1], *decoded, *bigbuff;
int bread, sret;
cli_file_t ret = CL_TYPE_UNKNOWN_DATA;
struct cli_matcher *root;
int *partcnt;
unsigned long int *partoff;
struct cli_ac_data mdata;
memset(smallbuff, 0, sizeof(smallbuff));
@ -251,33 +252,28 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
if(!root)
return ret;
if((partcnt = (int *) cli_calloc(root->ac_partsigs + 1, sizeof(int))) == NULL) {
cli_warnmsg("cli_filetype2(): unable to cli_calloc(%d, %d)\n", root->ac_partsigs + 1, sizeof(int));
if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN))
return ret;
}
if((partoff = (unsigned long int *) cli_calloc(root->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) {
cli_dbgmsg("cli_filetype2(): unable to cli_calloc(%d, %d)\n", root->ac_partsigs + 1, sizeof(unsigned long int));
free(partcnt);
return ret;
}
sret = cli_ac_scanbuff(smallbuff, bread, NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
cli_ac_freedata(&mdata);
sret = cli_ac_scanbuff(smallbuff, bread, NULL, engine->root[0], partcnt, 1, 0, partoff, 0, -1, NULL);
if(sret >= CL_TYPENO) {
ret = sret;
} else {
memset(partcnt, 0, (root->ac_partsigs + 1) * sizeof(int));
memset(partoff, 0, (root->ac_partsigs + 1) * sizeof(unsigned long int));
if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN))
return ret;
decoded = cli_utf16toascii(smallbuff, bread);
if(decoded) {
sret = cli_ac_scanbuff(decoded, strlen(decoded), NULL, engine->root[0], partcnt, 1, 0, partoff, 0, -1, NULL);
sret = cli_ac_scanbuff(decoded, strlen(decoded), NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
free(decoded);
if(sret == CL_TYPE_HTML)
ret = CL_TYPE_HTML_UTF16;
}
cli_ac_freedata(&mdata);
}
free(partcnt);
free(partoff);
}
if(ret == CL_TYPE_UNKNOWN_DATA || ret == CL_TYPE_UNKNOWN_TEXT) {

@ -63,14 +63,14 @@ int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern)
if(!next) {
next = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
if(!next) {
cli_dbgmsg("Unable to allocate AC node (%d)\n", sizeof(struct cli_ac_node));
cli_errmsg("cli_ac_addpatt(): Unable to allocate AC node (%u bytes)\n", sizeof(struct cli_ac_node));
return CL_EMEM;
}
root->ac_nodes++;
root->ac_nodetable = (struct cli_ac_node **) cli_realloc(root->ac_nodetable, (root->ac_nodes) * sizeof(struct cli_ac_node *));
if(root->ac_nodetable == NULL) {
cli_dbgmsg("Unable to realloc nodetable (%d)\n", (root->ac_nodes) * sizeof(struct cli_matcher *));
cli_errmsg("cli_ac_addpatt(): Unable to realloc nodetable (%u bytes)\n", (root->ac_nodes) * sizeof(struct cli_matcher *));
return CL_EMEM;
}
root->ac_nodetable[root->ac_nodes - 1] = next;
@ -95,7 +95,7 @@ static int cli_enqueue(struct nodelist **bfs, struct cli_ac_node *n)
new = (struct nodelist *) cli_calloc(1, sizeof(struct nodelist));
if (new == NULL) {
cli_dbgmsg("Unable to allocate node list (%d)\n", sizeof(struct nodelist));
cli_errmsg("cli_enqueue(): Unable to allocate node list (%u bytes)\n", sizeof(struct nodelist));
return CL_EMEM;
}
@ -176,7 +176,7 @@ int cli_ac_buildtrie(struct cli_matcher *root)
return CL_EMALFDB;
if(!root->ac_root) {
cli_dbgmsg("AC pattern matcher not initialised\n");
cli_dbgmsg("cli_ac_buildtrie(): AC pattern matcher is not initialised\n");
return CL_SUCCESS;
}
@ -228,7 +228,7 @@ void cli_ac_free(struct cli_matcher *root)
free(root->ac_root);
}
inline static int cli_findpos(const char *buffer, unsigned int depth, unsigned int offset, unsigned int length, const struct cli_ac_patt *pattern)
inline static int cli_findpos(const unsigned char *buffer, unsigned int depth, unsigned int offset, unsigned int length, const struct cli_ac_patt *pattern)
{
unsigned int bufferpos = offset + depth;
unsigned int postfixend = offset + length;
@ -260,7 +260,7 @@ inline static int cli_findpos(const char *buffer, unsigned int depth, unsigned i
return 0;
alt++;
} else if(pattern->pattern[i] != CLI_IGN && (char) pattern->pattern[i] != buffer[bufferpos])
} else if(pattern->pattern[i] != CLI_IGN && (unsigned char) pattern->pattern[i] != buffer[bufferpos])
return 0;
bufferpos++;
@ -288,7 +288,7 @@ inline static int cli_findpos(const char *buffer, unsigned int depth, unsigned i
return 0;
alt++;
} else if(pattern->prefix[i] != CLI_IGN && (char) pattern->prefix[i] != buffer[bufferpos])
} else if(pattern->prefix[i] != CLI_IGN && (unsigned char) pattern->prefix[i] != buffer[bufferpos])
return 0;
bufferpos++;
@ -298,27 +298,119 @@ inline static int cli_findpos(const char *buffer, unsigned int depth, unsigned i
return 1;
}
int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, int *partcnt, unsigned short otfrec, unsigned long int offset, unsigned long int *partoff, unsigned short ftype, int fd, struct cli_matched_type **ftoffset)
/*
 * Initialise the tracking state that cli_ac_scanbuff() uses for multipart
 * (partial) signatures.
 *
 *   data      state object to initialise; must not be NULL
 *   partsigs  number of partial signatures to track; when 0 nothing is
 *             allocated and the object is left in the empty state
 *   tracklen  number of offset slots allocated for every partial signature
 *
 * Returns CL_SUCCESS on success, CL_ENULLARG when data is NULL and CL_EMEM
 * when an allocation fails.  On failure everything allocated so far is
 * released and the object is reset to the empty state (partsigs == 0, all
 * pointers NULL), so a subsequent cli_ac_freedata() call is harmless.
 *
 * NOTE(review): cli_ac_scanbuff() bounds its writes into partoff[i] with
 * AC_DEFAULT_TRACKLEN rather than with the tracklen given here, so callers
 * must not pass a smaller value.
 */
int cli_ac_initdata(struct cli_ac_data *data, unsigned int partsigs, unsigned int tracklen)
{
    unsigned int i = 0, j;

    if(!data) {
        cli_errmsg("cli_ac_initdata(): data == NULL\n");
        return CL_ENULLARG;
    }

    /* Start from a well-defined empty state; partsigs is only published
     * once every allocation has succeeded. */
    data->partsigs = 0;
    data->partcnt = NULL;
    data->offcnt = NULL;
    data->maxshift = NULL;
    data->partoff = NULL;

    if(!partsigs)
        return CL_SUCCESS;

    data->partcnt = (unsigned int *) cli_calloc(partsigs, sizeof(unsigned int));
    if(!data->partcnt) {
        cli_errmsg("cli_ac_initdata(): unable to cli_calloc(%u, %lu)\n", partsigs, (unsigned long) sizeof(unsigned int));
        goto fail;
    }

    data->offcnt = (unsigned int *) cli_calloc(partsigs, sizeof(unsigned int));
    if(!data->offcnt) {
        cli_errmsg("cli_ac_initdata(): unable to cli_calloc(%u, %lu)\n", partsigs, (unsigned long) sizeof(unsigned int));
        goto fail;
    }

    data->maxshift = (int *) cli_malloc(partsigs * sizeof(int));
    if(!data->maxshift) {
        cli_errmsg("cli_ac_initdata(): unable to cli_malloc(%lu)\n", (unsigned long) (partsigs * sizeof(int)));
        goto fail;
    }
    /* -1 marks "no shift limit recorded yet" for a signature */
    for(j = 0; j < partsigs; j++)
        data->maxshift[j] = -1;

    data->partoff = (unsigned int **) cli_calloc(partsigs, sizeof(unsigned int *));
    if(!data->partoff) {
        cli_errmsg("cli_ac_initdata(): unable to cli_calloc(%u, %lu)\n", partsigs, (unsigned long) sizeof(unsigned int *));
        goto fail;
    }

    /* The number of multipart signatures is rather small so we already
     * allocate the memory for all parts here instead of using a runtime
     * allocation in cli_ac_scanbuff()
     */
    for(i = 0; i < partsigs; i++) {
        data->partoff[i] = (unsigned int *) cli_calloc(tracklen, sizeof(unsigned int));
        if(!data->partoff[i]) {
            cli_errmsg("cli_ac_initdata(): unable to cli_calloc(%u, %lu)\n", tracklen, (unsigned long) sizeof(unsigned int));
            goto fail;
        }
    }

    data->partsigs = partsigs;
    return CL_SUCCESS;

fail:
    /* i is the number of per-signature offset arrays successfully
     * allocated (0 when the failure happened before the loop). */
    if(data->partoff) {
        for(j = 0; j < i; j++)
            free(data->partoff[j]);
        free(data->partoff);
    }
    free(data->maxshift);
    free(data->offcnt);
    free(data->partcnt);

    data->partoff = NULL;
    data->maxshift = NULL;
    data->offcnt = NULL;
    data->partcnt = NULL;

    return CL_EMEM;
}
/*
 * Release the memory held by a tracking state object.
 *
 * Does nothing when data is NULL or when it tracks no partial signatures
 * (partsigs == 0).  After the buffers are freed the object is reset to the
 * empty state, so calling this function again on the same object is a no-op
 * instead of a double free.
 */
void cli_ac_freedata(struct cli_ac_data *data)
{
    unsigned int i;

    if(!data || !data->partsigs)
        return;

    /* per-signature offset arrays first, then the table that holds them */
    for(i = 0; i < data->partsigs; i++)
        free(data->partoff[i]);
    free(data->partoff);

    free(data->partcnt);
    free(data->offcnt);
    free(data->maxshift);

    data->partoff = NULL;
    data->partcnt = NULL;
    data->offcnt = NULL;
    data->maxshift = NULL;
    data->partsigs = 0;
}
int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, unsigned short ftype, int fd, struct cli_matched_type **ftoffset)
{
struct cli_ac_node *current;
struct cli_ac_patt *pt;
int type = CL_CLEAN, dist, t;
unsigned int i, position;
int type = CL_CLEAN, t, j;
unsigned int i, position, idx, found, curroff;
struct cli_matched_type *tnode;
if(!root->ac_root)
return CL_CLEAN;
if(!partcnt || !partoff) {
cli_dbgmsg("cli_ac_scanbuff(): partcnt == NULL || partoff == NULL\n");
if(!mdata) {
cli_errmsg("cli_ac_scanbuff(): mdata == NULL\n");
return CL_ENULLARG;
}
current = root->ac_root;
for(i = 0; i < length; i++) {
current = current->trans[(unsigned char) buffer[i] & 0xff];
current = current->trans[buffer[i] & 0xff];
if(current->islast) {
position = i - ac_depth + 1;
@ -326,46 +418,67 @@ int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char
pt = current->list;
while(pt) {
if(cli_findpos(buffer, ac_depth, position, length, pt)) {
curroff = offset + position - pt->prefix_length;
if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
if(ftype == CL_TYPE_UNKNOWN_TEXT)
t = type;
else
t = ftype;
if((fd == -1 && !t) || !cli_validatesig(t, pt->offset, offset + position - pt->prefix_length, fd, pt->virname)) {
if((fd == -1 && !t) || !cli_validatesig(t, pt->offset, curroff, fd, pt->virname)) {
pt = pt->next;
continue;
}
}
if(pt->sigid) { /* it's a partial signature */
if(partcnt[pt->sigid] + 1 == pt->partno) {
dist = 1;
if(pt->maxdist)
if((offset + i - pt->prefix_length) - partoff[pt->sigid] > pt->maxdist)
dist = 0;
if(dist && pt->mindist)
if((offset + i - pt->prefix_length) - partoff[pt->sigid] < pt->mindist)
dist = 0;
if(mdata->partcnt[pt->sigid - 1] + 1 == pt->partno) {
idx = mdata->offcnt[pt->sigid - 1];
if(idx < AC_DEFAULT_TRACKLEN) {
mdata->partoff[pt->sigid - 1][idx] = curroff + pt->length;
if(mdata->maxshift[pt->sigid - 1] == -1 || ((int) (mdata->partoff[pt->sigid - 1][idx] - mdata->partoff[pt->sigid - 1][0]) <= mdata->maxshift[pt->sigid - 1]))
mdata->offcnt[pt->sigid - 1]++;
}
if(dist) {
partoff[pt->sigid] = offset + i + pt->length;
} else if(mdata->partcnt[pt->sigid - 1] + 2 == pt->partno) {
found = 0;
for(j = mdata->offcnt[pt->sigid - 1] - 1; j >= 0; j--) {
found = 1;
if(pt->maxdist)
if(curroff - mdata->partoff[pt->sigid - 1][j] > pt->maxdist)
found = 0;
if(++partcnt[pt->sigid] == pt->parts) { /* the last one */
if(found && pt->mindist)
if(curroff - mdata->partoff[pt->sigid - 1][j] < pt->mindist)
found = 0;
if(found)
break;
}
if(found) {
mdata->maxshift[pt->sigid - 1] = mdata->partoff[pt->sigid - 1][j] + pt->maxdist - curroff;
mdata->partoff[pt->sigid - 1][0] = curroff + pt->length;
mdata->offcnt[pt->sigid - 1] = 1;
if(++mdata->partcnt[pt->sigid - 1] + 1 == pt->parts) {
if(pt->type) {
if(otfrec) {
if(pt->type > type || pt->type >= CL_TYPE_SFX) {
cli_dbgmsg("Matched signature for file type %s at %d\n", pt->virname, offset + position - pt->prefix_length);
cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
type = pt->type;
if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < SFX_MAX_TESTS) && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) {
if(!(tnode = cli_calloc(1, sizeof(struct cli_matched_type)))) {
cli_errmsg("Can't alloc memory for new type node\n");
cli_errmsg("cli_ac_scanbuff(): Can't allocate memory for new type node\n");
return CL_EMEM;
}
tnode->type = type;
tnode->offset = offset + position - pt->prefix_length;
tnode->offset = -1; /* we don't remember the offset of the first part */
if(*ftoffset)
tnode->cnt = (*ftoffset)->cnt + 1;
@ -374,7 +487,6 @@ int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char
tnode->next = *ftoffset;
*ftoffset = tnode;
}
}
}
@ -392,15 +504,15 @@ int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char
if(pt->type) {
if(otfrec) {
if(pt->type > type || pt->type >= CL_TYPE_SFX) {
cli_dbgmsg("Matched signature for file type %s at %d\n", pt->virname, offset + position - pt->prefix_length);
cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, curroff);
type = pt->type;
if(ftoffset && (!*ftoffset ||(*ftoffset)->cnt < SFX_MAX_TESTS) && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) {
if(!(tnode = cli_calloc(1, sizeof(struct cli_matched_type)))) {
cli_errmsg("Can't alloc memory for new type node\n");
cli_errmsg("cli_ac_scanbuff(): Can't allocate memory for new type node\n");
return CL_EMEM;
}
tnode->type = type;
tnode->offset = offset + position - pt->prefix_length;
tnode->offset = curroff;
if(*ftoffset)
tnode->cnt = (*ftoffset)->cnt + 1;

@ -25,9 +25,20 @@
#include "filetypes.h"
#define AC_DEFAULT_DEPTH 2
#define AC_DEFAULT_TRACKLEN 8
/*
 * State used by cli_ac_scanbuff() to follow multipart (partial) signatures.
 * It is set up by cli_ac_initdata() and released by cli_ac_freedata().
 * All arrays have partsigs entries and are indexed by (sigid - 1).
 */
struct cli_ac_data {
unsigned int partsigs; /* number of partial signatures tracked; 0 means nothing is allocated */
unsigned int *partcnt; /* per signature: counter advanced each time the next expected part is accepted */
unsigned int **partoff; /* per signature: array of recorded offsets (match offset + pattern length) */
unsigned int *offcnt; /* per signature: number of slots of partoff[i] currently in use */
int *maxshift; /* per signature: -1 until set; otherwise the largest accepted distance of a new offset from partoff[i][0] */
};
int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern);
int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, int *partcnt, unsigned short otfrec, unsigned long int offset, unsigned long int *partoff, unsigned short ftype, int fd, struct cli_matched_type **ftoffset);
int cli_ac_initdata(struct cli_ac_data *data, unsigned int partsigs, unsigned int histlen);
void cli_ac_freedata(struct cli_ac_data *data);
int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, unsigned short ftype, int fd, struct cli_matched_type **ftoffset);
int cli_ac_buildtrie(struct cli_matcher *root);
void cli_ac_free(struct cli_matcher *root);
void cli_ac_setdepth(unsigned int depth);

@ -32,15 +32,14 @@
/* #define BM_TEST_OFFSET 5 */
#define BM_BLOCK_SIZE 3
#define HASH(a,b,c) 211 * (unsigned char) a + 37 * (unsigned char) b + (unsigned char) c
#define DHASH(a,b,c) 211 * a + 37 * b + c
/* Combine three values into one table index.  Every argument is
 * parenthesised so that expression arguments (e.g. HASH(x + 1, y, z))
 * expand with the intended precedence. */
#define HASH(a,b,c) (211 * (a) + 37 * (b) + (c))
int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern)
{
int i;
uint16_t idx;
const char *pt = pattern->pattern;
const unsigned char *pt = pattern->pattern;
struct cli_bm_patt *prev, *next = NULL;
@ -80,7 +79,7 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern)
int cli_bm_init(struct cli_matcher *root)
{
unsigned int i;
unsigned int size = DHASH(256, 256, 256);
unsigned int size = HASH(256, 256, 256);
cli_dbgmsg("in cli_bm_init()\n");
@ -104,7 +103,7 @@ void cli_bm_free(struct cli_matcher *root)
{
struct cli_bm_patt *b1, *b2;
unsigned int i;
unsigned int size = DHASH(256, 256, 256);
unsigned int size = HASH(256, 256, 256);
if(root->bm_shift)
@ -129,14 +128,14 @@ void cli_bm_free(struct cli_matcher *root)
}
}
int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd)
int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd)
{
unsigned int i, j, shift, off, found = 0;
int idxtest;
uint16_t idx;
struct cli_bm_patt *p;
const char *bp;
char prefix;
const unsigned char *bp;
unsigned char prefix;
if(!root->bm_shift)

@ -26,7 +26,7 @@
int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern);
int cli_bm_init(struct cli_matcher *root);
int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd);
int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd);
void cli_bm_free(struct cli_matcher *root);
#endif

@ -52,10 +52,10 @@ extern short cli_debug_flag;
#endif
int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype)
int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype)
{
int ret = CL_CLEAN, i, tid = 0, *partcnt;
unsigned long int *partoff;
int ret = CL_CLEAN, i, tid = 0;
struct cli_ac_data mdata;
struct cli_matcher *groot, *troot = NULL;
#ifdef HAVE_NCORE
void *streamhandle;
@ -209,43 +209,26 @@ int cli_scanbuff(const char *buffer, unsigned int length, const char **virname,
if(troot) {
if((partcnt = (int *) cli_calloc(troot->ac_partsigs + 1, sizeof(int))) == NULL) {
cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", troot->ac_partsigs + 1, sizeof(int));
return CL_EMEM;
}
if((partoff = (unsigned long int *) cli_calloc(troot->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) {
cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", troot->ac_partsigs + 1, sizeof(unsigned long int));
free(partcnt);
return CL_EMEM;
}
if((ret = cli_ac_initdata(&mdata, troot->ac_partsigs, AC_DEFAULT_TRACKLEN)))
return ret;
if(troot->ac_only || (ret = cli_bm_scanbuff(buffer, length, virname, troot, 0, ftype, -1)) != CL_VIRUS)
ret = cli_ac_scanbuff(buffer, length, virname, troot, partcnt, 0, 0, partoff, ftype, -1, NULL);
ret = cli_ac_scanbuff(buffer, length, virname, troot, &mdata, 0, 0, ftype, -1, NULL);
free(partcnt);
free(partoff);
cli_ac_freedata(&mdata);
if(ret == CL_VIRUS)
return ret;
}
if((partcnt = (int *) cli_calloc(groot->ac_partsigs + 1, sizeof(int))) == NULL) {
cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", groot->ac_partsigs + 1, sizeof(int));
return CL_EMEM;
}
if((partoff = (unsigned long int *) cli_calloc(groot->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) {
cli_dbgmsg("cli_scanbuff(): unable to cli_calloc(%d, %d)\n", groot->ac_partsigs + 1, sizeof(unsigned long int));
free(partcnt);
return CL_EMEM;
}
if((ret = cli_ac_initdata(&mdata, groot->ac_partsigs, AC_DEFAULT_TRACKLEN)))
return ret;
if(groot->ac_only || (ret = cli_bm_scanbuff(buffer, length, virname, groot, 0, ftype, -1)) != CL_VIRUS)
ret = cli_ac_scanbuff(buffer, length, virname, groot, partcnt, 0, 0, partoff, ftype, -1, NULL);
ret = cli_ac_scanbuff(buffer, length, virname, groot, &mdata, 0, 0, ftype, -1, NULL);
cli_ac_freedata(&mdata);
free(partcnt);
free(partoff);
return ret;
}
@ -408,10 +391,11 @@ int cli_validatesig(unsigned short ftype, const char *offstr, unsigned long int
int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short ftype, struct cli_matched_type **ftoffset)
{
char *buffer, *buff, *endbl, *pt;
int ret = CL_CLEAN, *gpartcnt = NULL, *tpartcnt = NULL, type = CL_CLEAN, i, tid = 0, bytes;
unsigned char *buffer, *buff, *endbl, *upt;
int ret = CL_CLEAN, type = CL_CLEAN, i, tid = 0, bytes;
unsigned int buffersize, length, maxpatlen, shift = 0;
unsigned long int *gpartoff = NULL, *tpartoff = NULL, offset = 0;
unsigned long int offset = 0;
struct cli_ac_data gdata, tdata;
MD5_CTX md5ctx;
unsigned char digest[16];
struct cli_md5_node *md5_node;
@ -423,6 +407,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
uint32_t datamask[2] = { 0xffffffff, 0xffffffff };
int count, hret;
off_t origoff;
char *pt;
#endif
@ -644,42 +629,17 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
/* prepare the buffer */
buffersize = maxpatlen + SCANBUFF;
if(!(buffer = (char *) cli_calloc(buffersize, sizeof(char)))) {
if(!(buffer = (unsigned char *) cli_calloc(buffersize, sizeof(unsigned char)))) {
cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d)\n", buffersize);
return CL_EMEM;
}
if((gpartcnt = (int *) cli_calloc(groot->ac_partsigs + 1, sizeof(int))) == NULL) {
cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d, %d)\n", groot->ac_partsigs + 1, sizeof(int));
free(buffer);
return CL_EMEM;
}
if((gpartoff = (unsigned long int *) cli_calloc(groot->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) {
cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d, %d)\n", groot->ac_partsigs + 1, sizeof(unsigned long int));
free(buffer);
free(gpartcnt);
return CL_EMEM;
}
if((ret = cli_ac_initdata(&gdata, groot->ac_partsigs, AC_DEFAULT_TRACKLEN)))
return ret;
if(troot) {
if((tpartcnt = (int *) cli_calloc(troot->ac_partsigs + 1, sizeof(int))) == NULL) {
cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d, %d)\n", troot->ac_partsigs + 1, sizeof(int));
free(buffer);
free(gpartcnt);
free(gpartoff);
return CL_EMEM;
}
if((tpartoff = (unsigned long int *) cli_calloc(troot->ac_partsigs + 1, sizeof(unsigned long int))) == NULL) {
cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%d, %d)\n", troot->ac_partsigs + 1, sizeof(unsigned long int));
free(buffer);
free(gpartcnt);
free(gpartoff);
free(tpartcnt);
return CL_EMEM;
}
if((ret = cli_ac_initdata(&tdata, troot->ac_partsigs, AC_DEFAULT_TRACKLEN)))
return ret;
}
if(ctx->engine->md5_hlist)
@ -689,29 +649,27 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
buff = buffer;
buff += maxpatlen; /* pointer to read data block */
endbl = buff + SCANBUFF - maxpatlen; /* pointer to the last block
* length of maxpatlen
*/
* length of maxpatlen
*/
pt = buff;
upt = buff;
while((bytes = cli_readn(desc, buff + shift, SCANBUFF - shift)) > 0) {
if(ctx->scanned)
*ctx->scanned += bytes / CL_COUNT_PRECISION;
length = shift + bytes;
if(pt == buffer)
if(upt == buffer)
length += maxpatlen;
if(troot) {
if(troot->ac_only || (ret = cli_bm_scanbuff(pt, length, ctx->virname, troot, offset, ftype, desc)) != CL_VIRUS)
ret = cli_ac_scanbuff(pt, length, ctx->virname, troot, tpartcnt, otfrec, offset, tpartoff, ftype, desc, ftoffset);
if(troot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, troot, offset, ftype, desc)) != CL_VIRUS)
ret = cli_ac_scanbuff(upt, length, ctx->virname, troot, &tdata, otfrec, offset, ftype, desc, ftoffset);
if(ret == CL_VIRUS) {
free(buffer);
free(gpartcnt);
free(gpartoff);
free(tpartcnt);
free(tpartoff);
cli_ac_freedata(&gdata);
cli_ac_freedata(&tdata);
lseek(desc, 0, SEEK_SET);
if(cli_checkfp(desc, ctx->engine))
@ -721,17 +679,14 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
}
}
if(groot->ac_only || (ret = cli_bm_scanbuff(pt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS)
ret = cli_ac_scanbuff(pt, length, ctx->virname, groot, gpartcnt, otfrec, offset, gpartoff, ftype, desc, ftoffset);
if(groot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS)
ret = cli_ac_scanbuff(upt, length, ctx->virname, groot, &gdata, otfrec, offset, ftype, desc, ftoffset);
if(ret == CL_VIRUS) {
free(buffer);
free(gpartcnt);
free(gpartoff);
if(troot) {
free(tpartcnt);
free(tpartoff);
}
cli_ac_freedata(&gdata);
if(troot)
cli_ac_freedata(&tdata);
lseek(desc, 0, SEEK_SET);
if(cli_checkfp(desc, ctx->engine))
return CL_CLEAN;
@ -750,8 +705,8 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
memmove(buffer, endbl, maxpatlen);
offset += SCANBUFF;
if(pt == buff) {
pt = buffer;
if(upt == buff) {
upt = buffer;
offset -= maxpatlen;
}
@ -764,12 +719,9 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
}
free(buffer);
free(gpartcnt);
free(gpartoff);
if(troot) {
free(tpartcnt);
free(tpartoff);
}
cli_ac_freedata(&gdata);
if(troot)
cli_ac_freedata(&tdata);
if(ctx->engine->md5_hlist) {
MD5_Final(digest, &md5ctx);

@ -28,7 +28,7 @@
int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short ftype, struct cli_matched_type **ftoffset);
int cli_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype);
int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype);
int cli_validatesig(unsigned short ftype, const char *offstr, unsigned long int fileoff, int desc, const char *virname);

@ -19,6 +19,9 @@
* MA 02110-1301, USA.
*
* $Log: regex_list.c,v $
* Revision 1.15 2006/11/15 15:26:54 tkojm
* pattern matcher accuracy improvements
*
* Revision 1.14 2006/11/05 18:16:56 acab
* Patch for bug 52 from Edvin
*
@ -350,8 +353,8 @@ int regex_list_match(struct regex_matcher* matcher,const char* real_url,const ch
size_t buffer_len = (hostOnly && !is_whitelist) ? real_len : real_len + display_len + 1;
char* buffer = cli_malloc(buffer_len+1);
size_t i;
int partcnt,rc = 0;
unsigned long int partoff;
int rc = 0;
struct cli_ac_data mdata;
if(!buffer)
return CL_EMEM;
@ -364,13 +367,18 @@ int regex_list_match(struct regex_matcher* matcher,const char* real_url,const ch
}
cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
if(hostOnly)
if(hostOnly) {
if((rc = cli_ac_initdata(&mdata, 0, AC_DEFAULT_TRACKLEN)))
return rc;
rc = 0;
for(i = 0; i < matcher->root_hosts_cnt; i++) {
if(( rc = cli_ac_scanbuff((unsigned char*)buffer,buffer_len,info, &matcher->root_hosts[i] ,&partcnt,0,0,&partoff,0,-1,NULL) ))
if(( rc = cli_ac_scanbuff((unsigned char*)buffer,buffer_len,info, &matcher->root_hosts[i] ,&mdata,0,0,0,-1,NULL) ))
break;
}
else
} else
rc = 0;
if(!rc && !hostOnly)
rc = match_node(matcher->root_regex,(unsigned char*)buffer,buffer_len,info) == MATCH_SUCCESS ? CL_VIRUS : CL_SUCCESS;
free(buffer);

@ -956,7 +956,7 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx)
if(ctx->scanned)
*ctx->scanned += data_len / CL_COUNT_PRECISION;
if(cli_scanbuff((char *) data, data_len, ctx->virname, ctx->engine, CL_TYPE_MSOLE2) == CL_VIRUS) {
if(cli_scanbuff(data, data_len, ctx->virname, ctx->engine, CL_TYPE_MSOLE2) == CL_VIRUS) {
free(data);
ret = CL_VIRUS;
break;
@ -1003,7 +1003,7 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx)
} else {
if(ctx->scanned)
*ctx->scanned += vba_project->length[i] / CL_COUNT_PRECISION;
if(cli_scanbuff((char *) data, vba_project->length[i], ctx->virname, ctx->engine, CL_TYPE_MSOLE2) == CL_VIRUS) {
if(cli_scanbuff(data, vba_project->length[i], ctx->virname, ctx->engine, CL_TYPE_MSOLE2) == CL_VIRUS) {
free(data);
ret = CL_VIRUS;
break;

Loading…
Cancel
Save