improve scanning of files whose types are detected on-the-fly

git-svn: trunk@2601
remotes/push_mirror/metadata
Tomasz Kojm 19 years ago
parent 0cf4cea7cb
commit 73218de24b
  1. clamav-devel/ChangeLog (5 lines changed)
  2. clamav-devel/libclamav/matcher-ac.c (11 lines changed)
  3. clamav-devel/libclamav/matcher-ac.h (2 lines changed)
  4. clamav-devel/libclamav/matcher-bm.c (3 lines changed)
  5. clamav-devel/libclamav/matcher-bm.h (3 lines changed)
  6. clamav-devel/libclamav/matcher.c (81 lines changed)
  7. clamav-devel/libclamav/matcher.h (8 lines changed)
  8. clamav-devel/libclamav/scanners.c (21 lines changed)

@ -1,3 +1,8 @@
Mon Jan 8 20:43:14 CET 2007 (tk)
---------------------------------
* libclamav: improve scanning of files whose types are detected on-the-fly
(closes bb#33)
Sun Jan 7 21:31:06 GMT 2007 (njh)
----------------------------------
* libclamav: Use HAVE_STDBOOL_H

@ -404,11 +404,11 @@ void cli_ac_freedata(struct cli_ac_data *data)
}
}
int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, unsigned short ftype, int fd, struct cli_matched_type **ftoffset)
int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset)
{
struct cli_ac_node *current;
struct cli_ac_patt *pt;
int type = CL_CLEAN, t, j;
int type = CL_CLEAN, j;
unsigned int i, position, curroff;
uint8_t offnum, found;
struct cli_matched_type *tnode;
@ -438,12 +438,7 @@ int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char
curroff = offset + position - pt->prefix_length;
if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
if(ftype == CL_TYPE_UNKNOWN_TEXT)
t = type;
else
t = ftype;
if((fd == -1 && !t) || !cli_validatesig(t, pt->offset, curroff, &info, fd, pt->virname)) {
if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, curroff, &info, fd, pt->virname)) {
pt = pt->next;
continue;
}

@ -40,7 +40,7 @@ struct cli_ac_data {
int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern);
int cli_ac_initdata(struct cli_ac_data *data, unsigned int partsigs, unsigned int histlen);
void cli_ac_freedata(struct cli_ac_data *data);
int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, unsigned short ftype, int fd, struct cli_matched_type **ftoffset);
int cli_ac_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, unsigned short otfrec, unsigned long int offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset);
int cli_ac_buildtrie(struct cli_matcher *root);
void cli_ac_free(struct cli_matcher *root);
void cli_ac_setdepth(unsigned int depth);

@ -23,6 +23,7 @@
#include "cltypes.h"
#include "matcher.h"
#include "matcher-bm.h"
#include "filetypes.h"
/* TODO: Check prefix regularity and automatically transfer some signatures
* to AC
@ -128,7 +129,7 @@ void cli_bm_free(struct cli_matcher *root)
}
}
int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd)
int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, cli_file_t ftype, int fd)
{
unsigned int i, j, shift, off, found = 0;
int idxtest;

@ -23,10 +23,11 @@
#include "clamav.h"
#include "matcher.h"
#include "matcher-bm.h"
#include "filetypes.h"
int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern);
int cli_bm_init(struct cli_matcher *root);
int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, unsigned short ftype, int fd);
int cli_bm_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cli_matcher *root, unsigned long int offset, cli_file_t ftype, int fd);
void cli_bm_free(struct cli_matcher *root);
#endif

@ -46,11 +46,11 @@
#include "matcher-ncore.h"
#endif
static unsigned int targettab[CL_TARGET_TABLE_SIZE] = { 0, CL_TYPE_MSEXE, CL_TYPE_MSOLE2, CL_TYPE_HTML, CL_TYPE_MAIL, CL_TYPE_GRAPHICS, CL_TYPE_ELF };
static cli_file_t targettab[CL_TARGET_TABLE_SIZE] = { 0, CL_TYPE_MSEXE, CL_TYPE_MSOLE2, CL_TYPE_HTML, CL_TYPE_MAIL, CL_TYPE_GRAPHICS, CL_TYPE_ELF };
extern short cli_debug_flag;
int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype)
int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, cli_file_t ftype)
{
int ret = CL_CLEAN, i;
struct cli_ac_data mdata;
@ -121,7 +121,7 @@ struct cli_md5_node *cli_vermd5(const unsigned char *md5, const struct cl_engine
return NULL;
}
off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, unsigned short ftype, int *ret)
off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, cli_file_t ftype, int *ret)
{
int (*einfo)(int, struct cli_exe_info *) = NULL;
unsigned int n;
@ -252,7 +252,7 @@ static int cli_checkfp(int fd, const struct cl_engine *engine)
return 0;
}
int cli_validatesig(unsigned short ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname)
int cli_validatesig(cli_file_t ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname)
{
off_t offset;
int ret;
@ -275,7 +275,7 @@ int cli_validatesig(unsigned short ftype, const char *offstr, off_t fileoff, str
return 1;
}
int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short ftype, struct cli_matched_type **ftoffset)
int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, cli_file_t ftype, unsigned short ftonly, struct cli_matched_type **ftoffset)
{
unsigned char *buffer, *buff, *endbl, *upt;
int ret = CL_CLEAN, type = CL_CLEAN, i, bytes;
@ -285,7 +285,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
MD5_CTX md5ctx;
unsigned char digest[16];
struct cli_md5_node *md5_node;
struct cli_matcher *groot, *troot = NULL;
struct cli_matcher *groot = NULL, *troot = NULL;
if(!ctx->engine) {
@ -303,7 +303,8 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
}
#endif
groot = ctx->engine->root[0]; /* generic signatures */
if(!ftonly)
groot = ctx->engine->root[0]; /* generic signatures */
if(ftype) {
for(i = 1; i < CL_TARGET_TABLE_SIZE; i++) {
@ -314,10 +315,17 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
}
}
if(troot)
maxpatlen = MAX(troot->maxpatlen, groot->maxpatlen);
else
maxpatlen = groot->maxpatlen;
if(ftonly) {
if(!troot)
return CL_CLEAN;
maxpatlen = troot->maxpatlen;
} else {
if(troot)
maxpatlen = MAX(troot->maxpatlen, groot->maxpatlen);
else
maxpatlen = groot->maxpatlen;
}
/* prepare the buffer */
buffersize = maxpatlen + SCANBUFF;
@ -326,7 +334,7 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
return CL_EMEM;
}
if((ret = cli_ac_initdata(&gdata, groot->ac_partsigs, AC_DEFAULT_TRACKLEN)))
if(!ftonly && (ret = cli_ac_initdata(&gdata, groot->ac_partsigs, AC_DEFAULT_TRACKLEN)))
return ret;
if(troot) {
@ -334,10 +342,9 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
return ret;
}
if(ctx->engine->md5_hlist)
if(!ftonly && ctx->engine->md5_hlist)
MD5_Init(&md5ctx);
buff = buffer;
buff += maxpatlen; /* pointer to read data block */
endbl = buff + SCANBUFF - maxpatlen; /* pointer to the last block
@ -360,7 +367,8 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
if(ret == CL_VIRUS) {
free(buffer);
cli_ac_freedata(&gdata);
if(!ftonly)
cli_ac_freedata(&gdata);
cli_ac_freedata(&tdata);
lseek(desc, 0, SEEK_SET);
@ -371,27 +379,29 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
}
}
if(groot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS)
ret = cli_ac_scanbuff(upt, length, ctx->virname, groot, &gdata, otfrec, offset, ftype, desc, ftoffset);
if(!ftonly) {
if(groot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS)
ret = cli_ac_scanbuff(upt, length, ctx->virname, groot, &gdata, otfrec, offset, ftype, desc, ftoffset);
if(ret == CL_VIRUS) {
free(buffer);
cli_ac_freedata(&gdata);
if(troot)
cli_ac_freedata(&tdata);
lseek(desc, 0, SEEK_SET);
if(cli_checkfp(desc, ctx->engine))
return CL_CLEAN;
else
return CL_VIRUS;
if(ret == CL_VIRUS) {
free(buffer);
cli_ac_freedata(&gdata);
if(troot)
cli_ac_freedata(&tdata);
lseek(desc, 0, SEEK_SET);
if(cli_checkfp(desc, ctx->engine))
return CL_CLEAN;
else
return CL_VIRUS;
} else if(otfrec && ret >= CL_TYPENO) {
if(ret > type)
type = ret;
}
} else if(otfrec && ret >= CL_TYPENO) {
if(ret > type)
type = ret;
}
if(ctx->engine->md5_hlist)
MD5_Update(&md5ctx, buff + shift, bytes);
if(ctx->engine->md5_hlist)
MD5_Update(&md5ctx, buff + shift, bytes);
}
if(bytes + shift == SCANBUFF) {
memmove(buffer, endbl, maxpatlen);
@ -411,11 +421,12 @@ int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short f
}
free(buffer);
cli_ac_freedata(&gdata);
if(!ftonly)
cli_ac_freedata(&gdata);
if(troot)
cli_ac_freedata(&tdata);
if(ctx->engine->md5_hlist) {
if(!ftonly && ctx->engine->md5_hlist) {
MD5_Final(digest, &md5ctx);
if((md5_node = cli_vermd5(digest, ctx->engine)) && !md5_node->fp) {

@ -37,14 +37,14 @@ struct cli_target_info {
int8_t status; /* 0 == not initialised, 1 == initialised OK, -1 == error */
};
int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, unsigned short ftype, struct cli_matched_type **ftoffset);
int cli_scandesc(int desc, cli_ctx *ctx, unsigned short otfrec, cli_file_t ftype, unsigned short ftonly, struct cli_matched_type **ftoffset);
int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, unsigned short ftype);
int cli_scanbuff(const unsigned char *buffer, unsigned int length, const char **virname, const struct cl_engine *engine, cli_file_t ftype);
int cli_validatesig(unsigned short ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname);
int cli_validatesig(cli_file_t ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname);
struct cli_md5_node *cli_vermd5(const unsigned char *md5, const struct cl_engine *engine);
off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, unsigned short ftype, int *ret);
off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, cli_file_t ftype, int *ret);
#endif

@ -196,7 +196,7 @@ static int cli_scanrar(int desc, cli_ctx *ctx, off_t sfx_offset, uint32_t *sfx_c
if(DETECT_ENCRYPTED && metadata->encrypted) {
cli_dbgmsg("RAR: Encrypted files found in archive.\n");
lseek(desc, 0, SEEK_SET);
ret = cli_scandesc(desc, ctx, 0, 0, NULL);
ret = cli_scandesc(desc, ctx, 0, 0, 0, NULL);
if(ret < 0) {
break;
} else if(ret != CL_VIRUS) {
@ -400,7 +400,7 @@ static int cli_scanzip(int desc, cli_ctx *ctx, off_t sfx_offset, uint32_t *sfx_c
if(DETECT_ENCRYPTED && encrypted) {
cli_dbgmsg("Zip: Encrypted files found in archive.\n");
lseek(desc, 0, SEEK_SET);
ret = cli_scandesc(desc, ctx, 0, 0, NULL);
ret = cli_scandesc(desc, ctx, 0, 0, 0, NULL);
if(ret < 0) {
break;
} else if(ret != CL_VIRUS) {
@ -1026,7 +1026,7 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx)
if (fd >= 0) {
ofd = cli_decode_ole_object(fd, dirname);
if (ofd >= 0) {
ret = cli_scandesc(ofd, ctx, 0, 0, NULL);
ret = cli_scandesc(ofd, ctx, 0, 0, 0, NULL);
close(ofd);
}
close(fd);
@ -1092,7 +1092,7 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
snprintf(fullname, 1024, "%s/comment.html", tempname);
fd = open(fullname, O_RDONLY|O_BINARY);
if (fd >= 0) {
ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, NULL);
ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
close(fd);
}
@ -1107,7 +1107,7 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
snprintf(fullname, 1024, "%s/nocomment.html", tempname);
fd = open(fullname, O_RDONLY|O_BINARY);
if (fd >= 0) {
ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, NULL);
ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
close(fd);
}
}
@ -1123,7 +1123,7 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
snprintf(fullname, 1024, "%s/script.html", tempname);
fd = open(fullname, O_RDONLY|O_BINARY);
if (fd >= 0) {
ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, NULL);
ret = cli_scandesc(fd, ctx, 0, CL_TYPE_HTML, 0, NULL);
close(fd);
}
}
@ -1597,7 +1597,7 @@ static int cli_scanraw(int desc, cli_ctx *ctx, cli_file_t type)
return CL_EIO;
}
if((ret = cli_scandesc(desc, ctx, ftrec, type, &ftoffset)) == CL_VIRUS) {
if((ret = cli_scandesc(desc, ctx, ftrec, type, 0, &ftoffset)) == CL_VIRUS) {
cli_dbgmsg("%s found in descriptor %d.\n", *ctx->virname, desc);
return CL_VIRUS;
@ -1607,6 +1607,11 @@ static int cli_scanraw(int desc, cli_ctx *ctx, cli_file_t type)
} else if(ret >= CL_TYPENO) {
lseek(desc, 0, SEEK_SET);
if((nret = cli_scandesc(desc, ctx, 0, ret, 1, NULL)) == CL_VIRUS) {
cli_dbgmsg("%s found in descriptor %d when scanning file type %u\n", *ctx->virname, desc, ret);
return CL_VIRUS;
}
ret == CL_TYPE_MAIL ? ctx->mrec++ : ctx->arec++;
switch(ret) {
case CL_TYPE_HTML:
@ -1692,7 +1697,7 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
if(!ctx->options) { /* raw mode (stdin, etc.) */
cli_dbgmsg("Raw mode: No support for special files\n");
if((ret = cli_scandesc(desc, ctx, 0, 0, NULL)) == CL_VIRUS)
if((ret = cli_scandesc(desc, ctx, 0, 0, 0, NULL)) == CL_VIRUS)
cli_dbgmsg("%s found in descriptor %d\n", *ctx->virname, desc);
return ret;
}

Loading…
Cancel
Save