From 994ae4376d720f6f9b50f3fe07d6596e19c9d3b1 Mon Sep 17 00:00:00 2001 From: Tomasz Kojm Date: Sat, 25 Mar 2006 00:10:15 +0000 Subject: [PATCH] optimise scanning of SFX archives git-svn: trunk@1871 --- clamav-devel/ChangeLog | 4 ++++ clamav-devel/libclamav/filetypes.h | 6 ++++- clamav-devel/libclamav/matcher-ac.c | 23 ++++++++++++++---- clamav-devel/libclamav/scanners.c | 36 +++++++++++++++++++++-------- 4 files changed, 54 insertions(+), 15 deletions(-) diff --git a/clamav-devel/ChangeLog b/clamav-devel/ChangeLog index c348e00bd..272b8792a 100644 --- a/clamav-devel/ChangeLog +++ b/clamav-devel/ChangeLog @@ -1,3 +1,7 @@ +Sat Mar 25 01:05:50 CET 2006 (tk) +--------------------------------- + * libclamav: optimise scanning of SFX archives + Wed Mar 22 19:04:15 CET 2006 (tk) --------------------------------- * libclamav/htmlnorm.c: fix typo spotted by Gianluigi Tiesi diff --git a/clamav-devel/libclamav/filetypes.h b/clamav-devel/libclamav/filetypes.h index ee057b652..5baef1255 100644 --- a/clamav-devel/libclamav/filetypes.h +++ b/clamav-devel/libclamav/filetypes.h @@ -20,8 +20,11 @@ #ifndef __FILETYPES_H #define __FILETYPES_H +#include <sys/types.h> + #define MAGIC_BUFFER_SIZE 50 #define CL_TYPENO 500 +#define SFX_MAX_TESTS 10 typedef enum { CL_TYPE_UNKNOWN_TEXT = CL_TYPENO, @@ -60,7 +63,8 @@ typedef enum { struct cli_matched_type { cli_file_t type; - size_t offset; + off_t offset; + unsigned short cnt; struct cli_matched_type *next; }; diff --git a/clamav-devel/libclamav/matcher-ac.c b/clamav-devel/libclamav/matcher-ac.c index 7db34a6e4..402073442 100644 --- a/clamav-devel/libclamav/matcher-ac.c +++ b/clamav-devel/libclamav/matcher-ac.c @@ -318,17 +318,25 @@ int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virnam if(pt->type) { if(otfrec) { if(pt->type > type || pt->type >= CL_TYPE_SFX) { - cli_dbgmsg("Matched signature for file type: %s\n", pt->virname); + cli_dbgmsg("Matched signature for file type %s at %d\n", pt->virname, offset + position); type = 
pt->type; - if(ftoffset && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) { + if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < SFX_MAX_TESTS) && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) { if(!(tnode = cli_calloc(1, sizeof(struct cli_matched_type)))) { cli_errmsg("Can't alloc memory for new type node\n"); return CL_EMEM; } + tnode->type = type; tnode->offset = offset + position; + + if(*ftoffset) + tnode->cnt = (*ftoffset)->cnt + 1; + else + tnode->cnt = 1; + tnode->next = *ftoffset; *ftoffset = tnode; + } } } @@ -346,16 +354,21 @@ int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virnam if(pt->type) { if(otfrec) { if(pt->type > type || pt->type >= CL_TYPE_SFX) { - cli_dbgmsg("Matched signature for file type: %s\n", pt->virname); - + cli_dbgmsg("Matched signature for file type %s at %d\n", pt->virname, offset + position); type = pt->type; - if(ftoffset && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) { + if(ftoffset && (!*ftoffset ||(*ftoffset)->cnt < SFX_MAX_TESTS) && ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) { if(!(tnode = cli_calloc(1, sizeof(struct cli_matched_type)))) { cli_errmsg("Can't alloc memory for new type node\n"); return CL_EMEM; } tnode->type = type; tnode->offset = offset + position; + + if(*ftoffset) + tnode->cnt = (*ftoffset)->cnt + 1; + else + tnode->cnt = 1; + tnode->next = *ftoffset; *ftoffset = tnode; } diff --git a/clamav-devel/libclamav/scanners.c b/clamav-devel/libclamav/scanners.c index addc9c963..fd1854648 100644 --- a/clamav-devel/libclamav/scanners.c +++ b/clamav-devel/libclamav/scanners.c @@ -107,7 +107,7 @@ static void cli_unlock_mutex(void *mtx) #endif */ -static int cli_scanrar(int desc, cli_ctx *ctx, unsigned long int offset) +static int cli_scanrar(int desc, cli_ctx *ctx, off_t sfx_offset, uint32_t *sfx_check) { int fd, ret = CL_CLEAN; unsigned int files = 0; @@ -126,7 +126,9 @@ static int cli_scanrar(int desc, cli_ctx *ctx, unsigned long int offset) return CL_ETMPDIR; } - lseek(desc, offset, 
SEEK_SET); + if(sfx_offset) + lseek(desc, sfx_offset, SEEK_SET); + metadata = metadata_tmp = cli_unrar(desc, dir, ctx->limits); if(cli_scandir(dir, ctx) == CL_VIRUS) { @@ -135,6 +137,13 @@ static int cli_scanrar(int desc, cli_ctx *ctx, unsigned long int offset) files++; + if(files == 1 && sfx_check) { + if(*sfx_check == metadata->crc) + break; + else + *sfx_check = metadata->crc; + } + cli_dbgmsg("RAR: %s, crc32: 0x%x, encrypted: %d, compressed: %u, normal: %u, method: %d, ratio: %d (max: %d)\n", metadata->filename, metadata->crc, metadata->encrypted, metadata->pack_size, metadata->unpack_size, metadata->method, @@ -254,7 +263,7 @@ static int cli_scanrar(int desc, cli_ctx *ctx, unsigned long int offset) } #ifdef HAVE_ZLIB_H -static int cli_scanzip(int desc, cli_ctx *ctx, unsigned long int offset) +static int cli_scanzip(int desc, cli_ctx *ctx, off_t sfx_offset, uint32_t *sfx_check) { ZZIP_DIR *zdir; ZZIP_DIRENT zdirent; @@ -272,8 +281,8 @@ static int cli_scanzip(int desc, cli_ctx *ctx, unsigned long int offset) cli_dbgmsg("in scanzip()\n"); - if(offset) - lseek(desc, offset, SEEK_SET); + if(sfx_offset) + lseek(desc, sfx_offset, SEEK_SET); if((zdir = zzip_dir_fdopen(dup(desc), &err)) == NULL) { cli_dbgmsg("Zip: zzip_dir_fdopen() return code: %d\n", err); @@ -292,6 +301,13 @@ static int cli_scanzip(int desc, cli_ctx *ctx, unsigned long int offset) while(zzip_dir_read(zdir, &zdirent)) { files++; + if(files == 1 && sfx_check) { + if(*sfx_check == zdirent.d_crc32) + break; + else + *sfx_check = zdirent.d_crc32; + } + if(!zdirent.d_name || !strlen(zdirent.d_name)) { /* Mimail fix */ cli_dbgmsg("Zip: strlen(zdirent.d_name) == %d\n", strlen(zdirent.d_name)); *ctx->virname = "Suspect.Zip"; @@ -1492,6 +1508,7 @@ static int cli_scanraw(int desc, cli_ctx *ctx, cli_file_t type) int ret = CL_CLEAN; unsigned short ftrec; struct cli_matched_type *ftoffset = NULL, *fpt; + uint32_t lastzip, lastrar; switch(type) { @@ -1536,15 +1553,16 @@ static int cli_scanraw(int desc, cli_ctx 
*ctx, cli_file_t type) case CL_TYPE_ZIPSFX: if(type == CL_TYPE_MSEXE) { if(SCAN_ARCHIVE) { + lastzip = lastrar = 0xdeadbeef; fpt = ftoffset; while(fpt) { if(fpt->type == CL_TYPE_RARSFX) { cli_dbgmsg("RAR-SFX signature found at %d\n", fpt->offset); - if((ret = cli_scanrar(desc, ctx, fpt->offset) == CL_VIRUS)) + if((ret = cli_scanrar(desc, ctx, fpt->offset, &lastrar) == CL_VIRUS)) break; } else if(fpt->type == CL_TYPE_ZIPSFX) { cli_dbgmsg("ZIP-SFX signature found at %d\n", fpt->offset); - if((ret = cli_scanzip(desc, ctx, fpt->offset) == CL_VIRUS)) + if((ret = cli_scanzip(desc, ctx, fpt->offset, &lastzip) == CL_VIRUS)) break; } fpt = fpt->next; @@ -1633,12 +1651,12 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx) switch(type) { case CL_TYPE_RAR: if(SCAN_ARCHIVE) - ret = cli_scanrar(desc, ctx, 0); + ret = cli_scanrar(desc, ctx, 0, NULL); break; case CL_TYPE_ZIP: if(SCAN_ARCHIVE) - ret = cli_scanzip(desc, ctx, 0); + ret = cli_scanzip(desc, ctx, 0, NULL); break; case CL_TYPE_GZ: