From ab1db3b3f19b033fd265f06c1383038b17fe045f Mon Sep 17 00:00:00 2001 From: Tomasz Kojm Date: Tue, 6 Nov 2007 17:16:07 +0000 Subject: [PATCH] minor code cleanup; load balance bm_suffix git-svn: trunk@3360 --- ChangeLog | 4 ++ libclamav/matcher-bm.c | 126 ++++++++++++++++++++++------------------- libclamav/matcher-bm.h | 5 +- libclamav/matcher.h | 2 +- 4 files changed, 77 insertions(+), 60 deletions(-) diff --git a/ChangeLog b/ChangeLog index 43dd4b1cb..496c31e03 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Tue Nov 6 17:17:56 CET 2007 (tk) +--------------------------------- + * libclamav/matcher-bm.c: minor code cleanup; load balance bm_suffix + Tue Nov 6 16:13:08 GMT 2007 (njh) ---------------------------------- * libclamav/vba_extract.c: Removed more unused code diff --git a/libclamav/matcher-bm.c b/libclamav/matcher-bm.c index c4e1167f4..92af43ab1 100644 --- a/libclamav/matcher-bm.c +++ b/libclamav/matcher-bm.c @@ -30,21 +30,13 @@ #include "matcher-bm.h" #include "filetypes.h" -/* TODO: Check prefix regularity and automatically transfer some signatures - * to AC - */ - #define BM_MIN_LENGTH 3 -/* #define BM_TEST_OFFSET 5 */ #define BM_BLOCK_SIZE 3 - #define HASH(a,b,c) (211 * a + 37 * b + c) - int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern) { - int i; - uint16_t idx; + uint16_t idx, i; const unsigned char *pt = pattern->pattern; struct cli_bm_patt *prev, *next = NULL; @@ -54,16 +46,29 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern) return CL_EPATSHORT; } - for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i >= 0; i--) { +#if BM_MIN_LENGTH == BM_BLOCK_SIZE + /* try to load balance bm_suffix (at the cost of bm_shift) */ + for(i = 0; i < pattern->length - BM_BLOCK_SIZE + 1; i++) { idx = HASH(pt[i], pt[i + 1], pt[i + 2]); - root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i); + if(!root->bm_suffix[idx]) { + if(i) { + pattern->prefix = pattern->pattern; + pattern->prefix_length = i; + pattern->pattern = &pattern->pattern[i]; + pattern->length -= i; + pt = pattern->pattern; + } + break; + } } +#endif - i = BM_MIN_LENGTH - BM_BLOCK_SIZE; - idx = HASH(pt[i], pt[i + 1], pt[i + 2]); + for(i = 0; i <= BM_MIN_LENGTH - BM_BLOCK_SIZE; i++) { + idx = HASH(pt[i], pt[i + 1], pt[i + 2]); + root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i); + } prev = next = root->bm_suffix[idx]; - while(next) { if(pt[0] >= next->pattern[0]) break; @@ -73,25 +78,24 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern) if(next == root->bm_suffix[idx]) { pattern->next = root->bm_suffix[idx]; + if(root->bm_suffix[idx]) + pattern->cnt = root->bm_suffix[idx]->cnt; root->bm_suffix[idx] = pattern; } else { pattern->next = prev->next; prev->next = pattern; } + root->bm_suffix[idx]->cnt++; - return 0; + return CL_SUCCESS; } int cli_bm_init(struct cli_matcher *root) { - unsigned int i; - unsigned int size = HASH(256, 256, 256); + uint16_t i, size = HASH(255, 255, 255) + 1; - cli_dbgmsg("in cli_bm_init()\n"); - cli_dbgmsg("BM: Number of indexes = %d\n", size); - - if(!(root->bm_shift = (int *) cli_malloc(size * sizeof(int)))) + if(!(root->bm_shift = (uint8_t *) cli_malloc(size * sizeof(uint8_t)))) return CL_EMEM; if(!(root->bm_suffix = (struct cli_bm_patt **) cli_calloc(size, sizeof(struct cli_bm_patt *)))) { @@ -102,14 +106,13 @@ int cli_bm_init(struct cli_matcher *root) for(i = 0; i < size; i++) root->bm_shift[i] = BM_MIN_LENGTH - BM_BLOCK_SIZE + 1; - return 0; + return CL_SUCCESS; } void cli_bm_free(struct cli_matcher *root) { - struct cli_bm_patt *b1, *b2; - unsigned int i; - unsigned int size = HASH(256, 256, 256); + struct cli_bm_patt *patt, *prev; + uint16_t i, size = HASH(255, 255, 255) + 1; if(root->bm_shift) @@ -117,17 +120,19 @@ void cli_bm_free(struct cli_matcher *root) if(root->bm_suffix) { for(i = 0; i < size; i++) { - b1 = root->bm_suffix[i]; - while(b1) { - b2 = b1; - b1 = b1->next; - if(b2->virname) - free(b2->virname); - if(b2->offset) - free(b2->offset); - if(b2->pattern) - free(b2->pattern); - free(b2); + patt = root->bm_suffix[i]; + while(patt) { + prev = patt; + patt = patt->next; + if(prev->prefix) + free(prev->prefix); + else + free(prev->pattern); + if(prev->virname) + free(prev->virname); + if(prev->offset) + free(prev->offset); + free(prev); } } free(root->bm_suffix); @@ -136,11 +141,11 @@ void cli_bm_free(struct cli_matcher *root) int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, uint32_t offset, cli_file_t ftype, int fd) { - unsigned int i, j, shift, off, found = 0; - int idxtest; - uint16_t idx; + uint32_t i, j, off; + uint8_t found, pchain, shift; + uint16_t idx, idxchk; struct cli_bm_patt *p; - const unsigned char *bp; + const unsigned char *bp, *pt; unsigned char prefix; struct cli_target_info info; @@ -155,49 +160,56 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i < length - BM_BLOCK_SIZE + 1; ) { idx = HASH(buffer[i], buffer[i + 1], buffer[i + 2]); - shift = root->bm_shift[idx]; if(shift == 0) { - prefix = buffer[i - BM_MIN_LENGTH + BM_BLOCK_SIZE]; p = root->bm_suffix[idx]; + pchain = 0; + while(p) { + if(p->pattern[0] != prefix) { + if(pchain) + break; + p = p->next; + continue; + } else pchain = 1; - while(p && p->pattern[0] != prefix) - p = p->next; - - while(p && p->pattern[0] == prefix) { off = i - BM_MIN_LENGTH + BM_BLOCK_SIZE; bp = buffer + off; -#ifdef BM_TEST_OFFSET - if(bp[BM_TEST_OFFSET] != p->pattern[BM_TEST_OFFSET]) { + if((off + p->length > length) || (p->prefix_length > off)) { p = p->next; continue; } -#endif - idxtest = MIN (p->length, length - off ) - 1; - if(idxtest >= 0) { - if(bp[idxtest] != p->pattern[idxtest]) { + idxchk = MIN(p->length, length - off) - 1; + if(idxchk) { + if((bp[idxchk] != p->pattern[idxchk]) || (bp[idxchk / 2] != p->pattern[idxchk / 2])) { p = p->next; continue; } } + if(p->prefix_length) { + off -= p->prefix_length; + bp -= p->prefix_length; + pt = p->prefix; + } else { + pt = p->pattern; + } + found = 1; - for(j = 0; j < p->length && off < length; j++, off++) { - if(bp[j] != p->pattern[j]) { + for(j = 0; j < p->length + p->prefix_length && off < length; j++, off++) { + if(bp[j] != pt[j]) { found = 0; break; } } - if(found && p->length == j) { + if(found && p->length + p->prefix_length == j) { if(p->target || p->offset) { - off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE; - + off = offset + i - p->prefix_length - BM_MIN_LENGTH + BM_BLOCK_SIZE; if((fd == -1 && !ftype) || !cli_validatesig(ftype, p->offset, off, &info, fd, p->virname)) { p = p->next; continue; diff --git a/libclamav/matcher-bm.h b/libclamav/matcher-bm.h index e70fc2a8d..f687be192 100644 --- a/libclamav/matcher-bm.h +++ b/libclamav/matcher-bm.h @@ -24,11 +24,12 @@ #include "cltypes.h" struct cli_bm_patt { - unsigned char *pattern; - uint32_t length; + unsigned char *pattern, *prefix; + uint16_t length, prefix_length; char *virname, *offset; uint8_t target; struct cli_bm_patt *next; + uint16_t cnt; }; int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern); diff --git a/libclamav/matcher.h b/libclamav/matcher.h index ee9b950d8..6e0714aad 100644 --- a/libclamav/matcher.h +++ b/libclamav/matcher.h @@ -43,7 +43,7 @@ struct cli_matcher { uint8_t ac_only; /* Extended Boyer-Moore */ - int32_t *bm_shift; + uint8_t *bm_shift; struct cli_bm_patt **bm_suffix; uint32_t *soff, soff_len; /* for PE section sigs */