minor code cleanup; load balance bm_suffix

git-svn: trunk@3360
remotes/push_mirror/metadata
Tomasz Kojm 18 years ago
parent d9a9e1fcbe
commit ab1db3b3f1
  1. 4
      ChangeLog
  2. 126
      libclamav/matcher-bm.c
  3. 5
      libclamav/matcher-bm.h
  4. 2
      libclamav/matcher.h

@@ -1,3 +1,7 @@
Tue Nov 6 17:17:56 CET 2007 (tk)
---------------------------------
* libclamav/matcher-bm.c: minor code cleanup; load balance bm_suffix
Tue Nov 6 16:13:08 GMT 2007 (njh) Tue Nov 6 16:13:08 GMT 2007 (njh)
---------------------------------- ----------------------------------
* libclamav/vba_extract.c: Removed more unused code * libclamav/vba_extract.c: Removed more unused code

@@ -30,21 +30,13 @@
#include "matcher-bm.h" #include "matcher-bm.h"
#include "filetypes.h" #include "filetypes.h"
/* TODO: Check prefix regularity and automatically transfer some signatures
* to AC
*/
#define BM_MIN_LENGTH 3 #define BM_MIN_LENGTH 3
/* #define BM_TEST_OFFSET 5 */
#define BM_BLOCK_SIZE 3 #define BM_BLOCK_SIZE 3
#define HASH(a,b,c) (211 * a + 37 * b + c) #define HASH(a,b,c) (211 * a + 37 * b + c)
int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern) int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern)
{ {
int i; uint16_t idx, i;
uint16_t idx;
const unsigned char *pt = pattern->pattern; const unsigned char *pt = pattern->pattern;
struct cli_bm_patt *prev, *next = NULL; struct cli_bm_patt *prev, *next = NULL;
@@ -54,16 +46,29 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern)
return CL_EPATSHORT; return CL_EPATSHORT;
} }
for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i >= 0; i--) { #if BM_MIN_LENGTH == BM_BLOCK_SIZE
/* try to load balance bm_suffix (at the cost of bm_shift) */
for(i = 0; i < pattern->length - BM_BLOCK_SIZE + 1; i++) {
idx = HASH(pt[i], pt[i + 1], pt[i + 2]); idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i); if(!root->bm_suffix[idx]) {
if(i) {
pattern->prefix = pattern->pattern;
pattern->prefix_length = i;
pattern->pattern = &pattern->pattern[i];
pattern->length -= i;
pt = pattern->pattern;
}
break;
}
} }
#endif
i = BM_MIN_LENGTH - BM_BLOCK_SIZE; for(i = 0; i <= BM_MIN_LENGTH - BM_BLOCK_SIZE; i++) {
idx = HASH(pt[i], pt[i + 1], pt[i + 2]); idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i);
}
prev = next = root->bm_suffix[idx]; prev = next = root->bm_suffix[idx];
while(next) { while(next) {
if(pt[0] >= next->pattern[0]) if(pt[0] >= next->pattern[0])
break; break;
@@ -73,25 +78,24 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern)
if(next == root->bm_suffix[idx]) { if(next == root->bm_suffix[idx]) {
pattern->next = root->bm_suffix[idx]; pattern->next = root->bm_suffix[idx];
if(root->bm_suffix[idx])
pattern->cnt = root->bm_suffix[idx]->cnt;
root->bm_suffix[idx] = pattern; root->bm_suffix[idx] = pattern;
} else { } else {
pattern->next = prev->next; pattern->next = prev->next;
prev->next = pattern; prev->next = pattern;
} }
root->bm_suffix[idx]->cnt++;
return 0; return CL_SUCCESS;
} }
int cli_bm_init(struct cli_matcher *root) int cli_bm_init(struct cli_matcher *root)
{ {
unsigned int i; uint16_t i, size = HASH(255, 255, 255) + 1;
unsigned int size = HASH(256, 256, 256);
cli_dbgmsg("in cli_bm_init()\n"); if(!(root->bm_shift = (uint8_t *) cli_malloc(size * sizeof(uint8_t))))
cli_dbgmsg("BM: Number of indexes = %d\n", size);
if(!(root->bm_shift = (int *) cli_malloc(size * sizeof(int))))
return CL_EMEM; return CL_EMEM;
if(!(root->bm_suffix = (struct cli_bm_patt **) cli_calloc(size, sizeof(struct cli_bm_patt *)))) { if(!(root->bm_suffix = (struct cli_bm_patt **) cli_calloc(size, sizeof(struct cli_bm_patt *)))) {
@@ -102,14 +106,13 @@ int cli_bm_init(struct cli_matcher *root)
for(i = 0; i < size; i++) for(i = 0; i < size; i++)
root->bm_shift[i] = BM_MIN_LENGTH - BM_BLOCK_SIZE + 1; root->bm_shift[i] = BM_MIN_LENGTH - BM_BLOCK_SIZE + 1;
return 0; return CL_SUCCESS;
} }
void cli_bm_free(struct cli_matcher *root) void cli_bm_free(struct cli_matcher *root)
{ {
struct cli_bm_patt *b1, *b2; struct cli_bm_patt *patt, *prev;
unsigned int i; uint16_t i, size = HASH(255, 255, 255) + 1;
unsigned int size = HASH(256, 256, 256);
if(root->bm_shift) if(root->bm_shift)
@@ -117,17 +120,19 @@ void cli_bm_free(struct cli_matcher *root)
if(root->bm_suffix) { if(root->bm_suffix) {
for(i = 0; i < size; i++) { for(i = 0; i < size; i++) {
b1 = root->bm_suffix[i]; patt = root->bm_suffix[i];
while(b1) { while(patt) {
b2 = b1; prev = patt;
b1 = b1->next; patt = patt->next;
if(b2->virname) if(prev->prefix)
free(b2->virname); free(prev->prefix);
if(b2->offset) else
free(b2->offset); free(prev->pattern);
if(b2->pattern) if(prev->virname)
free(b2->pattern); free(prev->virname);
free(b2); if(prev->offset)
free(prev->offset);
free(prev);
} }
} }
free(root->bm_suffix); free(root->bm_suffix);
@@ -136,11 +141,11 @@ void cli_bm_free(struct cli_matcher *root)
int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, uint32_t offset, cli_file_t ftype, int fd) int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, uint32_t offset, cli_file_t ftype, int fd)
{ {
unsigned int i, j, shift, off, found = 0; uint32_t i, j, off;
int idxtest; uint8_t found, pchain, shift;
uint16_t idx; uint16_t idx, idxchk;
struct cli_bm_patt *p; struct cli_bm_patt *p;
const unsigned char *bp; const unsigned char *bp, *pt;
unsigned char prefix; unsigned char prefix;
struct cli_target_info info; struct cli_target_info info;
@@ -155,49 +160,56 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i < length - BM_BLOCK_SIZE + 1; ) { for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i < length - BM_BLOCK_SIZE + 1; ) {
idx = HASH(buffer[i], buffer[i + 1], buffer[i + 2]); idx = HASH(buffer[i], buffer[i + 1], buffer[i + 2]);
shift = root->bm_shift[idx]; shift = root->bm_shift[idx];
if(shift == 0) { if(shift == 0) {
prefix = buffer[i - BM_MIN_LENGTH + BM_BLOCK_SIZE]; prefix = buffer[i - BM_MIN_LENGTH + BM_BLOCK_SIZE];
p = root->bm_suffix[idx]; p = root->bm_suffix[idx];
pchain = 0;
while(p) {
if(p->pattern[0] != prefix) {
if(pchain)
break;
p = p->next;
continue;
} else pchain = 1;
while(p && p->pattern[0] != prefix)
p = p->next;
while(p && p->pattern[0] == prefix) {
off = i - BM_MIN_LENGTH + BM_BLOCK_SIZE; off = i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
bp = buffer + off; bp = buffer + off;
#ifdef BM_TEST_OFFSET if((off + p->length > length) || (p->prefix_length > off)) {
if(bp[BM_TEST_OFFSET] != p->pattern[BM_TEST_OFFSET]) {
p = p->next; p = p->next;
continue; continue;
} }
#endif
idxtest = MIN (p->length, length - off ) - 1; idxchk = MIN(p->length, length - off) - 1;
if(idxtest >= 0) { if(idxchk) {
if(bp[idxtest] != p->pattern[idxtest]) { if((bp[idxchk] != p->pattern[idxchk]) || (bp[idxchk / 2] != p->pattern[idxchk / 2])) {
p = p->next; p = p->next;
continue; continue;
} }
} }
if(p->prefix_length) {
off -= p->prefix_length;
bp -= p->prefix_length;
pt = p->prefix;
} else {
pt = p->pattern;
}
found = 1; found = 1;
for(j = 0; j < p->length && off < length; j++, off++) { for(j = 0; j < p->length + p->prefix_length && off < length; j++, off++) {
if(bp[j] != p->pattern[j]) { if(bp[j] != pt[j]) {
found = 0; found = 0;
break; break;
} }
} }
if(found && p->length == j) { if(found && p->length + p->prefix_length == j) {
if(p->target || p->offset) { if(p->target || p->offset) {
off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE; off = offset + i - p->prefix_length - BM_MIN_LENGTH + BM_BLOCK_SIZE;
if((fd == -1 && !ftype) || !cli_validatesig(ftype, p->offset, off, &info, fd, p->virname)) { if((fd == -1 && !ftype) || !cli_validatesig(ftype, p->offset, off, &info, fd, p->virname)) {
p = p->next; p = p->next;
continue; continue;

@@ -24,11 +24,12 @@
#include "cltypes.h" #include "cltypes.h"
struct cli_bm_patt { struct cli_bm_patt {
unsigned char *pattern; unsigned char *pattern, *prefix;
uint32_t length; uint16_t length, prefix_length;
char *virname, *offset; char *virname, *offset;
uint8_t target; uint8_t target;
struct cli_bm_patt *next; struct cli_bm_patt *next;
uint16_t cnt;
}; };
int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern); int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern);

@@ -43,7 +43,7 @@ struct cli_matcher {
uint8_t ac_only; uint8_t ac_only;
/* Extended Boyer-Moore */ /* Extended Boyer-Moore */
int32_t *bm_shift; uint8_t *bm_shift;
struct cli_bm_patt **bm_suffix; struct cli_bm_patt **bm_suffix;
uint32_t *soff, soff_len; /* for PE section sigs */ uint32_t *soff, soff_len; /* for PE section sigs */

Loading…
Cancel
Save