various speed optimisations

git-svn: trunk@3218
remotes/push_mirror/metadata
Tomasz Kojm 18 years ago
parent be32043eb7
commit e38ab7c147
  1. 7
      ChangeLog
  2. 228
      libclamav/matcher-ac.c
  3. 2
      libclamav/matcher-ac.h

@ -1,3 +1,10 @@
Thu Sep 13 19:23:31 CEST 2007 (tk)
----------------------------------
* libclamav/matcher-ac.[ch]: various speed optimisations:
- optimise node usage
- try hard to not overload node 0x00.0x00[0x00]
- optimise memory usage
Thu Sep 13 17:37:31 BST 2007 (njh)
----------------------------------
* libclamav: More optimisations

@ -39,6 +39,7 @@
int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern) int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern)
{ {
struct cli_ac_node *pt, *next, **newtable; struct cli_ac_node *pt, *next, **newtable;
struct cli_ac_patt *ph;
uint8_t i; uint8_t i;
uint16_t len = MIN(root->ac_maxdepth, pattern->length); uint16_t len = MIN(root->ac_maxdepth, pattern->length);
@ -114,6 +115,19 @@ int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern)
pt->final = 1; pt->final = 1;
pattern->depth = i; pattern->depth = i;
ph = pt->list;
while(ph) {
if((ph->length == pattern->length) && (ph->prefix_length == pattern->prefix_length)) {
if(!memcmp(ph->pattern, pattern->pattern, ph->length * sizeof(uint16_t)) && !memcmp(ph->prefix, pattern->prefix, ph->prefix_length * sizeof(uint16_t))) {
pattern->next_same = ph->next_same;
ph->next_same = pattern;
return CL_SUCCESS;
}
}
ph = ph->next;
}
pattern->next = pt->list; pattern->next = pt->list;
pt->list = pattern; pt->list = pattern;
@ -336,13 +350,9 @@ inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uin
uint8_t found; uint8_t found;
if(offset + pattern->length > length) if((offset + pattern->length > length) || (pattern->prefix_length > offset))
return 0; return 0;
if(pattern->prefix)
if(pattern->prefix_length > offset)
return 0;
bp = offset + pattern->depth; bp = offset + pattern->depth;
for(i = pattern->depth; i < pattern->length; i++) { for(i = pattern->depth; i < pattern->length; i++) {
@ -433,7 +443,7 @@ inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, of
int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, uint8_t otfrec, uint32_t offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset) int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, uint8_t otfrec, uint32_t offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset)
{ {
struct cli_ac_node *current; struct cli_ac_node *current;
struct cli_ac_patt *pt; struct cli_ac_patt *patt, *pt;
uint32_t i, bp, realoff; uint32_t i, bp, realoff;
uint16_t j; uint16_t j;
int32_t **offmatrix; int32_t **offmatrix;
@ -461,130 +471,138 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
current = current->trans[buffer[i]]; current = current->trans[buffer[i]];
if(current->final) { if(current->final) {
pt = current->list; patt = current->list;
while(pt) { while(patt) {
bp = i + 1 - pt->depth; bp = i + 1 - patt->depth;
if(ac_findmatch(buffer, bp, length, pt)) { if(ac_findmatch(buffer, bp, length, patt)) {
realoff = offset + bp - pt->prefix_length; pt = patt;
while(pt) {
if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) { realoff = offset + bp - pt->prefix_length;
if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, realoff, &info, fd, pt->virname)) {
pt = pt->next; if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
continue; if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, realoff, &info, fd, pt->virname)) {
pt = pt->next_same;
continue;
}
} }
}
if(pt->sigid) { /* it's a partial signature */ if(pt->sigid) { /* it's a partial signature */
if(!mdata->offmatrix[pt->sigid - 1]) { if(pt->partno != 1 && (!mdata->offmatrix[pt->sigid - 1] || !mdata->offmatrix[pt->sigid - 1][pt->partno - 2][0])) {
mdata->offmatrix[pt->sigid - 1] = cli_malloc(pt->parts * sizeof(int32_t *)); pt = pt->next_same;
if(!mdata->offmatrix[pt->sigid - 1]) { continue;
cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u]\n", pt->sigid - 1);
return CL_EMEM;
} }
mdata->offmatrix[pt->sigid - 1][0] = cli_malloc(pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t)); if(!mdata->offmatrix[pt->sigid - 1]) {
if(!mdata->offmatrix[pt->sigid - 1][0]) { mdata->offmatrix[pt->sigid - 1] = cli_malloc(pt->parts * sizeof(int32_t *));
cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u][0]\n", pt->sigid - 1); if(!mdata->offmatrix[pt->sigid - 1]) {
free(mdata->offmatrix[pt->sigid - 1]); cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u]\n", pt->sigid - 1);
mdata->offmatrix[pt->sigid - 1] = NULL; return CL_EMEM;
return CL_EMEM; }
}
memset(mdata->offmatrix[pt->sigid - 1][0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t)); mdata->offmatrix[pt->sigid - 1][0] = cli_malloc(pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
mdata->offmatrix[pt->sigid - 1][0][0] = 0; if(!mdata->offmatrix[pt->sigid - 1][0]) {
for(j = 1; j < pt->parts; j++) { cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u][0]\n", pt->sigid - 1);
mdata->offmatrix[pt->sigid - 1][j] = mdata->offmatrix[pt->sigid - 1][0] + j * (AC_DEFAULT_TRACKLEN + 1); free(mdata->offmatrix[pt->sigid - 1]);
mdata->offmatrix[pt->sigid - 1][j][0] = 0; mdata->offmatrix[pt->sigid - 1] = NULL;
return CL_EMEM;
}
memset(mdata->offmatrix[pt->sigid - 1][0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
mdata->offmatrix[pt->sigid - 1][0][0] = 0;
for(j = 1; j < pt->parts; j++) {
mdata->offmatrix[pt->sigid - 1][j] = mdata->offmatrix[pt->sigid - 1][0] + j * (AC_DEFAULT_TRACKLEN + 1);
mdata->offmatrix[pt->sigid - 1][j][0] = 0;
}
} }
} offmatrix = mdata->offmatrix[pt->sigid - 1];
offmatrix = mdata->offmatrix[pt->sigid - 1];
if(pt->partno != 1) {
if(pt->partno != 1) { found = 0;
found = 0; for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[pt->partno - 2][j] != -1; j++) {
for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[pt->partno - 2][j] != -1; j++) { found = 1;
found = 1; if(pt->maxdist)
if(pt->maxdist) if(realoff - offmatrix[pt->partno - 2][j] > pt->maxdist)
if(realoff - offmatrix[pt->partno - 2][j] > pt->maxdist) found = 0;
found = 0;
if(found && pt->mindist)
if(found && pt->mindist) if(realoff - offmatrix[pt->partno - 2][j] < pt->mindist)
if(realoff - offmatrix[pt->partno - 2][j] < pt->mindist) found = 0;
found = 0;
if(found)
if(found) break;
break; }
} }
}
if(pt->partno == 1 || (found && (pt->partno != pt->parts))) { if(pt->partno == 1 || (found && (pt->partno != pt->parts))) {
offmatrix[pt->partno - 1][0] %= AC_DEFAULT_TRACKLEN; offmatrix[pt->partno - 1][0] %= AC_DEFAULT_TRACKLEN;
offmatrix[pt->partno - 1][0]++; offmatrix[pt->partno - 1][0]++;
offmatrix[pt->partno - 1][offmatrix[pt->partno - 1][0]] = realoff + pt->length + pt->prefix_length;
if(pt->partno == 1) /* save realoff for the first part */
offmatrix[pt->parts - 1][offmatrix[pt->partno - 1][0]] = realoff;
} else if(found && pt->partno == pt->parts) {
if(pt->type) {
if(otfrec) {
if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
type = pt->type;
if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE))) {
/* FIXME: we don't know which offset of the first part is the correct one */
for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[0][j] != -1; j++) {
if(ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j])) {
if(info.exeinfo.section)
free(info.exeinfo.section);
return CL_EMEM;
}
}
}
offmatrix[pt->partno - 1][offmatrix[pt->partno - 1][0]] = realoff + pt->length + pt->prefix_length; memset(offmatrix[0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
if(pt->partno == 1) /* save realoff for the first part */ for(j = 0; j < pt->parts; j++)
offmatrix[pt->parts - 1][offmatrix[pt->partno - 1][0]] = realoff; offmatrix[j][0] = 0;
} else if(found && pt->partno == pt->parts) { }
}
} else { /* !pt->type */
if(virname)
*virname = pt->virname;
if(info.exeinfo.section)
free(info.exeinfo.section);
return CL_VIRUS;
}
}
} else { /* old type signature */
if(pt->type) { if(pt->type) {
if(otfrec) { if(otfrec) {
if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) { if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
cli_dbgmsg("Matched signature for file type %s\n", pt->virname); cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff);
type = pt->type; type = pt->type;
if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE))) { if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE))) {
/* FIXME: we don't know which offset of the first part is the correct one */
for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[0][j] != -1; j++) { if(ac_addtype(ftoffset, type, realoff)) {
if(ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j])) { if(info.exeinfo.section)
if(info.exeinfo.section) free(info.exeinfo.section);
free(info.exeinfo.section); return CL_EMEM;
return CL_EMEM;
}
} }
} }
memset(offmatrix[0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
for(j = 0; j < pt->parts; j++)
offmatrix[j][0] = 0;
} }
} }
} else {
} else { /* !pt->type */
if(virname) if(virname)
*virname = pt->virname; *virname = pt->virname;
if(info.exeinfo.section) if(info.exeinfo.section)
free(info.exeinfo.section); free(info.exeinfo.section);
return CL_VIRUS; return CL_VIRUS;
} }
} }
pt = pt->next_same;
} else { /* old type signature */
if(pt->type) {
if(otfrec) {
if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff);
type = pt->type;
if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE))) {
if(ac_addtype(ftoffset, type, realoff)) {
if(info.exeinfo.section)
free(info.exeinfo.section);
return CL_EMEM;
}
}
}
}
} else {
if(virname)
*virname = pt->virname;
if(info.exeinfo.section)
free(info.exeinfo.section);
return CL_VIRUS;
}
} }
} }
patt = patt->next;
pt = pt->next;
} }
} }
} }
@ -751,15 +769,15 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
if(new->pattern[j] & CLI_MATCH_WILDCARD) { if(new->pattern[j] & CLI_MATCH_WILDCARD) {
break; break;
} else { } else {
if(j - i + 1 > plen) { if(j - i + 1 >= plen) {
plen = j - i + 1; plen = j - i + 1;
ppos = i; ppos = i;
} }
} }
if(plen >= root->ac_maxdepth) if(plen >= root->ac_maxdepth && (new->pattern[ppos] || new->pattern[ppos + 1]))
break; break;
} }
if(plen >= root->ac_maxdepth) if(plen >= root->ac_maxdepth && (new->pattern[ppos] || new->pattern[ppos + 1]))
break; break;
} }

@ -43,7 +43,7 @@ struct cli_ac_patt {
uint8_t target; uint8_t target;
uint16_t type; uint16_t type;
unsigned char **altc; unsigned char **altc;
struct cli_ac_patt *next; struct cli_ac_patt *next, *next_same;
}; };
struct cli_ac_node { struct cli_ac_node {

Loading…
Cancel
Save