optimization: use the md5 filter to avoid unnecessary cli_bm_scanbuff calls

bugfix: SO_search returns -1 when it doesn't find a match;
the code was assuming it returns 0.

git-svn: trunk@4061
0.95
Török Edvin 18 years ago
parent 9a22341892
commit 063ddd53f8
  1. 11
      libclamav/phishcheck.c
  2. 6
      libclamav/regex_list.c
  3. 2
      libclamav/regex_list.h

@ -726,7 +726,6 @@ cleanupURL(struct string *URL,struct string *pre_URL, int isReal)
return 0;
}
/* -------end runtime disable---------*/
int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
{
@ -768,7 +767,6 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
urls.realLink.refcount=-1;
urls.displayLink.refcount=-1;
int rc = phishingCheck(ctx->engine, &urls);
//printf("%d\n",rc);
}
fclose(f);
return 0;
@ -1202,7 +1200,8 @@ static int hash_match(const struct regex_matcher *rlist, const char *host, size_
cli_md5_update(&md5, host, hlen);
cli_md5_update(&md5, path, plen);
cli_md5_final(md5_dig, &md5);
if(cli_bm_scanbuff(md5_dig, 16, &virname, &rlist->md5_hashes,0,0,-1) == CL_VIRUS) {
if(SO_search(&rlist->md5_filter, md5_dig, 16) != -1 &&
cli_bm_scanbuff(md5_dig, 16, &virname, &rlist->md5_hashes,0,0,-1) == CL_VIRUS) {
switch(*virname) {
case '1':
return CL_PHISH_HASH1;
@ -1243,8 +1242,9 @@ static int url_hash_match(const struct regex_matcher *rlist, const char *inurl,
str_hex_to_char(&url, &urlend);
len = urlend - url;
host_begin = strchr(url,':');
if(!host_begin)
if(!host_begin) {
return CL_PHISH_CLEAN;
}
++host_begin;
while((host_begin < urlend) && *host_begin == '/') ++host_begin;
while(*host_begin == '.' && host_begin < urlend) ++host_begin;
@ -1291,8 +1291,9 @@ static int url_hash_match(const struct regex_matcher *rlist, const char *inurl,
for(ki=0;ki < k; ki++) {
assert(pp[ki] < path_len);
rc = hash_match(rlist, lp[ji], host_begin + host_len - lp[ji] + 1, path_begin, pp[ki]);
if(rc)
if(rc) {
return rc;
}
}
}
return CL_SUCCESS;

@ -117,7 +117,7 @@ static int SO_preprocess_add(struct filter *m, const unsigned char *pattern, siz
* each bit in "state" means an active state, when a char is encountered
* we determine what states can remain active.
* The FSM transition rules are expressed as bit-masks */
static long SO_search(const struct filter *m, const unsigned char *data, unsigned long len)
long SO_search(const struct filter *m, const unsigned char *data, unsigned long len)
{
size_t j;
uint32_t state = ~0;
@ -291,7 +291,7 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di
return CL_EMEM;
reverse_string(bufrev);
rc = SO_search(&matcher->filter, (const unsigned char*)bufrev, buffer_len) != -1;
if(!rc) {
if(rc == -1) {
free(buffer);
free(bufrev);
/* filter says this suffix doesn't match.
@ -354,6 +354,7 @@ int init_regex_list(struct regex_matcher* matcher)
return rc;
}
SO_init(&matcher->filter);
SO_init(&matcher->md5_filter);
return CL_SUCCESS;
}
@ -416,6 +417,7 @@ static int add_hash(struct regex_matcher *matcher, char* pattern, const char fl)
return CL_EMEM;
}
*pat->virname = fl;
SO_preprocess_add(&matcher->md5_filter, pat->pattern, pat->length);
if(rc = cli_bm_addpatt(&matcher->md5_hashes, pat)) {
cli_errmsg("add_hash: failed to add BM pattern\n");
free(pat->pattern);

@ -49,6 +49,7 @@ struct regex_matcher {
regex_t **all_pregs;
struct cli_matcher suffixes;
struct cli_matcher md5_hashes;
struct filter md5_filter;
struct filter filter;
int list_inited:2;
int list_loaded:2;
@ -63,6 +64,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
void regex_list_cleanup(struct regex_matcher* matcher);
void regex_list_done(struct regex_matcher* matcher);
int is_regex_ok(struct regex_matcher* matcher);
long SO_search(const struct filter *m, const unsigned char *data, unsigned long len);
#endif

Loading…
Cancel
Save