add more unit-tests

fix matching bugs in regex_list


git-svn: trunk@4020
remotes/push_mirror/0.95
Török Edvin 18 years ago
parent a497dce52d
commit a2d14e06b9
  1. 30
      libclamav/regex_list.c
  2. 7
      libclamav/regex_list.h
  3. 64
      unit_tests/check_regex.c

@ -108,7 +108,7 @@ static int SO_preprocess_add(struct filter *m, const unsigned char *pattern, siz
if(j) {
j--;
m->end[q] &= ~(1 << j);
m->end_fast[pattern[j]] &= (1<<j);
m->end_fast[pattern[j+1]] &= ~(1<<j);
}
return 0;
}
@ -126,6 +126,13 @@ static long SO_search(const struct filter *m, const unsigned char *data, unsigne
const uint32_t *EndFast = m->end_fast;
if(!len) return -1;
/* cut length, and make it modulo 2 */
if(len > MAXSOPATLEN) {
len = MAXSOPATLEN;
} else {
/* we use 2-grams, must be multiple of 2 */
len = len & ~1;
}
/* Shift-Or like search algorithm */
for(j=0;j < len-1; j++) {
const uint16_t q0 = cli_readint16( &data[j] );
@ -201,6 +208,8 @@ static int validate_subdomain(const struct regex_list *regex, const struct pre_f
(match_len < buffer_len &&
((c=get_char_at_pos_with_skip(pre_fixup,buffer,buffer_len-match_len))=='.' || (c==' ')) )
/* subdomain matched*/)) {
/* we have an extra / at the end */
if(match_len > 0) match_len--;
cli_dbgmsg("Got a match: %s with %s\n", buffer, regex->pattern);
cli_dbgmsg("Before inserting .: %s\n", orig_real_url);
if(real_len >= match_len + 1) {
@ -290,7 +299,6 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di
* negatives */
return 0;
}
rc = cli_ac_scanbuff((const unsigned char*)bufrev,buffer_len, NULL, (void*)&regex, &res, &matcher->suffixes,&mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL);
free(bufrev);
cli_ac_freedata(&mdata);
@ -517,7 +525,7 @@ void regex_list_done(struct regex_matcher* matcher)
cli_ac_free(&matcher->suffixes);
if(matcher->suffix_regexes) {
for(i=0;i<matcher->suffix_cnt;i++) {
struct regex_list *r = matcher->suffix_regexes[i];
struct regex_list *r = matcher->suffix_regexes[i].head;
while(r) {
struct regex_list *q = r;
r = r->nxt;
@ -599,6 +607,16 @@ static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info,
#define MODULE "regex_list: "
/* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */
static void list_add_tail(struct regex_list_ht *ht, struct regex_list *regex)
{
if(!ht->head)
ht->head = regex;
if(ht->tail) {
ht->tail->nxt = regex;
}
ht->tail = regex;
}
/* returns 0 on success, clamav error code otherwise */
static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *iregex)
{
@ -618,8 +636,7 @@ static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_le
if(el) {
/* existing suffix */
assert((size_t)el->data < matcher->suffix_cnt);
regex->nxt = matcher->suffix_regexes[el->data];
matcher->suffix_regexes[el->data] = regex;
list_add_tail(&matcher->suffix_regexes[el->data], regex);
cli_dbgmsg(MODULE "added new regex to existing suffix %s: %s\n", suffix, regex->pattern);
} else {
/* new suffix */
@ -628,7 +645,8 @@ static int add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_le
matcher->suffix_regexes = cli_realloc(matcher->suffix_regexes, (n+1)*sizeof(*matcher->suffix_regexes));
if(!matcher->suffix_regexes)
return CL_EMEM;
matcher->suffix_regexes[n] = regex;
matcher->suffix_regexes[n].tail = regex;
matcher->suffix_regexes[n].head = regex;
add_newsuffix(matcher, regex, suffix, suffix_len);
cli_dbgmsg(MODULE "added new suffix %s, for regex: %s\n", suffix, regex->pattern);
}

@ -36,10 +36,15 @@ struct filter {
unsigned long m;
};
/*
 * Head/tail pointer pair for a list of regex_list nodes (linked through
 * their `nxt` member). Keeping the tail allows appending without walking
 * the list.
 */
struct regex_list_ht {
struct regex_list *head; /* first node of the list */
struct regex_list *tail; /* last node of the list; new nodes are linked after it */
};
struct regex_matcher {
struct hashtable suffix_hash;
size_t suffix_cnt;
struct regex_list **suffix_regexes;
struct regex_list_ht *suffix_regexes;
size_t regex_cnt;
regex_t **all_pregs;
struct cli_matcher suffixes;

@ -175,7 +175,14 @@ static const struct rtest {
{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
"http://srx.main.ebayrtm.com.evil.example.com",
"pages.ebay.de",
0}
0},
{".+\\.ebayrtm\\.com([/?].*)?:.+\\.ebay\\.(de|com|co\\.uk)([/?].*)?/",
"www.www.ebayrtm.com?somecgi",
"www.ebay.com/something",1},
{NULL,
"http://key.com","go to key.com",2
}
};
START_TEST (regex_list_match_test)
@ -237,6 +244,7 @@ static void psetup(void)
f = fdopen(open_testfile("input/daily.pdb"),"r");
fail_unless(!!f, "fopen daily.pdb");
cl_debug();
rc = load_regex_matcher(engine->domainlist_matcher, f, 0, 0, NULL);
fail_unless(rc == 0, "load_regex_matcher");
fclose(f);
@ -266,9 +274,9 @@ static void pteardown(void)
engine = NULL;
}
START_TEST (phishingScan_test)
static void do_phishing_test(const struct rtest *rtest)
{
const struct rtest *rtest = &rtests[_i];
char *realurl;
cli_ctx ctx;
const char *virname;
@ -294,15 +302,58 @@ START_TEST (phishingScan_test)
ctx.engine = engine;
ctx.virname = &virname;
rc = phishingScan(NULL, NULL, &ctx, &hrefs);
fail_unless(rc == CL_CLEAN,"phishingScan");
fail_unless(!!ctx.found_possibly_unwanted == !rtest->result ,
"found unwanted: %d, expected: %d\n", ctx.found_possibly_unwanted, !rtest->result);
html_tag_arg_free(&hrefs);
fail_unless(rc == CL_CLEAN,"phishingScan");
switch(rtest->result) {
case 0:
fail_unless(ctx.found_possibly_unwanted,
"this should be phishing, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
break;
case 1:
fail_unless(!ctx.found_possibly_unwanted,
"this should be whitelisted, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
break;
case 2:
fail_unless(!ctx.found_possibly_unwanted,
"this should be clean, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
break;
}
}
START_TEST (phishingScan_test)
{
do_phishing_test(&rtests[_i]);
}
END_TEST
START_TEST(phishing_fake_test)
{
char buf[4096];
FILE *f = fdopen(open_testfile("input/daily.pdb"),"r");
fail_unless(!!f,"fopen daily.pdb");
while(fgets(buf, sizeof(buf), f)) {
struct rtest rtest;
const char *pdb = strchr(buf,':');
fail_unless(!!pdb, "missing : in pdb");
rtest.realurl = pdb;
rtest.displayurl = pdb;
rtest.result = 2;
do_phishing_test(&rtest);
rtest.realurl = "http://fake.example.com";
rtest.result = 0;
do_phishing_test(&rtest);
}
fclose(f);
}
END_TEST
Suite *test_regex_suite(void)
{
Suite *s = suite_create("regex");
@ -324,6 +375,7 @@ Suite *test_regex_suite(void)
suite_add_tcase(s, tc_phish);
tcase_add_checked_fixture(tc_phish, psetup, pteardown);
tcase_add_loop_test(tc_phish, phishingScan_test, 0, sizeof(rtests)/sizeof(rtests[0]));
tcase_add_test(tc_phish, phishing_fake_test);
return s;
}

Loading…
Cancel
Save