[WIP] Add nocase (case-insensitive) matching support to the ClamAV Aho-Corasick (AC) matcher

remotes/push_mirror/klin/altstr-yara
Kevin Lin 10 years ago
parent 1c4ee73052
commit a02acd50b6
  1. 45
      libclamav/matcher-ac.c
  2. 2
      libclamav/matcher-ac.h
  3. 6
      libclamav/matcher.h
  4. 3
      libclamav/others.h
  5. 86
      libclamav/readdb.c
  6. 2
      libclamav/readdb.h
  7. 4
      libclamav/str.h
  8. 4
      sigtool/sigtool.c
  9. 16
      unit_tests/check_matchers.c

@ -111,7 +111,10 @@ int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern)
}
}
next = pt->trans[(unsigned char) (pattern->pattern[i] & 0xff)];
if (root->ac_nocase)
next = pt->trans[cli_nocase((unsigned char) (pattern->pattern[i] & 0xff))];
else
next = pt->trans[(unsigned char) (pattern->pattern[i] & 0xff)];
if(!next) {
next = (struct cli_ac_node *) mpool_calloc(root->mempool, 1, sizeof(struct cli_ac_node));
@ -762,14 +765,18 @@ int cli_ac_chklsig(const char *expr, const char *end, uint32_t *lsigcnt, unsigne
* an alternative contains strings of different lengths and
* more than one of them can match at the current position.
*/
#define AC_MATCH_CHAR(p,b) \
switch(wc = p & CLI_MATCH_WILDCARD) { \
switch(wc = p & CLI_MATCH_METADATA) { \
case CLI_MATCH_CHAR: \
if((unsigned char) p != b) \
match = 0; \
break; \
\
case CLI_MATCH_NOCASE: \
if(cli_nocase((unsigned char)(p & 0xff)) != cli_nocase(b)) \
match = 0; \
break; \
\
case CLI_MATCH_IGNORE: \
break; \
\
@ -1250,7 +1257,10 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
current = root->ac_root;
for(i = 0; i < length; i++) {
current = current->trans[buffer[i]];
if (root->ac_nocase)
current = current->trans[cli_nocase(buffer[i])];
else
current = current->trans[buffer[i]];
if(UNLIKELY(IS_FINAL(current))) {
struct cli_ac_patt *faillist = current->fail->list;
@ -1530,12 +1540,12 @@ static int qcompare(const void *a, const void *b)
}
/* FIXME: clean up the code */
int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t rtype, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, const uint32_t *lsigid, unsigned int options)
int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, const char *sigopts, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t rtype, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, const uint32_t *lsigid, unsigned int options)
{
struct cli_ac_patt *new;
char *pt, *pt2, *hex = NULL, *hexcpy = NULL;
uint16_t i, j, ppos = 0, pend, *dec, nzpos = 0;
uint8_t wprefix = 0, zprefix = 1, plen = 0, nzplen = 0;
uint8_t wprefix = 0, zprefix = 1, plen = 0, nzplen = 0, nocase = 0;
struct cli_ac_special *newspecial, *specialpt, **newtable;
int ret, error = CL_SUCCESS;
@ -1550,6 +1560,22 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
return CL_EMALFDB;
}
if (sigopts) {
i = 0;
while (sigopts[i] != '\0') {
switch (sigopts[i]) {
case 'i':
nocase = 1;
break;
default:
cli_errmsg("cli_ac_addsig: Signature for %s uses invalid option: %02x\n", virname, sigopts[i]);
return CL_EMALFDB;
}
i++;
}
}
if((new = (struct cli_ac_patt *) mpool_calloc(root->mempool, 1, sizeof(struct cli_ac_patt))) == NULL)
return CL_EMEM;
@ -1853,6 +1879,13 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex
new->length = strlen(hex ? hex : hexsig) / 2;
free(hex);
/* setting nocase match */
if (nocase) {
for (i = 0; i < new->length; ++i)
if ((new->pattern[i] & CLI_MATCH_METADATA) == CLI_MATCH_CHAR)
new->pattern[i] += CLI_MATCH_NOCASE;
}
if (root->filter) {
/* so that we can show meaningful messages */
new->virname = (char*)virname;

@ -99,6 +99,6 @@ int cli_ac_buildtrie(struct cli_matcher *root);
int cli_ac_init(struct cli_matcher *root, uint8_t mindepth, uint8_t maxdepth, uint8_t dconf_prefiltering);
int cli_ac_caloff(const struct cli_matcher *root, struct cli_ac_data *data, const struct cli_target_info *info);
void cli_ac_free(struct cli_matcher *root);
int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t rtype, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, const uint32_t *lsigid, unsigned int options);
int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, const char *sigopts, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t rtype, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, const uint32_t *lsigid, unsigned int options);
#endif

@ -43,8 +43,10 @@ struct cli_target_info {
#include "fmap.h"
#include "mpool.h"
#define CLI_MATCH_WILDCARD 0xff00
/*
 * Bit layout of the high byte of a 16-bit pattern entry.
 *
 * CLI_MATCH_METADATA masks the entire high byte (0xff00). AC_MATCH_CHAR
 * switches on (p & CLI_MATCH_METADATA), so every case label below must be a
 * distinct value inside this mask.
 */
#define CLI_MATCH_METADATA 0xff00
/*
 * Masks only the low nibble of the high byte (0x0f00). The IGNORE, SPECIAL
 * and NIBBLE_HIGH values below all fall inside this mask; CLI_MATCH_NOCASE
 * does not.
 */
#define CLI_MATCH_WILDCARD 0x0f00
/* No metadata bits set: the low byte is compared exactly against the input. */
#define CLI_MATCH_CHAR 0x0000
/*
 * Case-insensitive literal: both sides are folded with cli_nocase() before
 * comparison. cli_ac_addsig adds this value only to entries whose metadata
 * equals CLI_MATCH_CHAR, so it is not combined with the wildcard values.
 */
#define CLI_MATCH_NOCASE 0x1000
/* AC_MATCH_CHAR performs no comparison for this value (the case just breaks). */
#define CLI_MATCH_IGNORE 0x0100
#define CLI_MATCH_SPECIAL 0x0200
#define CLI_MATCH_NIBBLE_HIGH 0x0300
@ -108,7 +110,7 @@ struct cli_matcher {
struct filter *filter;
uint16_t maxpatlen;
uint8_t ac_only;
uint8_t ac_nocase, ac_only;
/* Perl-Compiled Regular Expressions */
#if HAVE_PCRE

@ -262,6 +262,9 @@ struct cl_engine {
/* Roots table */
struct cli_matcher **root;
/* Yara table */
struct cli_matcher *yroot;
/* hash matcher for standard MD5 sigs */
struct cli_matcher *hm_hdb;
/* hash matcher for MD5 sigs for PE sections */

@ -116,7 +116,7 @@ char *cli_virname(const char *virname, unsigned int official)
}
#define PCRE_TOKENS 4
int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, uint16_t rtype, uint16_t type, const char *offset, uint8_t target, const uint32_t *lsigid, unsigned int options)
int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, const char *sigopts, uint16_t rtype, uint16_t type, const char *offset, uint8_t target, const uint32_t *lsigid, unsigned int options)
{
struct cli_bm_patt *bm_new;
char *pt, *hexcpy, *start, *n, l, r;
@ -178,18 +178,9 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
return CL_SUCCESS;
}
if (strchr(hexsig, '/')) {
#if HAVE_PCRE
/* expected format => ^offset:trigger/regex/[cflags]$ */
const char *trigger, *pattern, *cflags;
if (strrchr(hexsig, '/')) {
char *start, *end;
/* get checked */
if (hexsig[0] == '/') {
cli_errmsg("cli_parseadd(): PCRE subsig must contain logical trigger\n");
return CL_EMALFDB;
}
/* get copied */
hexcpy = cli_calloc(hexlen+1, sizeof(char));
if(!hexcpy)
@ -199,31 +190,48 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
/* get delimiters-ed */
start = strchr(hexcpy, '/');
end = strrchr(hexcpy, '/');
if (start == end) {
cli_errmsg("cli_parseadd(): PCRE expression must be delimited by '/'\n");
free(hexcpy);
return CL_EMALFDB;
}
/* get NULL-ed */
*start = '\0';
*end = '\0';
/* get pcre-ed */
if (start != end) {
#if HAVE_PCRE
/* expected format => ^offset:trigger/regex/[cflags]$ */
const char *trigger, *pattern, *cflags;
/* get tokens-ed */
trigger = hexcpy;
pattern = start+1;
cflags = end+1;
if (*cflags == '\0') /* get compat-ed */
cflags = NULL;
/* get checked */
if (hexsig[0] == '/') {
cli_errmsg("cli_parseadd(): PCRE subsig must contain logical trigger\n");
return CL_EMALFDB;
}
/* normal trigger, get added */
ret = cli_pcre_addpatt(root, virname, trigger, pattern, cflags, offset, lsigid, options);
free(hexcpy);
return ret;
/* get NULL-ed */
*start = '\0';
*end = '\0';
/* get tokens-ed */
trigger = hexcpy;
pattern = start+1;
cflags = end+1;
if (*cflags == '\0') /* get compat-ed */
cflags = NULL;
/* normal trigger, get added */
ret = cli_pcre_addpatt(root, virname, trigger, pattern, cflags, offset, lsigid, options);
free(hexcpy);
return ret;
#else
cli_errmsg("cli_parseadd(): cannot parse PCRE subsig without PCRE support\n");
return CL_EPARSE;
free(hexcpy);
cli_errmsg("cli_parseadd(): cannot parse PCRE subsig without PCRE support\n");
return CL_EPARSE;
#endif
} else { /* get option-ed */
/* get NULL-ed */
*end = '\0';
/* get called */
ret = cli_parse_add(root, virname, hexcpy, end+1, rtype, type, offset, target, lsigid, options);
free(hexcpy);
return ret;
}
}
else if((wild = strchr(hexsig, '{'))) {
if(sscanf(wild, "%c%u%c", &l, &range, &r) == 3 && l == '{' && r == '}' && range > 0 && range < 128) {
@ -242,7 +250,7 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
}
strcat(hexcpy, ++wild);
ret = cli_parse_add(root, virname, hexcpy, rtype, type, offset, target, lsigid, options);
ret = cli_parse_add(root, virname, hexcpy, sigopts, rtype, type, offset, target, lsigid, options);
free(hexcpy);
return ret;
@ -280,7 +288,7 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
*pt++ = 0;
}
if((ret = cli_ac_addsig(root, virname, start, root->ac_partsigs, parts, i, rtype, type, mindist, maxdist, offset, lsigid, options))) {
if((ret = cli_ac_addsig(root, virname, start, sigopts, root->ac_partsigs, parts, i, rtype, type, mindist, maxdist, offset, lsigid, options))) {
cli_errmsg("cli_parse_add(): Problem adding signature (1).\n");
error = 1;
break;
@ -363,7 +371,7 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
return CL_EMALFDB;
}
if((ret = cli_ac_addsig(root, virname, pt, root->ac_partsigs, parts, i, rtype, type, 0, 0, offset, lsigid, options))) {
if((ret = cli_ac_addsig(root, virname, pt, sigopts, root->ac_partsigs, parts, i, rtype, type, 0, 0, offset, lsigid, options))) {
cli_errmsg("cli_parse_add(): Problem adding signature (2).\n");
free(pt);
return ret;
@ -372,7 +380,7 @@ int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hex
free(pt);
}
} else if(root->ac_only || type || lsigid || strpbrk(hexsig, "?([") || (root->bm_offmode && (!strcmp(offset, "*") || strchr(offset, ','))) || strstr(offset, "VI") || strchr(offset, '$')) {
if((ret = cli_ac_addsig(root, virname, hexsig, 0, 0, 0, rtype, type, 0, 0, offset, lsigid, options))) {
if((ret = cli_ac_addsig(root, virname, hexsig, sigopts, 0, 0, 0, rtype, type, 0, 0, offset, lsigid, options))) {
cli_errmsg("cli_parse_add(): Problem adding signature (3).\n");
return ret;
}
@ -656,7 +664,7 @@ static int cli_loaddb(FILE *fs, struct cl_engine *engine, unsigned int *signo, u
if(*pt == '=') continue;
if((ret = cli_parse_add(root, start, pt, 0, 0, "*", 0, NULL, options))) {
if((ret = cli_parse_add(root, start, pt, NULL, 0, 0, "*", 0, NULL, options))) {
cli_dbgmsg("cli_loaddb: cli_parse_add failed on line %d\n", line);
ret = CL_EMALFDB;
break;
@ -1056,7 +1064,7 @@ static int cli_loadndb(FILE *fs, struct cl_engine *engine, unsigned int *signo,
offset = tokens[2];
sig = tokens[3];
if((ret = cli_parse_add(root, virname, sig, 0, 0, offset, target, NULL, options))) {
if((ret = cli_parse_add(root, virname, sig, NULL, 0, 0, offset, target, NULL, options))) {
ret = CL_EMALFDB;
break;
}
@ -1491,7 +1499,7 @@ static int load_oneldb(char *buffer, int chkpua, struct cl_engine *engine, unsig
sig = tokens[3 + i];
}
if((ret = cli_parse_add(root, virname, sig, 0, 0, offset, target, lsigid, options)))
if((ret = cli_parse_add(root, virname, sig, NULL, 0, 0, offset, target, lsigid, options)))
return ret;
if(sig[0] == '$' && i) {
@ -1769,7 +1777,7 @@ static int cli_loadftm(FILE *fs, struct cl_engine *engine, unsigned int options,
magictype = atoi(tokens[0]);
if(magictype == 1) { /* A-C */
if((ret = cli_parse_add(engine->root[0], tokens[3], tokens[2], rtype, type, tokens[1], 0, NULL, options)))
if((ret = cli_parse_add(engine->root[0], tokens[3], tokens[2], NULL, rtype, type, tokens[1], 0, NULL, options)))
break;
} else if ((magictype == 0) || (magictype == 4)) { /* memcmp() */

@ -69,7 +69,7 @@
char *cli_virname(const char *virname, unsigned int official);
int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, uint16_t rtype, uint16_t type, const char *offset, uint8_t target, const uint32_t *lsigid, unsigned int options);
int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, const char *sigopts, uint16_t rtype, uint16_t type, const char *offset, uint8_t target, const uint32_t *lsigid, unsigned int options);
int cli_load(const char *filename, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio);

@ -21,6 +21,7 @@
#ifndef __STR_H
#define __STR_H
#include <ctype.h>
#include <sys/types.h>
#include "cltypes.h"
@ -31,6 +32,9 @@
const char *cli_strcasestr(const char *haystack, const char *needle);
#endif
#include <stdio.h>
/*
 * Case-fold a single byte for case-insensitive matching.
 *
 * The argument is converted to unsigned char before it reaches tolower():
 * the <ctype.h> functions have undefined behavior for negative arguments
 * other than EOF, which is what a plain `char` holding a byte >= 0x80
 * becomes on platforms where char is signed. The result is used as an index
 * into the AC trie transition table, so it must never be negative.
 */
#define cli_nocase(val) tolower((unsigned char)(val))
int cli_strbcasestr(const char *haystack, const char *needle);
int cli_chomp(char *string);
char *cli_strtok(const char *line, int field, const char *delim);

@ -1998,7 +1998,7 @@ static void matchsig(const char *sig, const char *offset, int fd)
return;
}
if(cli_parse_add(engine->root[0], "test", sig, 0, 0, "*", 0, NULL, 0) != CL_SUCCESS) {
if(cli_parse_add(engine->root[0], "test", sig, NULL, 0, 0, "*", 0, NULL, 0) != CL_SUCCESS) {
mprintf("!matchsig: Can't parse signature\n");
cl_engine_free(engine);
return;
@ -2907,7 +2907,7 @@ static int dumpcerts(const struct optstruct *opts)
return -1;
}
if(cli_parse_add(engine->root[0], "test", "deadbeef", 0, 0, "*", 0, NULL, 0) != CL_SUCCESS) {
if(cli_parse_add(engine->root[0], "test", "deadbeef", NULL, 0, 0, "*", 0, NULL, 0) != CL_SUCCESS) {
mprintf("!dumpcerts: Can't parse signature\n");
cl_engine_free(engine);
return -1;

@ -99,7 +99,7 @@ START_TEST (test_ac_scanbuff) {
for(i = 0; ac_testdata[i].data; i++) {
ret = cli_parse_add(root, ac_testdata[i].virname, ac_testdata[i].hexsig, 0, 0, "*", 0, NULL, 0);
ret = cli_parse_add(root, ac_testdata[i].virname, ac_testdata[i].hexsig, NULL, 0, 0, "*", 0, NULL, 0);
fail_unless(ret == CL_SUCCESS, "cli_parse_add() failed");
}
@ -138,11 +138,11 @@ START_TEST (test_bm_scanbuff) {
ret = cli_bm_init(root);
fail_unless(ret == CL_SUCCESS, "cli_bm_init() failed");
ret = cli_parse_add(root, "Sig1", "deadbabe", 0, 0, "*", 0, NULL, 0);
ret = cli_parse_add(root, "Sig1", "deadbabe", NULL, 0, 0, "*", 0, NULL, 0);
fail_unless(ret == CL_SUCCESS, "cli_parse_add() failed");
ret = cli_parse_add(root, "Sig2", "deadbeef", 0, 0, "*", 0, NULL, 0);
ret = cli_parse_add(root, "Sig2", "deadbeef", NULL, 0, 0, "*", 0, NULL, 0);
fail_unless(ret == CL_SUCCESS, "cli_parse_add() failed");
ret = cli_parse_add(root, "Sig3", "babedead", 0, 0, "*", 0, NULL, 0);
ret = cli_parse_add(root, "Sig3", "babedead", NULL, 0, 0, "*", 0, NULL, 0);
fail_unless(ret == CL_SUCCESS, "cli_parse_add() failed");
ret = cli_bm_scanbuff((const unsigned char*)"blah\xde\xad\xbe\xef", 12, &virname, NULL, root, 0, NULL, NULL, NULL);
@ -169,7 +169,7 @@ START_TEST (test_ac_scanbuff_allscan) {
for(i = 0; ac_testdata[i].data; i++) {
ret = cli_parse_add(root, ac_testdata[i].virname, ac_testdata[i].hexsig, 0, 0, "*", 0, NULL, 0);
ret = cli_parse_add(root, ac_testdata[i].virname, ac_testdata[i].hexsig, NULL, 0, 0, "*", 0, NULL, 0);
fail_unless(ret == CL_SUCCESS, "cli_parse_add() failed");
}
@ -214,11 +214,11 @@ START_TEST (test_bm_scanbuff_allscan) {
ret = cli_bm_init(root);
fail_unless(ret == CL_SUCCESS, "cli_bm_init() failed");
ret = cli_parse_add(root, "Sig1", "deadbabe", 0, 0, "*", 0, NULL, 0);
ret = cli_parse_add(root, "Sig1", "deadbabe", NULL, 0, 0, "*", 0, NULL, 0);
fail_unless(ret == CL_SUCCESS, "cli_parse_add() failed");
ret = cli_parse_add(root, "Sig2", "deadbeef", 0, 0, "*", 0, NULL, 0);
ret = cli_parse_add(root, "Sig2", "deadbeef", NULL, 0, 0, "*", 0, NULL, 0);
fail_unless(ret == CL_SUCCESS, "cli_parse_add() failed");
ret = cli_parse_add(root, "Sig3", "babedead", 0, 0, "*", 0, NULL, 0);
ret = cli_parse_add(root, "Sig3", "babedead", NULL, 0, 0, "*", 0, NULL, 0);
fail_unless(ret == CL_SUCCESS, "cli_parse_add() failed");
ret = cli_bm_scanbuff((const unsigned char*)"blah\xde\xad\xbe\xef", 12, &virname, NULL, root, 0, NULL, NULL, NULL);

Loading…
Cancel
Save