diff --git a/clamav-devel/AUTHORS b/clamav-devel/AUTHORS index f7e825544..5208cf304 100644 --- a/clamav-devel/AUTHORS +++ b/clamav-devel/AUTHORS @@ -65,12 +65,13 @@ libclamav/unrarlib.c: Christian Scheurer and Johannes Winkelmann, see www.unrarlib.org libclamav/mspack: Stuart Caie -Code patches submitted by (in alphabetical order): +Patch submitters (in alphabetical order): Kamil Andrusz Patrick Bihan-Faou Martin Blapp Igor Brezac +Mike Brudenell Len Budney David Champion Andrey Cherezov diff --git a/clamav-devel/ChangeLog b/clamav-devel/ChangeLog index 9ac15c91f..af78adb45 100644 --- a/clamav-devel/ChangeLog +++ b/clamav-devel/ChangeLog @@ -1,3 +1,14 @@ +Wed Aug 18 20:37:42 CEST 2004 (tk) +---------------------------------- + * libclamav/contrib: Include database optimisation tool (optimize/optimize.c). + It's ClamAV specific and attempts to normalise signature + prefixes so there are more signatures using the same + prefix (and saving nodes in the Aho-Corasick pattern + matcher (but slowing it down)). Included for educational + purposes. + * clamscan/sigtool: fix compilation problems with Sun's SUNWspro C (patch + by Mike Brudenell ) + Wed Aug 18 16:54:01 BST 2004 (njh) ---------------------------------- * libclamav/mbox.c: Only followURL if CL_MAILURL is set. 
diff --git a/clamav-devel/clamscan/manager.c b/clamav-devel/clamscan/manager.c index d57dfa9da..045eb11c3 100644 --- a/clamav-devel/clamscan/manager.c +++ b/clamav-devel/clamscan/manager.c @@ -526,7 +526,11 @@ int scancompressed(const char *filename, struct cl_node *root, const struct pass /* unpack file - as unprivileged user */ if(cli_strbcasestr(filename, ".zip")) { - char *args[] = { "unzip", "-P", "clam", "-o", (char *) filename, NULL }; + char *args[] = { "unzip", "-P", "clam", "-o", NULL, NULL }; + /* Sun's SUNWspro C compiler doesn't allow direct initialisation + * with a variable + */ + args[4] = (char *) filename; if((userprg = getargl(opt, "unzip"))) ret = clamav_unpack(userprg, args, gendir, user, opt); @@ -534,56 +538,64 @@ int scancompressed(const char *filename, struct cl_node *root, const struct pass ret = clamav_unpack("unzip", args, gendir, user, opt); } else if(cli_strbcasestr(filename, ".rar")) { - char *args[] = { "unrar", "x", "-p-", "-y", (char *) filename, NULL }; + char *args[] = { "unrar", "x", "-p-", "-y", NULL, NULL }; + args[4] = (char *) filename; if((userprg = getargl(opt, "unrar"))) ret = clamav_unpack(userprg, args, gendir, user, opt); else ret = clamav_unpack("unrar", args, gendir, user, opt); } else if(cli_strbcasestr(filename, ".arj")) { - char *args[] = { "arj", "x","-y", (char *) filename, NULL }; + char *args[] = { "arj", "x","-y", NULL, NULL }; + args[3] = (char *) filename; if((userprg = getargl(opt, "arj"))) ret = clamav_unpack(userprg, args, gendir, user, opt); else ret = clamav_unpack("arj", args, gendir, user, opt); } else if(cli_strbcasestr(filename, ".zoo")) { - char *args[] = { "unzoo", "-x","-j","./", (char *) filename, NULL }; + char *args[] = { "unzoo", "-x","-j","./", NULL, NULL }; + args[4] = (char *) filename; if((userprg = getargl(opt, "unzoo"))) ret = clamav_unpack(userprg, args, gendir, user, opt); else ret = clamav_unpack("unzoo", args, gendir, user, opt); } else if(cli_strbcasestr(filename, ".jar")) { - 
char *args[] = { "unzip", "-P", "clam", "-o", (char *) filename, NULL }; + char *args[] = { "unzip", "-P", "clam", "-o", NULL, NULL }; + args[4] = (char *) filename; if((userprg = getargl(opt, "jar"))) ret = clamav_unpack(userprg, args, gendir, user, opt); else ret = clamav_unpack("unzip", args, gendir, user, opt); } else if(cli_strbcasestr(filename, ".lzh")) { - char *args[] = { "lha", "xf", (char *) filename, NULL }; + char *args[] = { "lha", "xf", NULL, NULL }; + args[2] = (char *) filename; if((userprg = getargl(opt, "lha"))) ret = clamav_unpack(userprg, args, gendir, user, opt); else ret = clamav_unpack("lha", args, gendir, user, opt); } else if(cli_strbcasestr(filename, ".tar")) { - char *args[] = { "tar", "-xpvf", (char *) filename, NULL }; + char *args[] = { "tar", "-xpvf", NULL, NULL }; + args[2] = (char *) filename; if((userprg = getargl(opt, "tar"))) ret = clamav_unpack(userprg, args, gendir, user, opt); else ret = clamav_unpack("tar", args, gendir, user, opt); } else if(cli_strbcasestr(filename, ".deb")) { - char *args[] = { "ar", "x", (char *) filename, NULL }; + char *args[] = { "ar", "x", NULL, NULL }; + args[2] = (char *) filename; if((userprg = getargl(opt, "deb"))) ret = clamav_unpack(userprg, args, gendir, user, opt); else ret = clamav_unpack("ar", args, gendir, user, opt); } else if((cli_strbcasestr(filename, ".tar.gz") || cli_strbcasestr(filename, ".tgz"))) { - char *args[] = { "tar", "-zxpvf", (char *) filename, NULL }; + char *args[] = { "tar", "-zxpvf", NULL, NULL }; + args[2] = (char *) filename; if((userprg = getargl(opt, "tgz"))) ret = clamav_unpack(userprg, args, gendir, user, opt); else diff --git a/clamav-devel/contrib/optimize/optimize.c b/clamav-devel/contrib/optimize/optimize.c new file mode 100644 index 000000000..6f0c01179 --- /dev/null +++ b/clamav-devel/contrib/optimize/optimize.c @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2004 Tomasz Kojm + * + * This program is free software; you can redistribute it and/or modify + * it under the 
terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include + +#define MINLENGTH 10 /* signatures shorter than MINLENGTH hex characters are copied to the output unchanged */ +#define FILEBUFF 16384 /* size of the line buffer handed to fgets() */ +#define ANALYZE 6 /* only the first ANALYZE decoded bytes (2 * ANALYZE hex characters) are analyzed */ + +int hex2int(int c) /* value 0..15 of the hex digit c (upper or lower case), -1 if c is not a hex digit */ +{ + int l = tolower(c); /* NOTE(review): tolower() runs before the isascii() check and hex2str() passes plain char values - confirm negative values cannot reach it */ + + if (!isascii(l)) + return -1; + if (isdigit(l)) + return l - '0'; + if ((l >= 'a') && (l <= 'f')) + return l + 10 - 'a'; + + return -1; +} + +char *hex2str(const char *hex, int howmany) /* decodes at most howmany hex characters of hex into a zero-terminated buffer obtained from calloc(); returns NULL when hex has an odd length, a question mark or a non-hex character is met in the decoded range, or calloc() fails */ +{ + short int val, c; + int i, len; + char *str, *ptr; + + len = strlen(hex); + + /* additional check - hex strings must have an even length here */ + if(len % 2 != 0) { + printf("hex2str(): Malformed hexstring: %s (length: %d)\n", hex, len); + return NULL; + } + + str = calloc((howmany / 2) + 1, sizeof(char)); + if(!str) + return NULL; + + ptr = str; + + if(howmany > len) + howmany = len; /* never decode past the end of hex */ + + for(i = 0; i < howmany; i += 2) { + if(hex[i] == '?') { + printf("Can't optimize polymorphic signature.\n"); + free(str); + return NULL; + } else { + if((c = hex2int(hex[i])) >= 0) { + val = c; + if((c = hex2int(hex[i+1])) >= 0) { + val = (val << 4) + c; + } else { + free(str); + return NULL; + } + } else { + free(str); + return NULL; + } + } + *ptr++ = val; /* store the byte assembled from the two hex digits */ + } + + return str; +} + +void chomp(char *string) /* strips a trailing newline or carriage return and, when present, one carriage return directly before it */ +{ + size_t l = strlen(string); + + if(l == 0) + return; + + --l; + if((string[l] == '\n') || (string[l] == '\r')) { + string[l] = 
'\0'; + + if(l > 0) { + --l; + if(string[l] == '\r') + string[l] = '\0'; + } + } +} + +int main(int argc, char **argv) /* usage: prog input_db output_db - copies the lines of input_db to output_db, moving the start of a signature forward when a byte pair among its first ANALYZE bytes is already recorded in prefix */ +{ + int line = 0, found, i, nodes = 0, optimized = 0, optimal = 0; + unsigned char c1, c2; + char *buffer, *start, *pt, **prefix, *sig; + FILE *in, *out; + + + if(argc != 3) { + printf("%s input_db output_db\n", argv[0]); + exit(1); + } + + if((in = fopen(argv[1], "rb")) == NULL) { + printf("Can't open input database %s\n", argv[1]); + exit(1); + } + + if((out = fopen(argv[2], "wb")) == NULL) { + printf("Can't open output database %s\n", argv[1]); /* NOTE(review): prints argv[1] although the file that failed to open is argv[2] */ + exit(1); + } + + prefix = (char **) calloc(256, sizeof(char *)); /* 256 x 256 table: prefix[a][b] is set once the byte pair (a, b) has been recorded; NOTE(review): these calloc() results are not checked */ + for(i = 0; i < 256; i++) + prefix[i] = (char *) calloc(256, sizeof(char)); + + if(!(buffer = (char *) malloc(FILEBUFF))) { + exit(1); + } + + memset(buffer, 0, FILEBUFF); + + while(fgets(buffer, FILEBUFF, in)) { + + line++; + chomp(buffer); + + pt = strchr(buffer, '='); + if(!pt) { + printf("Malformed pattern line %d.\n", line); + free(buffer); + exit(1); + } + + start = buffer; + *pt++ = 0; /* split the line at the first equals sign: start is the text before it, pt the text after it */ + + if(*pt == '=') + continue; /* a second equals sign right after the first: the line is skipped and not written to out */ + + if(strlen(pt) < MINLENGTH) { + fprintf(out, "%s=%s\n", start, pt); + continue; + } + + sig = hex2str(pt, 2 * ANALYZE); /* NOTE(review): sig is never freed in this function */ + + if(!sig) { + printf("Can't decode signature %d\n", line); + exit(1); + } + + found = -1; + + for(i = 0; i < ANALYZE - 1; i++) { + c1 = ((unsigned char) sig[i]) & 0xff; + c2 = ((unsigned char) sig[i + 1]) & 0xff; + + if(prefix[c1][c2]) { + found = i; /* first already recorded pair starts at byte i */ + break; + } + } + + if(found < 0) { + printf("Can't optimize signature %d\n", line); + prefix[c1][c2] = 1; /* c1 and c2 still hold the last pair examined by the loop above (bytes ANALYZE - 2 and ANALYZE - 1) */ + nodes++; + } else if(found == 0) { + printf("Signature %d is already optimal.\n", line); + optimal++; + } else { + pt = pt + 2 * found; /* drop the first found bytes (two hex characters each) from the written signature */ + printf("Signature %d optimized (new start at %d byte)\n", line, found); + optimized++; + } + + fprintf(out, "%s=%s\n", start, pt); + } + + fclose(in); + fclose(out); + + free(buffer); + for(i = 0; i < 256; i++) + free(prefix[i]); + free(prefix); + + printf("Nodes: %d, Optimal: %d, Signatures optimized: 
%d\n", nodes, optimal, optimized); + exit(0); +} diff --git a/clamav-devel/sigtool/sigtool.c b/clamav-devel/sigtool/sigtool.c index b8421a4d9..e210e237d 100644 --- a/clamav-devel/sigtool/sigtool.c +++ b/clamav-devel/sigtool/sigtool.c @@ -254,7 +254,8 @@ int build(struct optstruct *opt) exit(1); case 0: { - char *args[] = { "tar", "-cvf", tarfile, "COPYING", "viruses.db", "viruses.db2", "Notes", "viruses.db3", NULL }; + char *args[] = { "tar", "-cvf", NULL, "COPYING", "viruses.db", "viruses.db2", "Notes", "viruses.db3", NULL }; + args[2] = tarfile; execv("/bin/tar", args); mprintf("!Can't execute tar\n"); perror("tar");