From f73920a4840f3b79f6507faeca394a4225705466 Mon Sep 17 00:00:00 2001 From: Nigel Horne Date: Fri, 25 Jun 2004 13:58:41 +0000 Subject: [PATCH] Optimise messages without other messages encapsulated within them git-svn: trunk@633 --- clamav-devel/ChangeLog | 25 +++++++++++-------- clamav-devel/libclamav/mbox.c | 47 +++++++++++++++++++++++------------ 2 files changed, 46 insertions(+), 26 deletions(-) diff --git a/clamav-devel/ChangeLog b/clamav-devel/ChangeLog index c18af6ae4..cf1d0dc34 100644 --- a/clamav-devel/ChangeLog +++ b/clamav-devel/ChangeLog @@ -1,7 +1,12 @@ +Fri Jun 25 14:57:06 BST 2004 (njh) +---------------------------------- + * libclamav/mbox.c: Optimise the scanning of emails that don't have + other emails within them. + Thu Jun 24 22:38:16 BST 2004 (njh) ---------------------------------- * libclamav: Plug small memory leak when scanning emails with a large - number of attachments + number of attachments Handle uuencoded files created with buggy software Thu Jun 24 09:09:27 BST 2004 (trog) @@ -12,7 +17,7 @@ Thu Jun 24 09:09:27 BST 2004 (trog) Wed Jun 23 17:23:59 BST 2004 (njh) ---------------------------------- * libclamav/mbox.c: Further optimisation in the handling of empty lines - in emails + in emails Wed Jun 23 15:16:20 BST 2004 (trog) ----------------------------------- @@ -26,7 +31,7 @@ Tue Jun 22 18:47:32 CEST 2004 (tk) Tue Jun 22 11:58:06 BST 2004 (njh/trog) --------------------------------------- * libclamav/str.c: Rewrote cli_chomp() as discussed in the clamav-devel - mailing list + mailing list Tue Jun 22 05:09:54 BST 2004 (njh) ---------------------------------- @@ -34,8 +39,8 @@ Tue Jun 22 05:09:54 BST 2004 (njh) clamfi_abort Removed compilation warning in FreeBSD5.2 * libclamav: Call cli_chomp() twice to ensure \r is also removed in - emails - Optimise handling of blank lines in emails + emails + Optimise handling of blank lines in emails Trialing Andrey J. Melnikoff (TEMHOTA) 's patch to print stack trace on SIGSEGV. 
Tidied, optimised and applied the patch. Comments welcome. @@ -68,7 +73,7 @@ Mon Jun 21 11:21:48 BST 2004 (njh) Fri Jun 18 11:08:26 BST 2004 (njh) ---------------------------------- * libclamav: Allow any number of alternatives in - multipart messages + multipart messages Wed Jun 16 09:09:45 BST 2004 (njh) ---------------------------------- @@ -79,7 +84,7 @@ Wed Jun 16 09:09:45 BST 2004 (njh) * docs/man/clamav-milter.8: Added access to sendmail variables in template files * libclamav: Added small performance improvements - Added thread safety measures + Added thread safety measures Tue Jun 15 22:41:03 CEST 2004 (tk) ---------------------------------- @@ -99,7 +104,7 @@ Mon Jun 14 15:35:04 BST 2004 (njh) Mon Jun 14 10:07:24 BST 2004 (njh) ---------------------------------- * libclamav/mbox.c: Some spam generates very broken headers, added fix - to try to scan (with warnings about the assumptions made) + to try to scan (with warnings about the assumptions made) Sun Jun 13 14:26:33 CEST 2004 (tk) ---------------------------------- @@ -166,7 +171,7 @@ Tue Jun 8 22:46:29 BST 2004 (njh) Sun Jun 6 22:35:19 BST 2004 (njh) ---------------------------------- * libclamav/mbox.c: Find uuencoded viruses in multipart/mixed that have no - start of message boundaries + start of message boundaries Sun Jun 6 03:38:08 CEST 2004 (tk) ---------------------------------- @@ -211,7 +216,7 @@ Wed Jun 2 02:30:34 CEST 2004 (tk) Tue Jun 1 10:09:02 BST 2004 (njh) ---------------------------------- * libclamav/message.c: Corrupted BinHex could still cause crash on - some non Linux systems (thanks to Trog for spotting this one) + some non Linux systems (thanks to Trog for spotting this one) Sun May 30 03:35:38 CEST 2004 (tk) ---------------------------------- diff --git a/clamav-devel/libclamav/mbox.c b/clamav-devel/libclamav/mbox.c index 457e83501..cd0e452a4 100644 --- a/clamav-devel/libclamav/mbox.c +++ b/clamav-devel/libclamav/mbox.c @@ -17,6 +17,9 @@ * * Change History: * $Log: mbox.c,v $ + 
* Revision 1.82 2004/06/25 13:56:38 nigelhorne + * Optimise messages without other messages encapsulated within them + * * Revision 1.81 2004/06/24 21:36:38 nigelhorne * Plug memory leak with large number of attachments * @@ -231,7 +234,7 @@ * Compilable under SCO; removed duplicate code with message.c * */ -static char const rcsid[] = "$Id: mbox.c,v 1.81 2004/06/24 21:36:38 nigelhorne Exp $"; +static char const rcsid[] = "$Id: mbox.c,v 1.82 2004/06/25 13:56:38 nigelhorne Exp $"; #if HAVE_CONFIG_H #include "clamav-config.h" @@ -306,7 +309,7 @@ static void print_trace(int use_syslog); typedef enum { FALSE = 0, TRUE = 1 } bool; -static message *parseEmailHeaders(const message *m, const table_t *rfc821Table); +static message *parseEmailHeaders(message *m, const table_t *rfc821Table, bool destroy); static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table); static int parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, const char *dir, table_t *rfc821Table, table_t *subtypeTable); static int boundaryStart(const char *line, const char *boundary); @@ -488,7 +491,7 @@ cl_mbox(const char *dir, int desc) /* * End of a message in the mail box */ - body = parseEmailHeaders(m, rfc821Table); + body = parseEmailHeaders(m, rfc821Table, TRUE); messageDestroy(m); if(messageGetBody(body)) if(!parseEmailBody(body, NULL, 0, NULL, dir, rfc821Table, subtypeTable)) { @@ -534,7 +537,7 @@ cl_mbox(const char *dir, int desc) retcode = 0; - body = parseEmailHeaders(m, rfc821Table); + body = parseEmailHeaders(m, rfc821Table, TRUE); messageDestroy(m); /* * Write out the last entry in the mailbox @@ -563,13 +566,20 @@ cl_mbox(const char *dir, int desc) * This function parses the headers of m and sets the message's arguments * * Returns the message's body with the correct arguments set + * + * The downside of this approach is that for a short time we have two copies + * of the message in memory, the upside is that it makes for easier parsing + * 
of encapsulated messages, and in the long run uses less memory in those + * scenarios + * BUT: if 'destroy' is set, the caller has given us a hint that 'm' will + * not be used again before it is destroyed, so we can trash it */ static message * -parseEmailHeaders(const message *m, const table_t *rfc821Table) +parseEmailHeaders(message *m, const table_t *rfc821Table, bool destroy) { bool inContinuationHeader = FALSE; /* state machine: ugh */ bool inHeader = TRUE; - const text *t; + text *t; message *ret; cli_dbgmsg("parseEmailHeaders\n"); @@ -579,16 +589,21 @@ parseEmailHeaders(const message *m, const table_t *rfc821Table) ret = messageCreate(); - for(t = messageGetBody(m); t; t = t->t_next) { + for(t = (text *)messageGetBody(m); t; t = t->t_next) { char *buffer; #ifdef CL_THREAD_SAFE char *strptr; #endif if(t->t_text) { - buffer = strdup(t->t_text); - if(buffer == NULL) - break; + if(destroy) { + buffer = t->t_text; + t->t_text = NULL; + } else { + buffer = strdup(t->t_text); + if(buffer == NULL) + break; + } if(cli_chomp(buffer) == 0) { free(buffer); buffer = NULL; @@ -605,8 +620,6 @@ parseEmailHeaders(const message *m, const table_t *rfc821Table) inContinuationHeader = TRUE; if(inContinuationHeader) { - const char *ptr; - if(!continuationMarker(buffer)) inContinuationHeader = FALSE; /* no more args */ @@ -614,6 +627,8 @@ parseEmailHeaders(const message *m, const table_t *rfc821Table) * Add all the arguments on the line */ if(buffer) { + const char *ptr; + for(ptr = strtok_r(buffer, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr)) messageAddArgument(ret, ptr); free(buffer); @@ -1221,7 +1236,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con * many nested levels are * involved. 
*/ - body = parseEmailHeaders(aMessage, rfc821Table); + body = parseEmailHeaders(aMessage, rfc821Table, TRUE); /* * We've fininished with the * original copy of the message, @@ -1247,7 +1262,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con */ cli_dbgmsg("Found multipart inside multipart\n"); if(aMessage) { - body = parseEmailHeaders(aMessage, rfc821Table); + body = parseEmailHeaders(aMessage, rfc821Table, TRUE); if(body) { assert(aMessage == messages[i]); messageDestroy(messages[i]); @@ -1449,7 +1464,7 @@ parseEmailBody(message *messageIn, blob **blobsIn, int nBlobs, text *textIn, con } if((strcasecmp(mimeSubtype, "rfc822") == 0) || (strcasecmp(mimeSubtype, "delivery-status") == 0)) { - message *m = parseEmailHeaders(mainMessage, rfc821Table); + message *m = parseEmailHeaders(mainMessage, rfc821Table, FALSE); if(m) { cli_dbgmsg("Decode rfc822"); @@ -2116,7 +2131,7 @@ print_trace(int use_syslog) if(use_syslog == 0) cli_dbgmsg("Backtrace of pid %d:\n", pid); - else + else syslog(LOG_ERR, "Backtrace of pid %d:", pid); for(i = 0; i < size; i++)