support for generic text normalizer (CL_TYPE_SCRIPT)

git-svn: trunk@3584
remotes/push_mirror/metadata
Török Edvin 18 years ago
parent 3064a5428c
commit 015ce4a89b
  1. 5
      ChangeLog
  2. 1
      libclamav/dconf.c
  3. 1
      libclamav/dconf.h
  4. 1
      libclamav/filetypes.c
  5. 1
      libclamav/filetypes.h
  6. 73
      libclamav/scanners.c

@ -1,3 +1,8 @@
Mon Feb 4 23:20:12 EET 2008 (edwin)
------------------------------------
* libclamav/scanners, filetypes, dconf:
support for generic text normalizer (CL_TYPE_SCRIPT)
Mon Feb 4 23:06:34 EET 2008 (edwin)
---------------------------------
* libclamav/textnorm.[ch]: generic text normalizer (bb #241)

@ -86,6 +86,7 @@ static struct dconf_module modules[] = {
{ "DOCUMENT", "HTML", DOC_CONF_HTML, 1 },
{ "DOCUMENT", "RTF", DOC_CONF_RTF, 1 },
{ "DOCUMENT", "PDF", DOC_CONF_PDF, 1 },
{ "DOCUMENT", "SCRIPT", DOC_CONF_SCRIPT, 1 },
{ "MAIL", "MBOX", MAIL_CONF_MBOX, 1 },
{ "MAIL", "TNEF", MAIL_CONF_TNEF, 1 },

@ -74,6 +74,7 @@ struct cli_dconf {
#define DOC_CONF_HTML 0x1
#define DOC_CONF_RTF 0x2
#define DOC_CONF_PDF 0x4
#define DOC_CONF_SCRIPT 0x8
/* Mail flags */
#define MAIL_CONF_MBOX 0x1

@ -76,6 +76,7 @@ static const struct ftmap_s {
{ "CL_TYPE_PDF", CL_TYPE_PDF },
{ "CL_TYPE_UUENCODED", CL_TYPE_UUENCODED },
{ "CL_TYPE_HTML_UTF16", CL_TYPE_HTML_UTF16 },
{ "CL_TYPE_SCRIPT", CL_TYPE_SCRIPT },
{ "CL_TYPE_RTF", CL_TYPE_RTF },
{ "CL_TYPE_HTML", CL_TYPE_HTML },
{ "CL_TYPE_MAIL", CL_TYPE_MAIL },

@ -63,6 +63,7 @@ typedef enum {
CL_TYPE_CRYPTFF,
CL_TYPE_PDF,
CL_TYPE_UUENCODED,
CL_TYPE_SCRIPT,
CL_TYPE_HTML_UTF16,
CL_TYPE_RTF,

@ -85,6 +85,7 @@
#include "unarj.h"
#include "nulsft.h"
#include "autoit.h"
#include "textnorm.h"
#include <zlib.h>
#include "unzip.h"
@ -1064,6 +1065,73 @@ static int cli_scanhtml(int desc, cli_ctx *ctx)
return ret;
}
static int cli_scanscript(int desc, cli_ctx *ctx)
{
unsigned char buff[FILEBUFF];
unsigned char normalized[SCANBUFF];
struct text_norm_state state;
struct stat sb;
char *tmpname = NULL;
int ofd = -1, ret;
ssize_t nread;
cli_dbgmsg("in cli_scantext()\n");
if(fstat(desc, &sb) == -1) {
cli_errmsg("cli_scanscript: fstat() failed for descriptor %d\n", desc);
return CL_EIO;
}
/* don't normalize files that are too large */
if(sb.st_size > 10485760) {
cli_dbgmsg("cli_scanscript: exiting (file larger than 10 MB)\n");
return CL_CLEAN;
}
/* dump to disk only if explicitly asked to,
* otherwise we can process just in-memory */
if(cli_leavetemps_flag) {
if((ret = cli_gentempfd(NULL, &tmpname, &ofd))) {
cli_dbgmsg("cli_scanscript: Can't generate temporary file/descriptor\n");
return ret;
}
}
text_normalize_init(&state, normalized, sizeof(normalized));
ret = CL_CLEAN;
do {
nread = cli_readn(desc, buff, sizeof(buff));
if(nread <= 0 || state.out_pos + nread > state.out_len) {
/* flush if error/EOF, or too little buffer space left */
if((ofd != -1) && (write(ofd, state.out, state.out_pos) == -1)) {
cli_errmsg("cli_scanscript: can't write to file %s\n",tmpname);
close(ofd);
ofd = -1;
/* we can continue to scan in memory */
}
/* when we flush the buffer also scan */
if(cli_scanbuff(state.out, state.out_pos, ctx->virname, ctx->engine, CL_TYPE_TEXT_ASCII) == CL_VIRUS) {
ret = CL_VIRUS;
break;
}
text_normalize_reset(&state);
}
if(nread > 0 && (text_normalize_buffer(&state, buff, nread)) != nread) {
cli_dbgmsg("cli_scanscript: short read during normalizing\n");
}
/* used a do {}while() here, since we need to flush our buffers at the end,
* and using while(){} loop would mean code duplication */
} while (nread > 0);
if(cli_leavetemps_flag) {
free(tmpname);
close(ofd);
}
return ret;
}
static int cli_scanhtml_utf16(int desc, cli_ctx *ctx)
{
char *tempname, buff[512], *decoded;
@ -1838,6 +1906,11 @@ int cli_magic_scandesc(int desc, cli_ctx *ctx)
ret = cli_scanhtml_utf16(desc, ctx);
break;
case CL_TYPE_SCRIPT:
if(DCONF_DOC & DOC_CONF_SCRIPT)
ret = cli_scanscript(desc, ctx);
break;
case CL_TYPE_RTF:
if(DCONF_DOC & DOC_CONF_RTF)
ret = cli_scanrtf(desc, ctx);

Loading…
Cancel
Save