re-write HTML code

git-svn: trunk@849
remotes/push_mirror/metadata
Trog 21 years ago
parent d272908ae4
commit e57fa318f7
  1. 8
      clamav-devel/ChangeLog
  2. 1
      clamav-devel/libclamav/filetypes.c
  3. 1
      clamav-devel/libclamav/filetypes.h
  4. 1143
      clamav-devel/libclamav/htmlnorm.c
  5. 16
      clamav-devel/libclamav/htmlnorm.h
  6. 108
      clamav-devel/libclamav/scanners.c

@ -1,3 +1,11 @@
Mon Sep 13 11:23:21 BST 2004 (trog)
-----------------------------------
* libclamav: re-write HTML code:
- decode MS Script Encoder code
- doesn't require mmap(), uses it if available
- extract href tag values
- single pass parser
Mon Sep 13 03:31:58 CEST 2004 (tk)
----------------------------------
* libclamav: CL_BLOCKMAX: allow blocking (i.e. marking as viruses) of

@ -62,6 +62,7 @@ static const struct cli_magic_s cli_magic[] = {
{0, "MSCF", 4, "MS CAB", CL_MSCABFILE},
{0, "ITSF", 4, "MS CHM", CL_MSCHMFILE},
{257, "ustar", 5, "POSIX tar", CL_TARFILE},
{0, "#@~^", 4, "SCRENC", CL_SCRENC},
/* Mail */

@ -37,6 +37,7 @@ typedef enum {
CL_OLE2FILE,
CL_MSCABFILE,
CL_MSCHMFILE,
CL_SCRENC,
/* bigger numbers have higher priority (in o-t-f detection) */
CL_HTMLFILE, /* on the fly */

File diff suppressed because it is too large Load Diff

@ -19,11 +19,17 @@
#ifndef __HTMLNORM_H
#define __HTMLNORM_H
#include <sys/types.h>
/*
 * Collection of tag arguments handed back by the HTML normaliser through the
 * `hrefs` parameter of html_normalise_mem() / html_normalise_fd().
 * NOTE(review): presumably tag[i] and value[i] form one pair for
 * 0 <= i < count, released with html_tag_arg_free() -- confirm against
 * htmlnorm.c, which is not shown here.
 */
typedef struct tag_arguments_tag
{
int count; /* presumably the number of entries in tag[] and value[] -- TODO confirm */
unsigned char **tag; /* array of tag strings */
unsigned char **value; /* array of value strings */
} tag_arguments_t;
/*
 * NOTE(review): this hunk is shown without +/- markers, so prototypes of the
 * older buffer-based API and of the newer directory-based API appear side by
 * side; which of them survive the commit cannot be told from here.
 */

/* Buffer-based helpers: the visible caller free()s each returned buffer. */
unsigned char *html_normalize(unsigned char *in_buff, off_t in_size);
unsigned char *remove_html_comments(unsigned char *line);
unsigned char *remove_html_char_ref(unsigned char *line);
char *quoted_decode(unsigned char *line, off_t in_size);
/*
 * Normalise HTML from a memory buffer or a file descriptor into `dirname`.
 * The visible caller passes NULL for `hrefs` and afterwards opens
 * comment.html, nocomment.html and script.html under `dirname`, so those
 * files are presumably produced here -- TODO confirm in htmlnorm.c.
 */
int html_normalise_mem(unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs);
int html_normalise_fd(int fd, const char *dirname, tag_arguments_t *hrefs);
/* Presumably releases what the normaliser stored in `tags` -- TODO confirm. */
void html_tag_arg_free(tag_arguments_t *tags);
/*
 * Decode Script Encoder data read from `fd` into `dirname`.
 * The visible caller scans `dirname` only when the return value is non-zero.
 */
int html_screnc_decode(int fd, const char *dirname);
#endif

@ -680,55 +680,58 @@ static int cli_scanmscab(int desc, const char **virname, long int *scanned, cons
/*
 * Scan an HTML document referenced by the descriptor `desc`.
 *
 * NOTE(review): this span is a diff hunk rendered without its +/- markers, so
 * removed and added lines are interleaved: `ret` is declared twice and the
 * `if(membuff == MAP_FAILED) {` block is never closed. It does not compile as
 * shown and should be read as two overlapping versions of the function.
 *
 * The lines using statbuf/membuff/newbuff/newbuff2, mmap() and cl_scanbuff()
 * appear to belong to the removed in-memory implementation; the lines using
 * tempname/fullname, html_normalise_fd() and cli_scandesc() appear to belong
 * to the added temporary-directory implementation -- TODO confirm against the
 * raw diff.
 */
static int cli_scanhtml(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec)
{
unsigned char *membuff, *newbuff, *newbuff2;
struct stat statbuf;
int ret;
char *tempname, fullname[1024];
const char *tmpdir;
int ret=CL_CLEAN, fd;
#ifdef HAVE_MMAP
cli_dbgmsg("in cli_scanhtml()\n");
if(fstat(desc, &statbuf) != 0) {
cli_dbgmsg("HTML: Can't stat descriptor %d\n", desc);
return CL_EIO;
}
/* Temporary directory root: $TMPDIR, else P_tmpdir when defined, else "/tmp". */
if((tmpdir = getenv("TMPDIR")) == NULL)
#ifdef P_tmpdir
tmpdir = P_tmpdir;
#else
tmpdir = "/tmp";
#endif
/* Files larger than limits->maxfilesize are not scanned and are reported clean. */
if(limits && limits->maxfilesize && (statbuf.st_size > limits->maxfilesize)) {
cli_dbgmsg("HTML: Size exceeded (%d, max: %ld)\n", statbuf.st_size, limits->maxfilesize);
return CL_CLEAN;
}
tempname = cli_gentemp(tmpdir);
membuff = mmap(NULL, statbuf.st_size, PROT_READ, MAP_PRIVATE, desc, 0);
/*
 * NOTE(review): tempname is free()d only at the end of the function; this
 * early return does not free it -- looks like a leak, confirm.
 */
if(mkdir(tempname, 0700)) {
cli_dbgmsg("ScanHTML -> Can't create temporary directory %s\n", tempname);
return CL_ETMPDIR;
}
/* TODO: do file operations if mmap fails */
if(membuff == MAP_FAILED) {
cli_dbgmsg("HTML: mmap failed\n");
return CL_EMEM;
/* The hrefs argument is NULL here; the return value is not checked. */
html_normalise_fd(desc, tempname, NULL);
/* Scan comment.html if it can be opened; a failed open leaves ret unchanged. */
snprintf(fullname, 1024, "%s/comment.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0);
close(fd);
}
newbuff = html_normalize(membuff, statbuf.st_size);
/* nocomment.html is scanned only while the result is still CL_CLEAN. */
if (ret == CL_CLEAN) {
snprintf(fullname, 1024, "%s/nocomment.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0);
close(fd);
}
}
if(newbuff) {
newbuff2 = remove_html_comments(newbuff);
free(newbuff);
newbuff = remove_html_char_ref(newbuff2);
free(newbuff2);
/* Normalise a second time as the above can leave inconsistent white
* space
*/
newbuff2 = html_normalize(newbuff, strlen(newbuff));
free(newbuff);
newbuff = newbuff2;
/* script.html is likewise scanned only while the result is still CL_CLEAN. */
if (ret == CL_CLEAN) {
snprintf(fullname, 1024, "%s/script.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0);
close(fd);
}
}
ret = cl_scanbuff(newbuff, strlen(newbuff), virname, root);
/* The temporary directory is removed unless cli_leavetemps_flag is set. */
if(!cli_leavetemps_flag)
cli_rmdirs(tempname);
free(newbuff);
munmap(membuff, statbuf.st_size);
free(tempname);
return ret;
#else /* FIXME */
return CL_CLEAN;
#endif
}
static int cli_scan_mydoom_log(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec)
@ -1053,6 +1056,37 @@ static int cli_scanmschm(int desc, const char **virname, long int *scanned, cons
return ret;
}
/*
 * Scan a Script Encoder (SCRENC) stream referenced by the descriptor `desc`.
 *
 * A private temporary directory is created under $TMPDIR (falling back to
 * P_tmpdir when defined, then to "/tmp"). html_screnc_decode() is asked to
 * decode the descriptor into that directory and, when it returns non-zero,
 * the directory is scanned with cli_scandir(). The directory is removed
 * afterwards unless cli_leavetemps_flag is set.
 *
 * Returns CL_ETMPDIR when the temporary directory cannot be named or created,
 * CL_CLEAN when the decoder returned zero, and otherwise whatever
 * cli_scandir() returned.
 */
static int cli_scanscrenc(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec)
{
	const char *tmpdir;
	char *tempname;
	int ret = CL_CLEAN;

    cli_dbgmsg("in cli_scanscrenc()\n");

    if((tmpdir = getenv("TMPDIR")) == NULL)
#ifdef P_tmpdir
	tmpdir = P_tmpdir;
#else
	tmpdir = "/tmp";
#endif

    /* The name is free()d below, so it is owned by this function; never hand
     * a NULL result to mkdir() or to a %s format. */
    tempname = cli_gentemp(tmpdir);
    if(tempname == NULL) {
	cli_dbgmsg("ScrEnc: Can't generate temporary directory name\n");
	return CL_ETMPDIR;
    }

    if(mkdir(tempname, 0700)) {
	cli_dbgmsg("ScrEnc: Can't create temporary directory %s\n", tempname);
	/* Release the name on this early exit as well. */
	free(tempname);
	return CL_ETMPDIR;
    }

    /* Only scan the directory when the decoder reports a non-zero result. */
    if(html_screnc_decode(desc, tempname))
	ret = cli_scandir(tempname, virname, scanned, root, limits, options, arec, mrec);

    if(!cli_leavetemps_flag)
	cli_rmdirs(tempname);

    free(tempname);
    return ret;
}
static int cli_scanmail(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, int options, int *arec, int *mrec)
{
const char *tmpdir;
@ -1193,6 +1227,10 @@ int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const
ret = cli_scantar(desc, virname, scanned, root, limits, options, arec, mrec);
break;
case CL_SCRENC:
ret = cli_scanscrenc(desc, virname, scanned, root, limits, options, arec, mrec);
break;
case CL_DATAFILE:
/* it could be a false positive and a standard DOS .COM file */
{

Loading…
Cancel
Save