ClamAV is an open source (GPLv2) anti-virus toolkit.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
clamav/libclamav/hwp.c

843 lines
26 KiB

/*
* HWP Stuff
*
* Copyright (C) 2015 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
*
* Authors: Kevin Lin
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#if HAVE_LIBXML2
#ifdef _WIN32
#ifndef LIBXML_WRITER_ENABLED
#define LIBXML_WRITER_ENABLED 1
#endif
#endif
#include <libxml/xmlreader.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <ctype.h>
#include <zlib.h>
#include "clamav.h"
#include "fmap.h"
#include "str.h"
#include "others.h"
#include "scanners.h"
#include "msxml_parser.h"
#include "msxml.h"
#include "json_api.h"
#include "hwp.h"
#if HAVE_JSON
#include "msdoc.h"
#endif
/*
 * Per-format debug tracing switches.
 *
 * When a switch is non-zero the matching *_debug() macro forwards its
 * arguments to cli_dbgmsg().  When it is zero the macro expands to an empty
 * do/while statement, so a call followed by ';' is exactly one statement and
 * remains safe inside an unbraced if/else.
 */
#define HWP5_DEBUG 0
#define HWP3_DEBUG 1
#define HWPML_DEBUG 0

#if HWP5_DEBUG
#define hwp5_debug(...) cli_dbgmsg(__VA_ARGS__)
#else
#define hwp5_debug(...) do { } while (0)
#endif

#if HWP3_DEBUG
#define hwp3_debug(...) cli_dbgmsg(__VA_ARGS__)
#else
#define hwp3_debug(...) do { } while (0)
#endif

#if HWPML_DEBUG
#define hwpml_debug(...) cli_dbgmsg(__VA_ARGS__)
#else
#define hwpml_debug(...) do { } while (0)
#endif
/*
 * Callback type consumed by decompress_and_callback().  The callback receives
 * the caller-supplied opaque cbdata pointer, the descriptor of the temporary
 * file holding the inflated data, and the scan context.  Its int result
 * becomes decompress_and_callback()'s return value unless the clean-up that
 * follows the callback replaces it.
 */
typedef int (*hwp_cb )(void *cbdata, int fd, cli_ctx *ctx);
/**
 * Inflate a raw deflate stream read from an fmap into a temporary file and
 * pass that file to a callback.
 *
 * @param ctx     scan context; provides the temp directory, the scan limits
 *                and the keeptmp policy
 * @param input   map the compressed bytes are read from
 * @param at      offset within input where the compressed stream starts
 * @param len     number of compressed bytes to consume; 0 means no explicit
 *                bound (read until the map is exhausted or inflation stops)
 * @param parent  label used as the prefix of every log message
 * @param cb      callback invoked with the descriptor of the temporary file
 * @param cbdata  opaque pointer handed to cb unchanged
 *
 * @return CL_ENULLARG when ctx, input or cb is NULL; the cli_gentempfd()
 *         error when the temp file cannot be created; CL_EUNPACK when
 *         inflation cannot start, fails before producing any output, or
 *         when inflateEnd() fails and nothing else went wrong; CL_EWRITE on
 *         a short write; CL_EUNLINK when the temp file cannot be removed;
 *         otherwise the value returned by cb.
 */
static int decompress_and_callback(cli_ctx *ctx, fmap_t *input, off_t at, size_t len, const char *parent, hwp_cb cb, void *cbdata)
{
    int zret, ofd, ret = CL_SUCCESS;
    off_t off_in = at;
    size_t count, remain = 1, outsize = 0;
    z_stream zstrm;
    char *tmpname;
    unsigned char inbuf[FILEBUFF], outbuf[FILEBUFF];

    if (!ctx || !input || !cb)
        return CL_ENULLARG;

    /* with len == 0, remain keeps its non-zero default and never ends the loop */
    if (len)
        remain = len;

    /* reserve tempfile for output and callback */
    if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &ofd)) != CL_SUCCESS) {
        cli_errmsg("%s: Can't generate temporary file\n", parent);
        return ret;
    }

    /* initialize zlib inflation stream */
    memset(&zstrm, 0, sizeof(zstrm));
    zstrm.zalloc    = Z_NULL;
    zstrm.zfree     = Z_NULL;
    zstrm.opaque    = Z_NULL;
    zstrm.next_in   = inbuf;
    zstrm.next_out  = outbuf;
    zstrm.avail_in  = 0;
    zstrm.avail_out = FILEBUFF;

    /* negative windowBits: raw deflate data without a zlib/gzip header */
    zret = inflateInit2(&zstrm, -15);
    if (zret != Z_OK) {
        cli_errmsg("%s: Can't initialize zlib inflation stream\n", parent);
        ret = CL_EUNPACK;
        goto dc_end;
    }

    /* inflation loop */
    do {
        /* refill the input buffer once zlib has consumed it */
        if (zstrm.avail_in == 0) {
            zstrm.next_in = inbuf;
            ret = fmap_readn(input, inbuf, off_in, FILEBUFF);
            if (ret < 0) {
                cli_errmsg("%s: Error reading stream\n", parent);
                ret = CL_EUNPACK;
                goto dc_end;
            }
            if (!ret)
                break;

            /* never feed zlib more than the caller asked for */
            if (len) {
                if (remain < (size_t)ret)
                    ret = remain;
                remain -= ret;
            }
            zstrm.avail_in = ret;
            off_in += ret;
        }

        zret  = inflate(&zstrm, Z_SYNC_FLUSH);
        count = FILEBUFF - zstrm.avail_out;
        if (count) {
            /* stop inflating once limits are exceeded; what was written is still scanned */
            if (cli_checklimits("HWP", ctx, outsize + count, 0, 0) != CL_SUCCESS)
                break;

            if (cli_writen(ofd, outbuf, count) != count) {
                cli_errmsg("%s: Can't write to file %s\n", parent, tmpname);
                ret = CL_EWRITE;
                goto dc_end;
            }
            outsize += count;
        }
        zstrm.next_out  = outbuf;
        zstrm.avail_out = FILEBUFF;
    } while (zret == Z_OK && remain);

    /* post inflation checks */
    if (zret != Z_STREAM_END && zret != Z_OK) {
        if (outsize == 0) {
            cli_infomsg(ctx, "%s: Error decompressing stream. No data decompressed.\n", parent);
            ret = CL_EUNPACK;
            goto dc_end;
        }
        cli_infomsg(ctx, "%s: Error decompressing stream. Scanning what was decompressed.\n", parent);
    }
    if (len && remain > 0)
        cli_infomsg(ctx, "%s: Error decompressing stream. Not all requested input was converted\n", parent);

    cli_dbgmsg("%s: Decompressed %llu bytes to %s\n", parent, (long long unsigned)outsize, tmpname);

    /* scanning inflated stream */
    ret = cb(cbdata, ofd, ctx);

    /* clean-up */
dc_end:
    zret = inflateEnd(&zstrm);
    if (zret != Z_OK) {
        cli_errmsg("%s: Error closing zlib inflation stream\n", parent);
        /* keep an earlier error or the callback's verdict; only replace success */
        if (ret == CL_SUCCESS)
            ret = CL_EUNPACK;
    }
    close(ofd);
    if (!ctx->engine->keeptmp)
        if (cli_unlink(tmpname))
            ret = CL_EUNLINK;
    free(tmpname);
    return ret;
}
/*** HWPOLE2 ***/
/*
 * Scan an HWP-embedded OLE2 object.
 *
 * The first four bytes of the map are read as a size prefix and compared with
 * the number of bytes that follow it; the outcome is only logged.  The data
 * after the prefix is then submitted for scanning with no type preset
 * (CL_TYPE_ANY).
 *
 * NOTE(review): the prefix is compared in host byte order; confirm whether it
 * needs le32_to_host() like the HWP3 fields parsed elsewhere in this file.
 *
 * Returns CL_EREAD when the prefix cannot be read, otherwise the result of
 * cli_map_scandesc().
 */
int cli_scanhwpole2(cli_ctx *ctx)
{
    fmap_t *map = *ctx->fmap;
    uint32_t stored_size;
    uint32_t actual_size;

    if (fmap_readn(map, &stored_size, 0, sizeof(stored_size)) != sizeof(stored_size)) {
        cli_errmsg("HWPOLE2: Failed to read uncompressed ole2 filesize\n");
        return CL_EREAD;
    }

    /* bytes present in the map once the prefix itself is discounted */
    actual_size = (uint32_t)(map->len - sizeof(stored_size));

    if (stored_size == actual_size) {
        cli_dbgmsg("HWPOLE2: Matched uncompressed prefix and size: %u == %u\n", stored_size, actual_size);
    } else {
        cli_warnmsg("HWPOLE2: Mismatched uncompressed prefix and size: %u != %u\n", stored_size, actual_size);
    }

    /* an earlier revision forced CL_TYPE_OLE2 here instead of CL_TYPE_ANY */
    return cli_map_scandesc(map, 4, map->len, ctx, CL_TYPE_ANY);
}
/*** HWP5 ***/
/*
 * Record the HWP 5.x file header in the JSON properties tree.
 *
 * With JSON support compiled in, an "Hwp5Header" object is obtained under
 * ctx->wrkproperty and filled with the signature ("Magic"), the raw version
 * ("RawVersion"), the raw flag word ("RawFlags") and a "Flags" array that
 * holds one string per flag bit set in hwp5->flags.  Without JSON support the
 * function does nothing.
 *
 * Returns CL_ENULLARG when ctx or hwp5 is NULL, CL_EMEM when the header
 * object or the flags array cannot be obtained, CL_SUCCESS otherwise.
 */
int cli_hwp5header(cli_ctx *ctx, hwp5_header_t *hwp5)
{
#if HAVE_JSON
    /* flag bits in the order their names are appended to the "Flags" array */
    static const struct {
        uint32_t mask;
        const char *label;
    } flag_names[] = {
        { HWP5_COMPRESSED,     "HWP5_COMPRESSED" },
        { HWP5_PASSWORD,       "HWP5_PASSWORD" },
        { HWP5_DISTRIBUTABLE,  "HWP5_DISTRIBUTABLE" },
        { HWP5_SCRIPT,         "HWP5_SCRIPT" },
        { HWP5_DRM,            "HWP5_DRM" },
        { HWP5_XMLTEMPLATE,    "HWP5_XMLTEMPLATE" },
        { HWP5_HISTORY,        "HWP5_HISTORY" },
        { HWP5_CERT_SIGNED,    "HWP5_CERT_SIGNED" },
        { HWP5_CERT_ENCRYPTED, "HWP5_CERT_ENCRYPTED" },
        { HWP5_CERT_EXTRA,     "HWP5_CERT_EXTRA" },
        { HWP5_CERT_DRM,       "HWP5_CERT_DRM" },
        { HWP5_CCL,            "HWP5_CCL" }
    };
    json_object *hdr_obj, *flag_arr;
    size_t idx;

    if (ctx == NULL || hwp5 == NULL)
        return CL_ENULLARG;

    hdr_obj = cli_jsonobj(ctx->wrkproperty, "Hwp5Header");
    if (hdr_obj == NULL) {
        cli_errmsg("HWP5.x: No memory for Hwp5Header object\n");
        return CL_EMEM;
    }

    /* magic, version and raw flag word */
    cli_jsonstr(hdr_obj, "Magic", hwp5->signature);
    cli_jsonint(hdr_obj, "RawVersion", hwp5->version);
    cli_jsonint(hdr_obj, "RawFlags", hwp5->flags);

    /* decoded flags */
    flag_arr = cli_jsonarray(hdr_obj, "Flags");
    if (flag_arr == NULL) {
        cli_errmsg("HWP5.x: No memory for Hwp5Header/Flags array\n");
        return CL_EMEM;
    }

    for (idx = 0; idx < sizeof(flag_names) / sizeof(flag_names[0]); idx++) {
        if (hwp5->flags & flag_names[idx].mask) {
            cli_jsonstr(flag_arr, NULL, flag_names[idx].label);
        }
    }
#endif
    return CL_SUCCESS;
}
/*
 * decompress_and_callback() callback for HWP 5.x streams: scans the inflated
 * temporary file through cli_magic_scandesc().
 *
 * cbdata is not used.  Returns CL_ENULLARG for a negative descriptor or a
 * NULL context, otherwise the scan result.
 */
static int hwp5_cb(void *cbdata, int fd, cli_ctx *ctx)
{
    if (fd < 0 || !ctx)
        return CL_ENULLARG;

    return cli_magic_scandesc(fd, ctx);
}
/**
 * Scan one stream extracted from an HWP 5.x document.
 *
 * Streams whose name starts with "bin", "jscriptversion", "defaultjscript",
 * "section", "viewtext" or "docinfo" are handled according to the header
 * flags: with HWP5_PASSWORD set they are scanned as-is, with HWP5_COMPRESSED
 * set they are inflated and the result is scanned.  With JSON support and
 * file-property collection enabled, a stream whose name starts with
 * "_5_hwpsummaryinformation" is also passed to the OLE2 summary JSON
 * extractor.  Everything else is scanned as-is.
 *
 * @param ctx   scan context
 * @param hwp5  parsed HWP 5.x header providing the flag word
 * @param name  stream name; may be NULL, in which case the stream is treated
 *              as a normal stream (presumably lowercased by the caller -
 *              verify against the caller)
 * @param fd    descriptor of the extracted stream
 *
 * @return CL_ENULLARG for a negative fd, CL_ESTAT / CL_EMAP when the stream
 *         cannot be mapped for decompression, CL_ETIMEOUT when the summary
 *         extraction times out, otherwise the scan result.
 */
int cli_scanhwp5_stream(cli_ctx *ctx, hwp5_header_t *hwp5, char *name, int fd)
{
    hwp5_debug("HWP5.x: NAME: %s\n", name ? name : "(null)");

    if (fd < 0) {
        cli_errmsg("HWP5.x: Invalid file descriptor argument\n");
        return CL_ENULLARG;
    }

    /* encrypted and compressed streams; the name may legitimately be NULL */
    if (name &&
        (!strncmp(name, "bin", 3) || !strncmp(name, "jscriptversion", 14) ||
         !strncmp(name, "defaultjscript", 14) || !strncmp(name, "section", 7) ||
         !strncmp(name, "viewtext", 8) || !strncmp(name, "docinfo", 7))) {

        if (hwp5->flags & HWP5_PASSWORD) {
            cli_dbgmsg("HWP5.x: Password encrypted stream, scanning as-is\n");
            return cli_magic_scandesc(fd, ctx);
        }

        if (hwp5->flags & HWP5_COMPRESSED) {
            STATBUF statbuf;
            fmap_t *input;
            int ret;

            hwp5_debug("HWP5.x: Sending %s for decompress and scan\n", name);

            /* fmap the input file for easier manipulation */
            if (FSTAT(fd, &statbuf) == -1) {
                cli_errmsg("HWP5.x: Can't stat file descriptor\n");
                return CL_ESTAT;
            }

            input = fmap(fd, 0, statbuf.st_size);
            if (!input) {
                cli_errmsg("HWP5.x: Failed to get fmap for input stream\n");
                return CL_EMAP;
            }
            ret = decompress_and_callback(ctx, input, 0, 0, "HWP5.x", hwp5_cb, NULL);
            funmap(input);
            return ret;
        }
    }

#if HAVE_JSON
    /* JSON Output Summary Information */
    if ((ctx->options & CL_SCAN_FILE_PROPERTIES) && ctx->properties != NULL) {
        if (name && !strncmp(name, "_5_hwpsummaryinformation", 24)) {
            cli_dbgmsg("HWP5.x: Detected a '_5_hwpsummaryinformation' stream\n");
            /* JSONOLE2 - what to do if something breaks? */
            if (cli_ole2_summary_json(ctx, fd, 2) == CL_ETIMEOUT)
                return CL_ETIMEOUT;
        }
    }
#endif

    /* normal streams */
    return cli_magic_scandesc(fd, ctx);
}
/*** HWP3 ***/
/* all fields use little endian and unicode encoding, if applicable */

/* File Identification Information - (30 total bytes) */
#define HWP3_IDENTITY_INFO_SIZE 30

/* Document Information - (128 total bytes) */
#define HWP3_DOCINFO_SIZE 128

/* Byte offsets of the fields extracted from the document information record */
#define DI_WRITEPROT   24  /* offset 24 (4 bytes) - write protection */
#define DI_EXTERNAPP   28  /* offset 28 (2 bytes) - external application */
#define DI_PASSWD      96  /* offset 96 (2 bytes) - password protected */
#define DI_COMPRESSED  124 /* offset 124 (1 byte) - compression */
#define DI_INFOBLKSIZE 126 /* offset 126 (2 bytes) - information block length */

/*
 * Host-side copy of the document information fields listed above.  This is
 * not an image of the on-disk record: each member is filled individually
 * from its DI_* offset.
 */
struct hwp3_docinfo {
    uint32_t di_writeprot;
    uint16_t di_externapp;
    uint16_t di_passwd;
    uint8_t di_compressed;
    uint16_t di_infoblksize;
};
/* Document Summary - (1008 total bytes) */
#define HWP3_DOCSUMMARY_SIZE 1008

/*
 * One entry per document summary field: the byte offset of the field inside
 * the summary record and the JSON key it is reported under.  Consecutive
 * offsets are 112 bytes (56 x 2) apart.
 */
struct hwp3_docsummary_entry {
    off_t offset;
    const char *name;
} hwp3_docsummary_fields[] = {
    { 0,   "Title" },    /* offset 0 (56 x 2 bytes) - title */
    { 112, "Subject" },  /* offset 112 (56 x 2 bytes) - subject */
    { 224, "Author" },   /* offset 224 (56 x 2 bytes) - author */
    { 336, "Date" },     /* offset 336 (56 x 2 bytes) - date */
    { 448, "Keyword1" }, /* offset 448 (2 x 56 x 2 bytes) - keywords */
    { 560, "Keyword2" },
    { 672, "Etc0" },     /* offset 672 (3 x 56 x 2 bytes) - etc */
    { 784, "Etc1" },
    { 896, "Etc2" }
};

/* Number of table entries; parenthesized so it is safe inside any expression */
#define NUM_DOCSUMMARY_FIELDS (sizeof(hwp3_docsummary_fields) / sizeof(hwp3_docsummary_fields[0]))
/**
 * Read the HWP 3.x document information record and extract the fields of
 * interest.
 *
 * The record (HWP3_DOCINFO_SIZE bytes at offset in the scan context's map) is
 * requested from the fmap; five fields are copied out at their DI_* offsets
 * and the multi-byte ones are converted from little endian to host order.
 * With JSON support, an "Hwp3Header" object with a "Flags" array is obtained
 * under ctx->wrkproperty and one string is appended per non-zero field
 * (write protection, external application, password, compression).
 *
 * @param ctx      scan context
 * @param offset   offset of the document information record in the map
 * @param docinfo  output; every member is written on success
 *
 * @return CL_EMAP when the record cannot be read, CL_EMEM when a JSON
 *         container cannot be obtained, CL_SUCCESS otherwise.
 */
static inline int parsehwp3_docinfo(cli_ctx *ctx, off_t offset, struct hwp3_docinfo *docinfo)
{
    const uint8_t *hwp3_ptr;
#if HAVE_JSON
    json_object *header, *flags;
#endif

    /* TODO: use fmap_readn? */
    if (!(hwp3_ptr = fmap_need_off_once(*ctx->fmap, offset, HWP3_DOCINFO_SIZE))) {
        cli_errmsg("HWP3.x: Failed to read fmap for hwp docinfo\n");
        return CL_EMAP;
    }

    /* memcpy rather than pointer casts: the source offsets are arbitrary */
    memcpy(&(docinfo->di_writeprot), hwp3_ptr + DI_WRITEPROT, sizeof(docinfo->di_writeprot));
    memcpy(&(docinfo->di_externapp), hwp3_ptr + DI_EXTERNAPP, sizeof(docinfo->di_externapp));
    memcpy(&(docinfo->di_passwd), hwp3_ptr + DI_PASSWD, sizeof(docinfo->di_passwd));
    memcpy(&(docinfo->di_compressed), hwp3_ptr + DI_COMPRESSED, sizeof(docinfo->di_compressed));
    memcpy(&(docinfo->di_infoblksize), hwp3_ptr + DI_INFOBLKSIZE, sizeof(docinfo->di_infoblksize));

    /* di_compressed is a single byte and needs no conversion */
    docinfo->di_writeprot   = le32_to_host(docinfo->di_writeprot);
    docinfo->di_externapp   = le16_to_host(docinfo->di_externapp);
    docinfo->di_passwd      = le16_to_host(docinfo->di_passwd);
    docinfo->di_infoblksize = le16_to_host(docinfo->di_infoblksize);

    hwp3_debug("HWP3.x: di_writeprot: %u\n", docinfo->di_writeprot);
    hwp3_debug("HWP3.x: di_externapp: %u\n", docinfo->di_externapp);
    hwp3_debug("HWP3.x: di_passwd: %u\n", docinfo->di_passwd);
    hwp3_debug("HWP3.x: di_compressed: %u\n", docinfo->di_compressed);
    hwp3_debug("HWP3.x: di_infoblksize: %u\n", docinfo->di_infoblksize);

#if HAVE_JSON
    header = cli_jsonobj(ctx->wrkproperty, "Hwp3Header");
    if (!header) {
        cli_errmsg("HWP3.x: No memory for Hwp3Header object\n");
        return CL_EMEM;
    }

    flags = cli_jsonarray(header, "Flags");
    if (!flags) {
        cli_errmsg("HWP3.x: No memory for Hwp3Header/Flags array\n");
        return CL_EMEM;
    }

    if (docinfo->di_writeprot) {
        cli_jsonstr(flags, NULL, "HWP3_WRITEPROTECTED"); /* HWP3_DISTRIBUTABLE */
    }
    if (docinfo->di_externapp) {
        cli_jsonstr(flags, NULL, "HWP3_EXTERNALAPPLICATION");
    }
    if (docinfo->di_passwd) {
        cli_jsonstr(flags, NULL, "HWP3_PASSWORD");
    }
    if (docinfo->di_compressed) {
        cli_jsonstr(flags, NULL, "HWP3_COMPRESSED");
    }
#endif

    return CL_SUCCESS;
}
/**
 * Export the HWP 3.x document summary fields to the JSON properties tree.
 *
 * With JSON support, the summary record (HWP3_DOCSUMMARY_SIZE bytes at
 * offset) is requested from the map and every entry of
 * hwp3_docsummary_fields is converted from UTF-16LE to UTF-8 and stored in an
 * "Hwp3SummaryInfo" object under ctx->wrkproperty.  When the conversion
 * fails, the raw bytes are base64-encoded instead and a "<name>_base64"
 * boolean is set next to the value.  Without JSON support nothing is done.
 *
 * @param ctx     scan context
 * @param offset  offset of the document summary record in the map
 *
 * @return CL_EMAP when the record cannot be read, CL_EMEM on allocation or
 *         conversion failure, the cli_jsonstr() error when storing a value
 *         fails, CL_SUCCESS otherwise.
 */
static inline int parsehwp3_docsummary(cli_ctx *ctx, off_t offset)
{
#if HAVE_JSON
    /* every summary field occupies 56 two-byte characters */
    enum { SUMMARY_FIELD_BYTES = 112 };
    const uint8_t *hwp3_ptr;
    char *str;
    size_t i;
    int ret;
    json_object *summary;

    if (!(hwp3_ptr = fmap_need_off_once(*ctx->fmap, offset, HWP3_DOCSUMMARY_SIZE))) {
        cli_errmsg("HWP3.x: Failed to read fmap for hwp docsummary\n");
        return CL_EMAP;
    }

    summary = cli_jsonobj(ctx->wrkproperty, "Hwp3SummaryInfo");
    if (!summary) {
        cli_errmsg("HWP3.x: No memory for json object\n");
        return CL_EMEM;
    }

    for (i = 0; i < NUM_DOCSUMMARY_FIELDS; i++) {
        str = cli_utf16_to_utf8(hwp3_ptr + hwp3_docsummary_fields[i].offset, SUMMARY_FIELD_BYTES, UTF16_LE);
        if (!str) {
            /* conversion failed: flag the field and fall back to base64 */
            char *b64;
            /* room for the name, "_base64" (7 characters) and the terminator */
            size_t b64len = strlen(hwp3_docsummary_fields[i].name) + 8;

            b64 = cli_calloc(1, b64len);
            if (!b64) {
                cli_errmsg("HWP3.x: Failed to allocate memory for b64 boolean\n");
                return CL_EMEM;
            }
            snprintf(b64, b64len, "%s_base64", hwp3_docsummary_fields[i].name);
            cli_jsonbool(summary, b64, 1);
            free(b64);

            str = (char *)cl_base64_encode(hwp3_ptr + hwp3_docsummary_fields[i].offset, SUMMARY_FIELD_BYTES);
        }
        if (!str) {
            cli_errmsg("HWP3.x: Failed to generate UTF-8 conversion of property string\n");
            return CL_EMEM;
        }

        hwp3_debug("HWP3.x: %s, %s\n", hwp3_docsummary_fields[i].name, str);
        ret = cli_jsonstr(summary, hwp3_docsummary_fields[i].name, str);
        /* str is released here whether or not storing it succeeded */
        free(str);
        if (ret != CL_SUCCESS)
            return ret;
    }
#else
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(offset);
#endif
    return CL_SUCCESS;
}
/*
 * Information block layout as noted by the original author.  The widths are
 * bit counts: the parser below reads the ID and the length of the first
 * block as two consecutive 16-bit values.
 *
 *   InfoBlock(#1):
 *     Information Block ID          (16 bits)
 *     Information Block Length (n)  (16 bits)
 *     Information Block Contents    (n bytes)
 *   AdditionalInfoBlocks (not parsed here; widths per the original note):
 *     Information Block ID          (32 bits)
 *     Information Block Length (n)  (32 bits)
 *     Information Block Contents    (n bytes)
 */

/*
 * Read and log the ID and length of the information block found at offset.
 * Both values are converted from little endian; the block contents are not
 * touched.  Returns CL_EREAD when either field cannot be read, CL_SUCCESS
 * otherwise.
 */
static inline int parsehwp3_infoblk(cli_ctx *ctx, off_t offset)
{
    fmap_t *map = *ctx->fmap;
    uint16_t blk_id;
    uint16_t blk_len;

    if (fmap_readn(map, &blk_id, offset, sizeof(blk_id)) != sizeof(blk_id)) {
        cli_errmsg("HWP3.x: Failed to read infomation block id @ %llu\n",
                   (long long unsigned)offset);
        return CL_EREAD;
    }

    /* the length field immediately follows the ID */
    if (fmap_readn(map, &blk_len, offset + sizeof(blk_id), sizeof(blk_len)) != sizeof(blk_len)) {
        cli_errmsg("HWP3.x: Failed to read infomation block len @ %llu\n",
                   (long long unsigned)offset);
        return CL_EREAD;
    }

    blk_id  = le16_to_host(blk_id);
    blk_len = le16_to_host(blk_len);

    hwp3_debug("HWP3.x: Information Block[%llu]: ID: %u\n", (long long unsigned)offset, blk_id);
    hwp3_debug("HWP3.x: Information Block[%llu]: LEN: %u\n", (long long unsigned)offset, blk_len);

    return CL_SUCCESS;
}
/**
 * decompress_and_callback() callback for HWP 3.x: walks the start of the
 * inflated document body.
 *
 * The inflated file is mapped and the seven font tables and the style table
 * are skipped using their 16-bit little-endian entry counts, which leaves
 * the offset where the paragraphs begin.  Nothing beyond that is parsed or
 * scanned yet.
 *
 * @param cbdata  unused
 * @param fd      descriptor of the inflated temporary file
 * @param ctx     unused for now
 *
 * @return CL_ENULLARG for a negative fd, CL_ESTAT / CL_EMAP when the file
 *         cannot be mapped, CL_EREAD when a table count cannot be read,
 *         CL_SUCCESS otherwise.
 */
static int hwp3_cb(void *cbdata, int fd, cli_ctx *ctx)
{
    fmap_t *dmap;
    off_t offset = 0;
    int i;
    uint16_t nstyles;
    STATBUF statbuf;

    if (fd < 0) {
        cli_errmsg("HWP3.x: Invalid file descriptor argument\n");
        return CL_ENULLARG;
    }

    if (FSTAT(fd, &statbuf) == -1) {
        cli_errmsg("HWP3.x: Can't stat file descriptor\n");
        return CL_ESTAT;
    }

    dmap = fmap(fd, 0, statbuf.st_size);
    if (!dmap) {
        cli_errmsg("HWP3.x: Failed to get fmap for uncompressed stream\n");
        return CL_EMAP;
    }

    /* Fonts - 7 entries of 2 + (n x 40) bytes where n is the first 2 bytes of the entry */
    for (i = 0; i < 7; i++) {
        uint16_t nfonts;

        /* fmap_readn() yields a byte count, so it must not be returned as a status */
        if (fmap_readn(dmap, &nfonts, offset, sizeof(nfonts)) != sizeof(nfonts)) {
            cli_errmsg("HWP3.x: Failed to read font entry count @ %llu\n", (long long unsigned)offset);
            funmap(dmap);
            return CL_EREAD;
        }
        nfonts = le16_to_host(nfonts);

        hwp3_debug("HWP3.x: Font Entry %d with %u entries @ offset %llu\n", i + 1, nfonts, (long long unsigned)offset);
        offset += (2 + nfonts * 40);
    }

    /* Styles - 2 + (n x 238) bytes where n is the first 2 bytes of the section */
    if (fmap_readn(dmap, &nstyles, offset, sizeof(nstyles)) != sizeof(nstyles)) {
        cli_errmsg("HWP3.x: Failed to read style count @ %llu\n", (long long unsigned)offset);
        funmap(dmap);
        return CL_EREAD;
    }
    nstyles = le16_to_host(nstyles);

    hwp3_debug("HWP3.x: %u Styles @ offset %llu\n", nstyles, (long long unsigned)offset);
    offset += (2 + nstyles * 238);

    /* Paragraphs */
    hwp3_debug("HWP3.x: Paragraphs start @ offset %llu\n", (long long unsigned)offset);

    /* Additional Information Block (Internal) - Attachments and Media */

    /* TODO: scan the uncompressed stream, e.g. with cli_map_scandesc(dmap, 0, 0, ctx, CL_TYPE_ANY) */

    funmap(dmap);
    return CL_SUCCESS;
}
/*
 * Entry point for HWP 3.x documents.
 *
 * The file is processed front to back: the identification block is skipped,
 * the document information record and the document summary are parsed, and
 * whatever follows the summary is inflated and handed to hwp3_cb().
 *
 * Still to do:
 *  - report the magic and the raw version in the JSON header;
 *  - handle the optional information block (parsehwp3_infoblk) announced by
 *    docinfo's di_infoblksize before inflating, adjusting the offset;
 *  - handle the optional additional information blocks after the body.
 *
 * Returns the first non-CL_SUCCESS status met, or the result of
 * decompress_and_callback().
 */
int cli_scanhwp3(cli_ctx *ctx)
{
    struct hwp3_docinfo info;
    /* the three fixed-size leading sections sit back to back */
    const off_t docinfo_at = HWP3_IDENTITY_INFO_SIZE;
    const off_t summary_at = docinfo_at + HWP3_DOCINFO_SIZE;
    const off_t body_at    = summary_at + HWP3_DOCSUMMARY_SIZE;
    int rc;

    rc = parsehwp3_docinfo(ctx, docinfo_at, &info);
    if (rc != CL_SUCCESS)
        return rc;

    rc = parsehwp3_docsummary(ctx, summary_at);
    if (rc != CL_SUCCESS)
        return rc;

    /* TODO: HANDLE OPTIONAL INFORMATION BLOCKS HERE */

    rc = decompress_and_callback(ctx, *ctx->fmap, body_at, 0, "HWP3.x", hwp3_cb, NULL);

    /* TODO: HANDLE OPTIONAL ADDITIONAL INFORMATION BLOCKS */

    return rc;
}
/*** HWPML (hijacking the msxml parser) ***/
/*
 * Element table handed to the msxml parser for HWPML documents.  Each row
 * gives an element name, the name it is reported under, and the MSXML_*
 * handling flags for it.
 */
static const struct key_entry hwpml_keys[] = {
    { "hwpml",              "HWPML",              MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },

    /* HEAD - Document Properties */
    { "head",               "Head",               MSXML_JSON_WRKPTR },
    { "docsummary",         "DocumentProperties", MSXML_JSON_WRKPTR },
    { "title",              "Title",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "author",             "Author",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "date",               "Date",               MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "docsetting",         "DocumentSettings",   MSXML_JSON_WRKPTR },
    { "beginnumber",        "BeginNumber",        MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
    { "caretpos",           "CaretPos",           MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
    { "bindatalist",        "BinDataList",        MSXML_JSON_WRKPTR },
    { "binitem",            "BinItem",            MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
    { "facenamelist",       "FaceNameList",       MSXML_IGNORE_ELEM }, /* fonts list */
    { "borderfilllist",     "BorderFillList",     MSXML_IGNORE_ELEM }, /* borders list */
    { "charshapelist",      "CharShapeList",      MSXML_IGNORE_ELEM }, /* character shapes */
    { "tabdeflist",         "TableDefList",       MSXML_IGNORE_ELEM }, /* table defs */
    { "numberinglist",      "NumberingList",      MSXML_IGNORE_ELEM }, /* numbering list */
    { "parashapelist",      "ParagraphShapeList", MSXML_IGNORE_ELEM }, /* paragraph shapes */
    { "stylelist",          "StyleList",          MSXML_IGNORE_ELEM }, /* styles */
    { "compatibledocument", "WordCompatibility",  MSXML_IGNORE_ELEM }, /* word compatibility data */

    /* BODY - Document Contents */
    { "body",               "Body",               MSXML_IGNORE_ELEM }, /* document contents (we could build a document contents summary) */

    /* TAIL - Document Attachments */
    { "tail",               "Tail",               MSXML_JSON_WRKPTR },
    { "bindatastorage",     "BinaryDataStorage",  MSXML_JSON_WRKPTR },
    { "bindata",            "BinaryData",         MSXML_SCAN_CB | MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
    { "scriptcode",         "ScriptCodeStorage",  MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB },
    { "scriptheader",       "ScriptHeader",       MSXML_SCAN_CB | MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "scriptsource",       "ScriptSource",       MSXML_SCAN_CB | MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }
};

/* number of rows in hwpml_keys */
static size_t num_hwpml_keys = sizeof(hwpml_keys) / sizeof(hwpml_keys[0]);
/*
 * Scan step for HWPML binary data.  Binary streams are base64-decoded and
 * then decompressed first when their attributes request it (see
 * hwpml_binary_cb); this function receives the resulting descriptor and
 * scans it with cli_magic_scandesc().  cbdata is not used.
 */
static int hwpml_scan_cb(void *cbdata, int fd, cli_ctx *ctx)
{
    int verdict = cli_magic_scandesc(fd, ctx);

    return verdict;
}
/**
 * msxml scan callback for HWPML binary elements.
 *
 * The element attributes decide how the data in fd is prepared:
 *  - "Encoding": "Base64" means the data is decoded into a temporary file;
 *    any other value is unrecognized and the data is scanned as-is.
 *  - "Compress": "false" leaves the data alone; any other value (including
 *    unrecognized ones) sends it through decompress_and_callback().
 * The prepared data is scanned through hwpml_scan_cb().
 *
 * The temporary file created for decoded data is closed, removed (unless
 * the engine keeps temporary files) and its name released on every path
 * once it exists.
 *
 * @param fd           descriptor holding the element's data
 * @param ctx          scan context
 * @param num_attribs  number of entries in attribs
 * @param attribs      attribute key/value pairs of the element
 *
 * @return CL_ESTAT / CL_EMAP when the data cannot be mapped, the
 *         cli_gentempfd() error when no temp file can be created, CL_EWRITE
 *         on a short write, otherwise the scan result.
 */
static int hwpml_binary_cb(int fd, cli_ctx *ctx, int num_attribs, struct attrib_entry *attribs)
{
    /* df stays -1 until a temp file for decoded data actually exists */
    int i, ret, df = -1, com = 0, enc = 0;
    char *tempfile = NULL;

    /* check attributes for compression and encoding */
    for (i = 0; i < num_attribs; i++) {
        if (!strcmp(attribs[i].key, "Compress")) {
            if (!strcmp(attribs[i].value, "true"))
                com = 1;
            else if (!strcmp(attribs[i].value, "false"))
                com = 0;
            else
                com = -1;
        }

        if (!strcmp(attribs[i].key, "Encoding")) {
            if (!strcmp(attribs[i].value, "Base64"))
                enc = 1;
            else
                enc = -1;
        }
    }

    hwpml_debug("HWPML: Checking attributes: com: %d, enc: %d\n", com, enc);

    /* decode the binary data if needed - base64 */
    if (enc < 0) {
        cli_errmsg("HWPML: Unrecognized encoding method\n");
        return cli_magic_scandesc(fd, ctx);
    } else if (enc == 1) {
        STATBUF statbuf;
        fmap_t *input;
        const char *instream;
        char *decoded;
        size_t decodedlen;

        hwpml_debug("HWPML: Decoding base64-encoded binary data\n");

        /* fmap the input file for easier manipulation */
        if (FSTAT(fd, &statbuf) == -1) {
            cli_errmsg("HWPML: Can't stat file descriptor\n");
            return CL_ESTAT;
        }

        if (!(input = fmap(fd, 0, statbuf.st_size))) {
            cli_errmsg("HWPML: Failed to get fmap for binary data\n");
            return CL_EMAP;
        }

        /* send data for base64 conversion - TODO: what happens with really big files? */
        if (!(instream = fmap_need_off_once(input, 0, input->len))) {
            cli_errmsg("HWPML: Failed to get input stream from binary data\n");
            funmap(input);
            return CL_EMAP;
        }

        decoded = (char *)cl_base64_decode(instream, input->len, NULL, &decodedlen, 0);
        funmap(input);
        if (!decoded) {
            cli_errmsg("HWPML: Failed to get base64 decode binary data\n");
            return cli_magic_scandesc(fd, ctx);
        }

        /* open file for writing and scanning */
        if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tempfile, &df)) != CL_SUCCESS) {
            /* tempfile cannot be relied on after a failure, so it is neither printed nor freed */
            cli_warnmsg("HWPML: Failed to create temporary file for decoded binary data\n");
            free(decoded);
            return ret;
        }

        if (cli_writen(df, decoded, decodedlen) != (int)decodedlen) {
            free(decoded);
            ret = CL_EWRITE;
            goto hwpml_end;
        }
        free(decoded);

        /* keeps the later logic simpler */
        fd = df;
        cli_dbgmsg("HWPML: Decoded binary data to %s\n", tempfile);
    }

    /* decompress the file if needed - zlib */
    if (com) {
        STATBUF statbuf;
        fmap_t *input;

        hwpml_debug("HWPML: Decompressing binary data\n");

        /* fmap the input file for easier manipulation */
        if (FSTAT(fd, &statbuf) == -1) {
            cli_errmsg("HWPML: Can't stat file descriptor\n");
            ret = CL_ESTAT;
            goto hwpml_end;
        }

        input = fmap(fd, 0, statbuf.st_size);
        if (!input) {
            cli_errmsg("HWPML: Failed to get fmap for binary data\n");
            ret = CL_EMAP;
            goto hwpml_end;
        }
        ret = decompress_and_callback(ctx, input, 0, 0, "HWPML", hwpml_scan_cb, NULL);
        funmap(input);
    } else {
        ret = hwpml_scan_cb(NULL, fd, ctx);
    }

    /* release the decoded temp file if one was created */
hwpml_end:
    if (df >= 0) {
        close(df);
        if (!(ctx->engine->keeptmp))
            cli_unlink(tempfile);
        free(tempfile);
    }
    return ret;
}
/**
 * Entry point for HWPML (XML) documents.
 *
 * With libxml2 available, an XML reader is created over the scan context's
 * map and the document is run through the msxml parser with the hwpml_keys
 * table and hwpml_binary_cb as the scan callback.  Without libxml2 the
 * document is not parsed and CL_SUCCESS is returned.
 *
 * @param ctx  scan context
 *
 * @return CL_ENULLARG when ctx is NULL; when the reader cannot be created,
 *         the cli_json_parse_error() result (JSON builds) or CL_SUCCESS;
 *         otherwise the result of cli_msxml_parse_document().
 */
int cli_scanhwpml(cli_ctx *ctx)
{
#if HAVE_LIBXML2
    struct msxml_cbdata cbdata;
    xmlTextReaderPtr reader = NULL;
    int ret = CL_SUCCESS;

    cli_dbgmsg("in cli_scanhwpml()\n");

    if (!ctx)
        return CL_ENULLARG;

    memset(&cbdata, 0, sizeof(cbdata));
    cbdata.map = *ctx->fmap;

    reader = xmlReaderForIO(msxml_read_cb, NULL, &cbdata, "hwpml.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
    if (!reader) {
        cli_dbgmsg("cli_scanhwpml: cannot intialize xmlReader\n");

#if HAVE_JSON
        ret = cli_json_parse_error(ctx->wrkproperty, "HWPML_ERROR_XML_READER_IO");
#endif
        return ret; /* libxml2 failed! */
    }

    ret = cli_msxml_parse_document(ctx, reader, hwpml_keys, num_hwpml_keys, 1, hwpml_binary_cb);

    xmlTextReaderClose(reader);
    xmlFreeTextReader(reader);
    return ret;
#else
    UNUSEDPARAM(ctx);
    cli_dbgmsg("in cli_scanhwpml()\n");
    cli_dbgmsg("cli_scanhwpml: scanning hwpml documents requires libxml2!\n");

    return CL_SUCCESS;
#endif
}