Hwp3.x: simplify property string handling

remotes/push_mirror/msola
Kevin Lin 10 years ago
parent 5cab01bd9f
commit 5b2a3e5ad1
  1. 111
      libclamav/hwp.c

@ -36,6 +36,7 @@
#include "clamav.h"
#include "fmap.h"
#include "str.h"
#include "others.h"
#include "scanners.h"
#include "json_api.h"
@ -355,90 +356,12 @@ struct hwp3_docsummary_entry {
{ 448, "Keyword1" }, /* offset 448 (2 x 56 x 2 bytes) - keywords */
{ 560, "Keyword2" },
{ 672, "Guitar0" }, /* offset 672 (3 x 56 x 2 bytes) - WTF guitar? */
{ 784, "Guitar1" },
{ 896, "Guitar2" }
{ 672, "Etc0" }, /* offset 672 (3 x 56 x 2 bytes) - etc */
{ 784, "Etc1" },
{ 896, "Etc2" }
};
/* Number of entries in hwp3_docsummary_fields. The expansion is fully
 * parenthesized so it stays a single operand inside larger expressions. */
#define NUM_DOCSUMMARY_FIELDS (sizeof(hwp3_docsummary_fields) / sizeof(hwp3_docsummary_fields[0]))
/*
 * Convert a little-endian UTF-16 buffer to a newly allocated, NUL-terminated
 * UTF-8 string.
 *
 * begin: start of the UTF-16LE data (need not be NUL-terminated)
 * sz:    length of the data in bytes
 *
 * Returns a heap string that the caller must free(), or NULL when the build
 * has no iconv support, iconv could not be initialized, or an allocation
 * failed. When the input contains an invalid or incomplete sequence, the text
 * converted up to that point is returned.
 */
static char *hwp_convert_utf16le(const uint8_t *begin, size_t sz)
{
    char *outbuf = NULL;
#if HAVE_ICONV
    char *inbuf, *inptr, *outptr;
    size_t inlen, outlen, outcap, rc;
    iconv_t cd;

    /* Guard the capacity computation below against size_t overflow. */
    if (!begin || sz > ((size_t)-1) / 2)
        return NULL;

    /* A 2-byte UTF-16 unit expands to at most 3 UTF-8 bytes. */
    outcap = sz + sz / 2;

    cd = iconv_open("UTF-8", "UTF-16LE");
    if (cd == (iconv_t)(-1)) {
        char errbuf[128];
        cli_strerror(errno, errbuf, sizeof(errbuf));
        cli_errmsg("hwp_convert_utf16le: could not initialize iconv for encoding UTF-16LE: %s\n", errbuf);
        /* TODO: JSON FAILURE TRACKING */
        /* Nothing was opened, so there is no descriptor to close. */
        return NULL;
    }

    /* iconv() takes a non-const input pointer, so convert from a private copy. */
    inptr = inbuf = cli_calloc(1, sz);
    outptr = outbuf = cli_calloc(1, outcap + 1);
    if (!inbuf || !outbuf) {
        free(inbuf);
        free(outbuf);
        iconv_close(cd);
        return NULL;
    }
    memcpy(inbuf, begin, sz);
    inlen  = sz;
    outlen = outcap;

    rc = iconv(cd, &inptr, &inlen, &outptr, &outlen);
    if (rc == (size_t)-1) {
        /* errno is only meaningful when iconv() itself reported failure. */
        switch (errno) {
            case EILSEQ:
                cli_dbgmsg("hwp_convert_utf16le: input buffer contains invalid character for its encoding\n");
                /* TODO: JSON FAILURE TRACKING */
                break;
            case EINVAL:
                cli_dbgmsg("hwp_convert_utf16le: input buffer contains incomplete multibyte character\n");
                /* TODO: JSON FAILURE TRACKING */
                break;
            case E2BIG:
                cli_dbgmsg("hwp_convert_utf16le: buffer could not be fully translated\n");
                /* TODO: JSON FAILURE TRACKING */
                break;
            default:
                break;
        }
    }

    /* outcap - outlen is the number of bytes iconv() actually wrote. */
    outbuf[outcap - outlen] = '\0';

    iconv_close(cd);
    free(inbuf);
#endif
    return outbuf; /* if this is NULL, we should base64 encode the data */
}
static inline int parsehwp3_docinfo(cli_ctx *ctx, struct hwp3_docinfo *docinfo)
{
const uint8_t *hwp3_ptr;
@ -519,12 +442,25 @@ static inline int parsehwp3_docsummary(cli_ctx *ctx)
}
for (i = 0; i < NUM_DOCSUMMARY_FIELDS; i++) {
str = hwp_convert_utf16le(hwp3_ptr+hwp3_docsummary_fields[i].offset, 112);
/*
if (!str)
TODO: BASE64
TODO: COMPRESSED SCANNING (FOR THE HWP5)
*/
str = cli_utf16_to_utf8(hwp3_ptr+hwp3_docsummary_fields[i].offset, 112, UTF16_LE);
if (!str) {
char *b64;
size_t b64len = strlen(hwp3_docsummary_fields[i].name)+8;
b64 = cli_calloc(1, b64len);
if (!b64) {
cli_errmsg("HWP3.x: Failed to allocate memory for b64 boolean\n");
return CL_EMEM;
}
snprintf(b64, b64len, "%s_base64", hwp3_docsummary_fields[i].name);
cli_jsonbool(summary, b64, 1);
free(b64);
str = (char *)cl_base64_encode(hwp3_ptr+hwp3_docsummary_fields[i].offset, 112);
}
if (!str) {
cli_errmsg("HWP3.x: Failed to generate UTF-8 conversion of property string\n");
return CL_EMEM;
}
hwp3_debug("HWP3.x: %s, %s\n", hwp3_docsummary_fields[i].name, str);
ret = cli_jsonstr(summary, hwp3_docsummary_fields[i].name, str);
@ -532,7 +468,6 @@ static inline int parsehwp3_docsummary(cli_ctx *ctx)
if (ret != CL_SUCCESS)
return ret;
}
#endif
return CL_SUCCESS;
}

Loading…
Cancel
Save