ooxml: moved value parsing into separate function

ooxml: cleaned up comments
ooxml: restructured code for continued parsing after exceeding rlvl limit
pull/6/head
Kevin Lin 12 years ago
parent 90f830e7c5
commit d48fd4bbf2
  1. 177
      libclamav/ooxml.c

@ -52,21 +52,41 @@
#define OOXML_JSON_RECLEVEL_MAX 5 #define OOXML_JSON_RECLEVEL_MAX 5
#define OOXML_JSON_STRLEN_MAX 100 #define OOXML_JSON_STRLEN_MAX 100
static int ooxml_is_int(const char *value, size_t len, int32_t *val2) static int ooxml_is_int(const char *value, size_t len, int32_t *val)
{ {
long val3; long val2;
char *endptr = NULL; char *endptr = NULL;
val3 = strtol(value, &endptr, 10); val2 = strtol(value, &endptr, 10);
if (endptr != value+len) { if (endptr != value+len) {
return 0; return 0;
} }
*val2 = (int32_t)(val3 & 0x0000ffff); *val = (int32_t)(val2 & 0x0000ffff);
return 1; return 1;
} }
/*
 * Store one XML text value in the JSON object `wrkptr` under the key
 * `element_tag`, choosing the JSON type from the text itself:
 *   - text accepted by ooxml_is_int()  -> integer (cli_jsonint)
 *   - text equal to "true" / "false"   -> boolean (cli_jsonbool)
 *   - anything else                    -> string  (cli_jsonstr)
 *
 * wrkptr      JSON object that receives the value.
 * element_tag key under which the value is stored.
 * node_value  text of the XML node; it is handed to ooxml_is_int(), which
 *             passes it straight to strtol(), so it must not be NULL.
 *
 * Returns the status code of whichever cli_json* call was made.
 */
static int ooxml_parse_value(json_object *wrkptr, const char *element_tag, const xmlChar *node_value)
{
    /* xmlChar is libxml2's byte type; the non-libxml2 helpers take char strings */
    const char *text = (const char *)node_value;
    /* ooxml_is_int() writes through an int32_t pointer, so use that exact type */
    int32_t val;

    if (ooxml_is_int(text, xmlStrlen(node_value), &val)) {
        return cli_jsonint(wrkptr, element_tag, val);
    }
    if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
        return cli_jsonbool(wrkptr, element_tag, 1);
    }
    if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
        return cli_jsonbool(wrkptr, element_tag, 0);
    }
    return cli_jsonstr(wrkptr, element_tag, text);
}
static const char *ooxml_keys[] = { static const char *ooxml_keys[] = {
"coreproperties", "coreproperties",
"title", "title",
@ -178,23 +198,22 @@ static const char *ooxml_check_key(const char* key, size_t keylen)
return NULL; return NULL;
} }
static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_object *wrkptr, int rlvl, int skip) static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_object *wrkptr, int rlvl)
{ {
const char *element_tag = NULL, *end_tag = NULL; const char *element_tag = NULL, *end_tag = NULL;
const xmlChar *node_name = NULL, *node_value = NULL; const xmlChar *node_name = NULL, *node_value = NULL;
json_object *njptr; json_object *thisjobj;
int node_type, ret = CL_SUCCESS, toval = 0;; int node_type, ret = CL_SUCCESS, endtag = 0, toval = 0;
int32_t val2;
cli_dbgmsg("in ooxml_parse_element @ layer %d\n", rlvl); cli_dbgmsg("in ooxml_parse_element @ layer %d\n", rlvl);
/* check recursion level */ /* check recursion level */
if (rlvl >= OOXML_JSON_RECLEVEL_MAX) { if (rlvl >= OOXML_JSON_RECLEVEL_MAX) {
return CL_EMAXREC; cli_dbgmsg("ooxml_parse_element: reached ooxml json recursion limit\n", node_name);
} /* skip it */
xmlTextReaderNext(reader);
if (wrkptr == NULL) { //return CL_EMAXREC;
skip = 1; return CL_SUCCESS;
} }
/* acquire element type */ /* acquire element type */
@ -204,7 +223,6 @@ static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_objec
return CL_EPARSE; /* first type is not an element */ return CL_EPARSE; /* first type is not an element */
} }
/* acquire element tag */
node_name = xmlTextReaderConstLocalName(reader); node_name = xmlTextReaderConstLocalName(reader);
if (!node_name) { if (!node_name) {
cli_dbgmsg("ooxml_parse_element: element tag node nameless\n"); cli_dbgmsg("ooxml_parse_element: element tag node nameless\n");
@ -213,13 +231,19 @@ static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_objec
element_tag = ooxml_check_key(node_name, xmlStrlen(node_name)); element_tag = ooxml_check_key(node_name, xmlStrlen(node_name));
if (!element_tag) { if (!element_tag) {
cli_dbgmsg("ooxml_parse_element: invalid element tag [%s]\n", node_name); cli_dbgmsg("ooxml_parse_element: invalid element tag [%s]\n", node_name);
skip = 1; /* skipping element */ /* skip it */
xmlTextReaderNext(reader);
return CL_SUCCESS;
} }
/* handle attributes if you want */ /* handle attributes if you want */
/* loop across all element contents */ /* advance to first content node */
while (xmlTextReaderRead(reader) == 1) { if (xmlTextReaderRead(reader) != 1)
return CL_EPARSE;
/* parse until the end element tag */
while (!endtag) {
if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) { if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
return CL_ETIMEOUT; return CL_ETIMEOUT;
} }
@ -227,23 +251,19 @@ static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_objec
node_type = xmlTextReaderNodeType(reader); node_type = xmlTextReaderNodeType(reader);
switch (node_type) { switch (node_type) {
case XML_READER_TYPE_ELEMENT: case XML_READER_TYPE_ELEMENT:
if (!skip) { /* generate json object node */
njptr = cli_jsonobj(wrkptr, element_tag); thisjobj = cli_jsonobj(wrkptr, element_tag);
if (!njptr) { if (!thisjobj) {
cli_errmsg("ooxml_parse_element: failed to retrieve node for json object [%s]\n", element_tag); return CL_EPARSE;
return CL_EFORMAT;
}
cli_dbgmsg("ooxml_parse_element: added json object [%s]\n", element_tag);
} }
else { cli_dbgmsg("ooxml_parse_element: retrieved json object [%s]\n", element_tag);
njptr = NULL;
}
ret = ooxml_parse_element(ctx, reader, njptr, rlvl+1, skip); ret = ooxml_parse_element(ctx, reader, thisjobj, rlvl+1);
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
return ret; return ret;
} }
break; break;
case XML_READER_TYPE_END_ELEMENT: case XML_READER_TYPE_END_ELEMENT:
cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl); cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl);
node_name = xmlTextReaderConstLocalName(reader); node_name = xmlTextReaderConstLocalName(reader);
@ -251,49 +271,39 @@ static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_objec
cli_dbgmsg("ooxml_parse_element: element end tag node nameless\n"); cli_dbgmsg("ooxml_parse_element: element end tag node nameless\n");
return CL_EPARSE; /* no name, nameless */ return CL_EPARSE; /* no name, nameless */
} }
if (!skip) {
end_tag = ooxml_check_key(node_name, xmlStrlen(node_name)); end_tag = ooxml_check_key(node_name, xmlStrlen(node_name));
if (!end_tag) { if (!end_tag) {
cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", node_name); cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", node_name);
return CL_EFORMAT; /* unrecognized element tag */ return CL_EFORMAT; /* unrecognized element tag */
} }
if (strncmp(element_tag, end_tag, strlen(element_tag))) { if (strncmp(element_tag, end_tag, strlen(element_tag))) {
cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n"); cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n");
return CL_EFORMAT; return CL_EFORMAT;
}
} }
return CL_SUCCESS;
/* advance to next element tag */
if (xmlTextReaderRead(reader) != 1)
return CL_EPARSE;
endtag = 1;
break;
case XML_READER_TYPE_TEXT: case XML_READER_TYPE_TEXT:
if (!skip) { node_value = xmlTextReaderConstValue(reader);
node_value = xmlTextReaderConstValue(reader);
if (ooxml_is_int(node_value, xmlStrlen(node_value), &val2)) { ret = ooxml_parse_value(wrkptr, element_tag, node_value);
ret = cli_jsonint(wrkptr, element_tag, val2); if (ret != CL_SUCCESS)
} return ret;
else if (!xmlStrcmp(node_value, "true")) {
ret = cli_jsonbool(wrkptr, element_tag, 1);
}
else if (!xmlStrcmp(node_value, "false")) {
ret = cli_jsonbool(wrkptr, element_tag, 0);
}
else {
ret = cli_jsonstr(wrkptr, element_tag, node_value);
}
if (ret != CL_SUCCESS) cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value);
return ret;
cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value); /* advance to next element tag */
} if (xmlTextReaderRead(reader) != 1)
#if OOXML_DEBUG return CL_EPARSE;
else {
node_name = xmlTextReaderConstLocalName(reader);
node_value = xmlTextReaderConstValue(reader);
cli_dbgmsg("ooxml_parse_element: not adding xml node %s [%d]: %s\n", node_name, node_type, node_value);
}
#endif
break; break;
default: default:
#if OOXML_DEBUG #if OOXML_DEBUG
node_name = xmlTextReaderConstLocalName(reader); node_name = xmlTextReaderConstLocalName(reader);
@ -326,7 +336,7 @@ static int ooxml_parse_document(int fd, cli_ctx *ctx)
return CL_SUCCESS; /* libxml2 failed */ return CL_SUCCESS; /* libxml2 failed */
} }
ret = ooxml_parse_element(ctx, reader, ctx->wrkproperty, 0, 0); ret = ooxml_parse_element(ctx, reader, ctx->wrkproperty, 0);
xmlTextReaderClose(reader); xmlTextReaderClose(reader);
xmlFreeTextReader(reader); xmlFreeTextReader(reader);
@ -351,6 +361,7 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
{ {
int ret = CL_SUCCESS, tmp, toval = 0; int ret = CL_SUCCESS, tmp, toval = 0;
int core=0, extn=0, cust=0, dsig=0; int core=0, extn=0, cust=0, dsig=0;
int mcore=0, mextn=0, mcust=0;
const xmlChar *name, *value, *CT, *PN; const xmlChar *name, *value, *CT, *PN;
xmlTextReaderPtr reader = NULL; xmlTextReaderPtr reader = NULL;
uint32_t loff; uint32_t loff;
@ -404,13 +415,14 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
} }
else if (tmp != CL_VIRUS) { else if (tmp != CL_VIRUS) {
cli_dbgmsg("cli_process_ooxml: failed to find core properties file \"%s\"!\n", PN); cli_dbgmsg("cli_process_ooxml: failed to find core properties file \"%s\"!\n", PN);
mcore++;
} }
else { else {
cli_dbgmsg("ooxml_content_cb: found core properties file \"%s\" @ %x\n", PN, loff); cli_dbgmsg("ooxml_content_cb: found core properties file \"%s\" @ %x\n", PN, loff);
ret = unzip_single_internal(ctx, loff, ooxml_core_cb); ret = unzip_single_internal(ctx, loff, ooxml_core_cb);
core++;
} }
} }
core++;
} }
else if (!xmlStrcmp(CT, "application/vnd.openxmlformats-officedocument.extended-properties+xml")) { else if (!xmlStrcmp(CT, "application/vnd.openxmlformats-officedocument.extended-properties+xml")) {
if (!extn) { if (!extn) {
@ -421,13 +433,14 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
} }
else if (tmp != CL_VIRUS) { else if (tmp != CL_VIRUS) {
cli_dbgmsg("cli_process_ooxml: failed to find extended properties file \"%s\"!\n", PN); cli_dbgmsg("cli_process_ooxml: failed to find extended properties file \"%s\"!\n", PN);
mextn++;
} }
else { else {
cli_dbgmsg("ooxml_content_cb: found extended properties file \"%s\" @ %x\n", PN, loff); cli_dbgmsg("ooxml_content_cb: found extended properties file \"%s\" @ %x\n", PN, loff);
ret = unzip_single_internal(ctx, loff, ooxml_extn_cb); ret = unzip_single_internal(ctx, loff, ooxml_extn_cb);
extn++;
} }
} }
extn++;
} }
else if (!xmlStrcmp(CT, "application/vnd.openxmlformats-officedocument.custom-properties+xml")) { else if (!xmlStrcmp(CT, "application/vnd.openxmlformats-officedocument.custom-properties+xml")) {
if (!cust) { if (!cust) {
@ -438,13 +451,14 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
} }
else if (tmp != CL_VIRUS) { else if (tmp != CL_VIRUS) {
cli_dbgmsg("cli_process_ooxml: failed to find custom properties file \"%s\"!\n", PN); cli_dbgmsg("cli_process_ooxml: failed to find custom properties file \"%s\"!\n", PN);
mcust++;
} }
else { else {
cli_dbgmsg("ooxml_content_cb: found custom properties file \"%s\" @ %x\n", PN, loff); cli_dbgmsg("ooxml_content_cb: found custom properties file \"%s\" @ %x\n", PN, loff);
cust++;
//ret = unzip_single_internal(ctx, loff, ooxml_cust_cb); //ret = unzip_single_internal(ctx, loff, ooxml_cust_cb);
} }
} }
cust++;
} }
else if (!xmlStrcmp(CT, "application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml")) { else if (!xmlStrcmp(CT, "application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml")) {
dsig++; dsig++;
@ -454,24 +468,27 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
goto ooxml_content_exit; goto ooxml_content_exit;
} }
if (core) { if (core)
cli_jsonint(ctx->wrkproperty, "CorePropertiesFileCount", core); cli_jsonint(ctx->wrkproperty, "CorePropertiesFileCount", core);
} else if (!mcore)
else {
cli_dbgmsg("cli_process_ooxml: file does not contain core properties file\n"); cli_dbgmsg("cli_process_ooxml: file does not contain core properties file\n");
} if (mcore)
if (extn) { cli_jsonint(ctx->wrkproperty, "CorePropertiesMissingFileCount", core);
if (extn)
cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesFileCount", extn); cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesFileCount", extn);
} else if (!mextn)
else {
cli_dbgmsg("cli_process_ooxml: file does not contain extended properties file\n"); cli_dbgmsg("cli_process_ooxml: file does not contain extended properties file\n");
} if (mextn)
if (cust) { cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesMissingFileCount", extn);
if (cust)
cli_jsonint(ctx->wrkproperty, "CustomPropertiesFileCount", cust); cli_jsonint(ctx->wrkproperty, "CustomPropertiesFileCount", cust);
} else if (!mcust)
else {
cli_dbgmsg("cli_process_ooxml: file does not contain custom properties file\n"); cli_dbgmsg("cli_process_ooxml: file does not contain custom properties file\n");
} if (mcust)
cli_jsonint(ctx->wrkproperty, "CustomPropertiesMissingFileCount", cust);
if (dsig) { if (dsig) {
cli_jsonint(ctx->wrkproperty, "DigitalSignaturesCount", dsig); cli_jsonint(ctx->wrkproperty, "DigitalSignaturesCount", dsig);
} }

Loading…
Cancel
Save