From afd00a720776ecbddac2ced7f3a379fa9472340b Mon Sep 17 00:00:00 2001
From: Kevin Lin
Date: Fri, 9 May 2014 13:30:32 -0400
Subject: [PATCH] ooxml: added checks for property keys ooxml: added lists of
 property tags ooxml: added the ability to skip invalid elements

---
 libclamav/ooxml.c | 340 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 337 insertions(+), 3 deletions(-)

diff --git a/libclamav/ooxml.c b/libclamav/ooxml.c
index 0fd1ce182..c2e11118f 100644
--- a/libclamav/ooxml.c
+++ b/libclamav/ooxml.c
@@ -45,7 +45,8 @@
 #endif
 
 #define OOXML_JSON_RECLEVEL 16
-#define OOXML_JSON_RECLEVEL_MAX 32
+#define OOXML_JSON_RECLEVEL_MAX 5
+#define OOXML_JSON_STRLEN_MAX 100
 
 static int ooxml_is_int(const char *value, size_t len, int32_t *val2)
 {
@@ -62,6 +63,337 @@ static int ooxml_is_int(const char *value, size_t len, int32_t *val2)
     return 1;
 }
 
+/*
+ * Lower-case XML element names accepted in the property documents.
+ * Entry i pairs with ooxml_json_keys[i]; keep both tables in the same order.
+ */
+static const char *ooxml_keys[] = {
+    "coreproperties",
+    "title",
+    "subject",
+    "creator",
+    "keywords",
+    "comments",
+    "description",
+    "lastmodifiedby",
+    "revision",
+    "created",
+    "modified",
+    "category",
+    "contentstatus",
+
+    "properties",
+    "application",
+    "appversion",
+    "characters",
+    "characterswithspaces",
+    "company",
+    "digsig",
+    "docsecurity",
+    //"headingpairs",
+    "hiddenslides",
+    "hlinks",
+    "hyperlinkbase",
+    "hyperlinkschanged",
+    "lines",
+    "linksuptodate",
+    "manager",
+    "mmclips",
+    "notes",
+    "pages",
+    "paragraphs",
+    "presentationformat",
+    "properties", /* never matched: lookup stops at the first "properties" */
+    "scalecrop",
+    "shareddoc",
+    "slides",
+    "template",
+    //"titlesofparts",
+    "totaltime",
+    "words"
+};
+
+/* JSON key emitted for the element name at the same index in ooxml_keys. */
+static const char *ooxml_json_keys[] = {
+    "CoreProperties",
+    "Title",
+    "Subject",
+    "Author",
+    "Keywords",
+    "Comments",
+    "Description",
+    "LastAuthor",
+    "Revision",
+    "Created",
+    "Modified",
+    "Category",
+    "ContentStatus",
+
+    "ExtendedProperties",
+    "Application",
+    "AppVersion",
+    "Characters",
+    "CharactersWithSpaces",
+    "Company",
+    "DigSig",
+    "DocSecurity",
+    //"HeadingPairs",
+    "HiddenSlides",
+    "HLinks",
+    "HyperlinkBase",
+    "HyperlinksChanged",
+    "Lines",
+    "LinksUpToDate",
+    "Manager",
+    "MMClips",
+    "Notes",
+    "Pages",
+    "Paragraphs",
+    "PresentationFormat",
+    "Properties",
+    "ScaleCrop",
+    "SharedDoc",
+    "Slides",
+    "Template",
+    //"TitlesOfParts",
+    "TotalTime",
+    "Words"
+};
+
+/* Entry count used for lookups; derived so it cannot drift from ooxml_keys. */
+static const size_t num_ooxml_keys = sizeof(ooxml_keys) / sizeof(ooxml_keys[0]);
+
+/*
+ * Map an XML element name to its JSON property key.
+ *
+ * key is read for keylen bytes and compared ASCII case-insensitively against
+ * ooxml_keys. Returns the matching ooxml_json_keys entry, or NULL when the
+ * name is longer than OOXML_JSON_STRLEN_MAX-1 bytes or is not in the table.
+ */
+static const char *ooxml_check_key(const char* key, size_t keylen)
+{
+    size_t i;
+    char keycmp[OOXML_JSON_STRLEN_MAX];
+
+    if (keylen > OOXML_JSON_STRLEN_MAX-1) {
+        cli_dbgmsg("ooxml_check_key: key name too long\n");
+        return NULL;
+    }
+
+    for (i = 0; i < keylen; i++) {
+        if (key[i] >= 'A' && key[i] <= 'Z') {
+            keycmp[i] = key[i] - 'A' + 'a';
+        }
+        else {
+            keycmp[i] = key[i];
+        }
+    }
+    keycmp[keylen] = '\0';
+
+    for (i = 0; i < num_ooxml_keys; ++i) {
+        if (keylen == strlen(ooxml_keys[i]) && !strncmp(keycmp, ooxml_keys[i], keylen)) {
+            return ooxml_json_keys[i];
+        }
+    }
+
+    return NULL;
+}
+
+#if HAVE_LIBXML2 /* NOTE(review): ooxml_core_cb/ooxml_extn_cb call ooxml_parse_document; confirm they are guarded too */
+#if HAVE_JSON
+/*
+ * Recursively convert the XML element the reader is positioned on into JSON.
+ *
+ * reader: libxml2 text reader; its current node must be an element start.
+ * wrkptr: JSON object that receives this element's value; NULL forces skip.
+ * rlvl:   nesting depth; OOXML_JSON_RECLEVEL_MAX or deeper yields CL_EMAXREC.
+ * skip:   non-zero consumes the element and its children without adding JSON.
+ *
+ * Elements whose name is not in ooxml_keys are skipped rather than rejected.
+ * Returns CL_SUCCESS when the element has been consumed, else a CL_E* code.
+ */
+static int ooxml_parse_element(xmlTextReaderPtr reader, json_object *wrkptr, int rlvl, int skip)
+{
+    const char *element_tag = NULL, *end_tag = NULL;
+    const xmlChar *node_name = NULL, *node_value = NULL;
+    json_object *njptr;
+    int node_type, ret = CL_SUCCESS;
+    int32_t val2;
+
+    cli_dbgmsg("in ooxml_parse_element @ layer %d\n", rlvl);
+
+    /* check recursion level */
+    if (rlvl >= OOXML_JSON_RECLEVEL_MAX) {
+        return CL_EMAXREC;
+    }
+
+    if (wrkptr == NULL) {
+        skip = 1;
+    }
+
+    /* acquire element type */
+    node_type = xmlTextReaderNodeType(reader);
+    if (node_type != XML_READER_TYPE_ELEMENT) {
+        cli_dbgmsg("ooxml_parse_element: first node typed %d, not %d\n", node_type, XML_READER_TYPE_ELEMENT);
+        return CL_EPARSE; /* first type is not an element */
+    }
+
+    /* acquire element tag */
+    node_name = xmlTextReaderConstLocalName(reader);
+    if (!node_name) {
+        cli_dbgmsg("ooxml_parse_element: element tag node nameless\n");
+        return CL_EPARSE; /* no name, nameless */
+    }
+    element_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
+    if (!element_tag) {
+        cli_dbgmsg("ooxml_parse_element: invalid element tag [%s]\n", (const char *)node_name);
+        skip = 1; /* skipping element */
+    }
+
+    /* handle attributes if you want */
+
+    /* a self-closing element has no content and no end-tag node to wait for */
+    if (xmlTextReaderIsEmptyElement(reader) == 1) {
+        return CL_SUCCESS;
+    }
+
+    /* loop across all element contents */
+    while (xmlTextReaderRead(reader) == 1) {
+        node_type = xmlTextReaderNodeType(reader);
+        switch (node_type) {
+        case XML_READER_TYPE_ELEMENT:
+            if (!skip) {
+                njptr = json_object_object_get(wrkptr, element_tag);
+                if (!njptr) {
+                    njptr = json_object_new_object();
+                    if (NULL == njptr) {
+                        cli_errmsg("ooxml_parse_element: no memory for json object.\n");
+                        return CL_EMEM;
+                    }
+                    cli_dbgmsg("ooxml_parse_element: added json object [%s]\n", element_tag);
+                    json_object_object_add(wrkptr, element_tag, njptr);
+                }
+                else {
+                    if (!json_object_is_type(njptr, json_type_object)) {
+                        cli_warnmsg("ooxml_parse_element: json object [%s] already exists as not an object\n", element_tag);
+                        return CL_EFORMAT;
+                    }
+                }
+            }
+            else {
+                njptr = NULL;
+            }
+
+            ret = ooxml_parse_element(reader, njptr, rlvl+1, skip);
+            if (ret != CL_SUCCESS) {
+                return ret;
+            }
+            break;
+        case XML_READER_TYPE_END_ELEMENT:
+            cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl);
+            node_name = xmlTextReaderConstLocalName(reader);
+            if (!node_name) {
+                cli_dbgmsg("ooxml_parse_element: element end tag node nameless\n");
+                return CL_EPARSE; /* no name, nameless */
+            }
+            if (!skip) {
+                end_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
+                if (!end_tag) {
+                    cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", (const char *)node_name);
+                    return CL_EFORMAT; /* unrecognized element tag */
+                }
+                /* whole-key compare; a prefix test lets "Characters" match "CharactersWithSpaces" */
+                if (strcmp(element_tag, end_tag)) {
+                    cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n");
+                    return CL_EFORMAT;
+                }
+            }
+            return CL_SUCCESS;
+        case XML_READER_TYPE_TEXT:
+            if (!skip) {
+                node_value = xmlTextReaderConstValue(reader);
+                if (!node_value) {
+                    cli_dbgmsg("ooxml_parse_element: text node without value\n");
+                    break;
+                }
+
+                njptr = json_object_object_get(wrkptr, element_tag);
+                if (njptr) {
+                    cli_warnmsg("ooxml_parse_element: json object [%s] already exists\n", element_tag);
+                }
+
+                if (ooxml_is_int((const char *)node_value, xmlStrlen(node_value), &val2)) {
+                    ret = cli_jsonint(wrkptr, element_tag, val2);
+                }
+                else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
+                    ret = cli_jsonbool(wrkptr, element_tag, 1);
+                }
+                else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
+                    ret = cli_jsonbool(wrkptr, element_tag, 0);
+                }
+                else {
+                    ret = cli_jsonstr(wrkptr, element_tag, (const char *)node_value);
+                }
+
+                if (ret != CL_SUCCESS)
+                    return ret;
+
+                cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, (const char *)node_value);
+            }
+            else {
+                node_name = xmlTextReaderConstLocalName(reader);
+                node_value = xmlTextReaderConstValue(reader);
+
+                cli_dbgmsg("ooxml_parse_element: not adding xml node %s [%d]: %s\n", node_name ? (const char *)node_name : "(null)", node_type, node_value ? (const char *)node_value : "(null)");
+            }
+            break;
+        default:
+            node_name = xmlTextReaderConstLocalName(reader);
+            node_value = xmlTextReaderConstValue(reader);
+
+            cli_dbgmsg("ooxml_parse_element: unhandled xml node %s [%d]: %s\n", node_name ? (const char *)node_name : "(null)", node_type, node_value ? (const char *)node_value : "(null)");
+            return CL_EPARSE;
+        }
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Parse the property XML read from fd into ctx->wrkproperty.
+ *
+ * A libxml2 failure to create the reader or to read the first node is logged
+ * and reported as CL_SUCCESS; otherwise the ooxml_parse_element result is
+ * returned. The reader is released on every path after it has been created.
+ */
+static int ooxml_parse_document(int fd, cli_ctx *ctx)
+{
+    int ret = CL_SUCCESS;
+    xmlTextReaderPtr reader = NULL;
+
+    cli_dbgmsg("in ooxml_parse_document\n");
+
+    reader = xmlReaderForFd(fd, "properties.xml", NULL, 0);
+    if (reader == NULL) {
+        cli_dbgmsg("ooxml_parse_document: xmlReaderForFd error\n");
+        return CL_SUCCESS; // internal error from libxml2
+    }
+
+    /* move reader to first element */
+    if (xmlTextReaderRead(reader) == 1) {
+        ret = ooxml_parse_element(reader, ctx->wrkproperty, 0, 0);
+    }
+    else {
+        cli_dbgmsg("ooxml_parse_document: xmlTextReaderRead error\n"); /* libxml2 failed; not fatal */
+    }
+
+    xmlTextReaderClose(reader);
+    xmlFreeTextReader(reader);
+    return ret;
+}
+#endif
+#endif
+
 static int ooxml_basic_json(int fd, cli_ctx *ctx, const char *key)
 {
     int ret = CL_SUCCESS;
@@ -179,13 +511,15 @@ static int ooxml_basic_json(int fd, cli_ctx *ctx, const char *key)
 static int ooxml_core_cb(int fd, cli_ctx *ctx)
 {
     cli_dbgmsg("in ooxml_core_cb\n");
-    return ooxml_basic_json(fd, ctx, "CoreProperties");
+    return ooxml_parse_document(fd, ctx);
+    //return ooxml_basic_json(fd, ctx, "CoreProperties");
 }
 
 static int ooxml_extn_cb(int fd, cli_ctx *ctx)
 {
     cli_dbgmsg("in ooxml_extn_cb\n");
-    return ooxml_basic_json(fd, ctx, "ExtendedProperties");
+    return ooxml_parse_document(fd, ctx);
+    //return ooxml_basic_json(fd, ctx, "ExtendedProperties");
 }
 
 static int ooxml_content_cb(int fd, cli_ctx *ctx)