|
|
|
@ -31,7 +31,7 @@ |
|
|
|
|
#include "json.h" |
|
|
|
|
#endif |
|
|
|
|
#include "json_api.h" |
|
|
|
|
|
|
|
|
|
#include "msxml_parser.h" |
|
|
|
|
#include "ooxml.h" |
|
|
|
|
|
|
|
|
|
#if HAVE_LIBXML2 |
|
|
|
@ -47,362 +47,55 @@ |
|
|
|
|
|
|
|
|
|
#if HAVE_LIBXML2 && HAVE_JSON |
|
|
|
|
|
|
|
|
|
/* Recursion constant; not referenced by any code visible in this section. */
#define OOXML_JSON_RECLEVEL 16

/* ooxml_parse_element() skips any element found at this nesting level or deeper. */
#define OOXML_JSON_RECLEVEL_MAX 5

/* ooxml_check_key() rejects element names longer than this value minus one. */
#define OOXML_JSON_STRLEN_MAX 100
|
|
|
|
|
|
|
|
|
/*
 * check_state - leave the enclosing function when an xmlTextReader call did
 * not deliver a node.
 *
 * `state` is the tri-state status returned by calls such as
 * xmlTextReaderRead() / xmlTextReaderNext():
 *   -1         log a warning and return CL_EPARSE from the enclosing function
 *    0         log a debug message and return CL_BREAK from the enclosing function
 *   otherwise  fall through and continue
 *
 * The argument is parenthesized and evaluated exactly once, so an expression
 * with side effects may be passed safely.
 */
#define check_state(state)                                                   \
    do {                                                                     \
        const int check_state_rc_ = (state);                                 \
        if (check_state_rc_ == -1) {                                         \
            cli_warnmsg("check_state[ooxml]: CL_EPARSE @ ln%d\n", __LINE__); \
            return CL_EPARSE;                                                \
        }                                                                    \
        else if (check_state_rc_ == 0) {                                     \
            cli_dbgmsg("check_state[ooxml]: CL_BREAK @ ln%d\n", __LINE__);   \
            return CL_BREAK;                                                 \
        }                                                                    \
    } while(0)
|
|
|
|
|
|
|
|
|
/*
 * ooxml_is_int - test whether a string is a base-10 integer that fits in an
 * int32_t, and convert it if so.
 *
 * value  NUL-terminated string to examine
 * len    number of bytes of `value` that must be consumed by the number
 * val    receives the converted value on success; untouched on failure
 *
 * Returns 1 when exactly `len` bytes were consumed by strtol() and the result
 * lies within the int32_t range, 0 otherwise.  Empty input and NULL pointers
 * are rejected.  Out-of-range numbers are rejected rather than truncated, so
 * the caller can fall back to storing the text as a string.
 *
 * NOTE(review): where `long` is only 32 bits wide strtol() saturates at the
 * int32_t limits, so an overflowing literal is reported as that limit.
 */
static int ooxml_is_int(const char *value, size_t len, int32_t *val)
{
    char *endptr = NULL;
    long parsed;

    if (value == NULL || val == NULL || len == 0)
        return 0;

    parsed = strtol(value, &endptr, 10);
    if (endptr != value + len) {
        /* trailing garbage, or nothing numeric at all */
        return 0;
    }

    /* literal bounds: INT32_MAX / INT32_MIN may not be provided by every
     * header that supplies int32_t */
    if (parsed > 2147483647L || parsed < -2147483647L - 1L)
        return 0;

    *val = (int32_t)parsed;

    return 1;
}
|
|
|
|
|
|
|
|
|
/*
 * ooxml_add_parse_error - append an error tag to the "ParseErrors" array of a
 * JSON object.
 *
 * wrkptr  JSON object that carries the "ParseErrors" array
 * errstr  error tag to append
 *
 * Returns CL_ENULLARG when wrkptr is NULL, CL_EMEM when cli_jsonarray() yields
 * no array, otherwise whatever cli_jsonstr() returns.
 */
static int ooxml_add_parse_error(json_object *wrkptr, const xmlChar *errstr)
{
    json_object *errlist = NULL;

    if (wrkptr == NULL) {
        return CL_ENULLARG;
    }

    errlist = cli_jsonarray(wrkptr, "ParseErrors");
    if (!errlist)
        return CL_EMEM;

    /* the entry is added to the array without a key */
    return cli_jsonstr(errlist, NULL, errstr);
}
|
|
|
|
|
|
|
|
|
/*
 * ooxml_parse_value - append the text of an XML node, as a typed JSON value,
 * to the array `arrname` of `wrkptr`.
 *
 * The text is stored as
 *   - a JSON integer when ooxml_is_int() accepts it,
 *   - a JSON boolean when it is exactly "true" or "false",
 *   - a JSON string otherwise.
 *
 * wrkptr      JSON object that carries the array
 * arrname     name of the array to append to
 * node_value  text to convert
 *
 * Returns CL_SUCCESS on success, CL_ENULLARG when wrkptr or node_value is
 * NULL, and CL_EMEM when the array or the new value cannot be obtained.
 */
static int ooxml_parse_value(json_object *wrkptr, const char *arrname, const xmlChar *node_value)
{
    json_object *newobj, *arrobj;
    int32_t val; /* same type ooxml_is_int() writes through its out-pointer */

    /* a NULL value would otherwise be handed to strtol() and xmlStrcmp() */
    if (!wrkptr || !node_value)
        return CL_ENULLARG;

    arrobj = cli_jsonarray(wrkptr, arrname);
    if (arrobj == NULL) {
        return CL_EMEM;
    }

    if (ooxml_is_int((const char *)node_value, xmlStrlen(node_value), &val)) {
        newobj = json_object_new_int(val);
    }
    else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
        newobj = json_object_new_boolean(1);
    }
    else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
        newobj = json_object_new_boolean(0);
    }
    else {
        newobj = json_object_new_string((const char *)node_value);
    }

    if (NULL == newobj) {
        cli_errmsg("ooxml_parse_value: no memory for json value for [%s]\n", arrname);
        return CL_EMEM;
    }

    json_object_array_add(arrobj, newobj);
    return CL_SUCCESS;
}
|
|
|
|
|
|
|
|
|
/*
 * Element names recognised by ooxml_check_key(), which compares them
 * case-insensitively.  Entry i is translated to ooxml_json_keys[i], so both
 * tables must keep the same length and order (40 entries).
 */
static const char *ooxml_keys[] = {
    /* core properties */
    "coreproperties",
    "title", "subject", "creator", "keywords",
    "comments", "description", "lastmodifiedby", "revision",
    "created", "modified", "category", "contentstatus",

    /* extended properties */
    "properties",
    "application", "appversion", "characters", "characterswithspaces",
    "company", "digsig", "docsecurity",
    /* "headingpairs" is disabled */
    "hiddenslides", "hlinks", "hyperlinkbase", "hyperlinkschanged",
    "lines", "linksuptodate", "manager", "mmclips",
    "notes", "pages", "paragraphs", "presentationformat",
    "properties", "scalecrop", "shareddoc", "slides", "template",
    /* "titlesofparts" is disabled */
    "totaltime", "words"
};
|
|
|
|
/*
 * JSON key names returned by ooxml_check_key().  Entry i is the JSON name for
 * the element listed at ooxml_keys[i], so both tables must keep the same
 * length and order (40 entries).
 */
static const char *ooxml_json_keys[] = {
    /* core properties */
    "CoreProperties",
    "Title",
    "Subject",
    "Author",
    "Keywords",
    "Comments",
    "Description",
    "LastAuthor",
    "Revision",
    "Created",
    "Modified",
    "Category",
    "ContentStatus",

    /* extended properties */
    "ExtendedProperties",
    "Application",
    "AppVersion",
    "Characters",
    "CharactersWithSpaces",
    "Company",
    "DigSig",
    "DocSecurity",
    /* "HeadingPairs" is disabled */
    "HiddenSlides",
    "HLinks",
    "HyperlinkBase",
    "HyperlinksChanged",
    "Lines",
    "LinksUpToDate",
    "Manager",
    "MultimediaClips",
    "Notes",
    "Pages",
    "Paragraphs",
    "PresentationFormat",
    "Properties",
    "ScaleCrop",
    "SharedDoc",
    "Slides",
    "Template",
    /* "TitlesOfParts" is disabled */
    "TotalTime",
    "Words"
};
|
|
|
|
static const struct key_entry ooxml_keys[] = { |
|
|
|
|
{ "coreproperties", "CoreProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB }, |
|
|
|
|
{ "title", "Title", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "subject", "Subject", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "creator", "Author", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "keywords", "Keywords", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "comments", "Comments", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "description", "Description", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "lastmodifiedby", "LastAuthor", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "revision", "Revision", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "created", "Created", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "modified", "Modified", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "category", "Category", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "contentstatus", "ContentStatus", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
|
|
|
|
|
{ "properties", "ExtendedProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB }, |
|
|
|
|
{ "application", "Application", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "appversion", "AppVersion", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "characters", "Characters", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "company", "Company", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "digsig", "DigSig", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "docsecurity", "DocSecurity", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
//{ "headingpairs", "HeadingPairs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
|
|
|
|
|
{ "hiddenslides", "HiddenSlides", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "hlinks", "HLinks", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "hyperlinkbase", "HyperlinkBase", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "hyperlinkschanged", "HyperlinksChanged", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "lines", "Lines", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "linksuptodate", "LinksUpToDate", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "manager", "Manager", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "mmclips", "MultimediaClips", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "notes", "Notes", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "pages", "Pages", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "paragraphs", "Paragraphs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "presentationformat", "PresentationFormat", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
//{ "properties", "Properties", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
|
|
|
|
|
{ "scalecrop", "ScaleCrop", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "shareddoc", "SharedDocs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "slides", "Slides", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "template", "Template", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
//{ "titleofparts", "TitleOfParts", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
|
|
|
|
|
{ "totaltime", "TotalTime", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
{ "words", "Words", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, |
|
|
|
|
|
|
|
|
|
/* Should NOT Exist */ |
|
|
|
|
{ "bindata", "BinaryData", MSXML_SCAN_B64 | MSXML_JSON_COUNT | MSXML_JSON_ROOT } |
|
|
|
|
}; |
|
|
|
|
static size_t num_ooxml_keys = 40; //42
|
|
|
|
|
|
|
|
|
|
static const char *ooxml_check_key(const char* key, size_t keylen) |
|
|
|
|
{ |
|
|
|
|
unsigned i; |
|
|
|
|
|
|
|
|
|
if (keylen > OOXML_JSON_STRLEN_MAX-1) { |
|
|
|
|
cli_dbgmsg("ooxml_check_key: key name too long\n"); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for (i = 0; i < num_ooxml_keys; ++i) { |
|
|
|
|
//cli_dbgmsg("%d %d %s %s %s %s\n", keylen, strlen(ooxml_keys[i]), key, keycmp, ooxml_keys[i], ooxml_json_keys[i]);
|
|
|
|
|
if (keylen == strlen(ooxml_keys[i]) && !strncasecmp(key, ooxml_keys[i], keylen)) { |
|
|
|
|
return ooxml_json_keys[i]; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
 * ooxml_parse_element - convert the XML element under the reader cursor, and
 * recursively its child elements, into JSON.
 *
 * ctx     scan context, passed to cli_json_timeout_cycle_check()
 * reader  xmlTextReader positioned on an element node
 * wrkptr  JSON object the new element object is attached to
 * rlvl    current nesting level; 0 for the outermost call
 * root    object that receives "HitRecursiveLimit"; at rlvl 0 it is replaced
 *         by the object created for this element
 *
 * Elements whose name ooxml_check_key() does not know, and elements at
 * nesting level OOXML_JSON_RECLEVEL_MAX or deeper, are skipped with
 * xmlTextReaderNext().  Attributes are stored as strings under "Attributes",
 * text content is appended to the "Value" array.
 *
 * Returns CL_SUCCESS on success, CL_EPARSE when the reader reports an error
 * or a node lacks a name or value, CL_EFORMAT when the first node is not an
 * element or the end tag does not match the start tag, CL_EMEM when a JSON
 * object cannot be obtained, CL_ETIMEOUT when the timeout check fails, and
 * CL_BREAK (via check_state) when the reader has no more nodes.
 */
static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_object *wrkptr, int rlvl, json_object *root)
{
    const char *element_tag = NULL, *end_tag = NULL;
    const xmlChar *node_name = NULL, *node_value = NULL;
    json_object *thisjobj = NULL;
    int node_type, ret = CL_SUCCESS, endtag = 0, toval = 0, state = 1;

    cli_dbgmsg("in ooxml_parse_element @ layer %d\n", rlvl);

    /* check recursion level */
    if (rlvl >= OOXML_JSON_RECLEVEL_MAX) {
        cli_dbgmsg("ooxml_parse_element: reached ooxml json recursion limit\n");
        cli_jsonbool(root, "HitRecursiveLimit", 1);
        /* skip it */
        state = xmlTextReaderNext(reader);
        check_state(state);
        return CL_SUCCESS;
    }

    /* acquire element type */
    node_type = xmlTextReaderNodeType(reader);
    if (node_type == -1)
        return CL_EPARSE;

    if (node_type != XML_READER_TYPE_ELEMENT) {
        cli_dbgmsg("ooxml_parse_element: first node typed %d, not %d\n", node_type, XML_READER_TYPE_ELEMENT);
        return CL_EFORMAT; /* first type is not an element */
    }

    node_name = xmlTextReaderConstLocalName(reader);
    if (!node_name) {
        cli_dbgmsg("ooxml_parse_element: element tag node nameless\n");
        return CL_EPARSE; /* no name, nameless */
    }
    element_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
    if (!element_tag) {
        cli_dbgmsg("ooxml_parse_element: invalid element tag [%s]\n", node_name);
        /* skip it */
        state = xmlTextReaderNext(reader);
        check_state(state);
        return CL_SUCCESS;
    }

    /* generate json object */
    thisjobj = cli_jsonobj(wrkptr, element_tag);
    if (!thisjobj) {
        return CL_EMEM;
    }
    cli_dbgmsg("ooxml_parse_element: generated json object [%s]\n", element_tag);

    if (rlvl == 0)
        root = thisjobj;

    /* handle attributes */
    state = xmlTextReaderHasAttributes(reader);
    if (state == 1) {
        json_object *attributes;

        attributes = cli_jsonobj(thisjobj, "Attributes");
        if (!attributes) {
            /* same failure as the cli_jsonobj() call above: report it the same way */
            return CL_EMEM;
        }
        cli_dbgmsg("ooxml_parse_element: retrieved json object [Attributes]\n");

        while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
            const xmlChar *name, *value;
            name = xmlTextReaderConstLocalName(reader);
            value = xmlTextReaderConstValue(reader);
            if (name == NULL || value == NULL) continue;

            cli_dbgmsg("%s: %s\n", name, value);

            cli_jsonstr(attributes, (const char *)name, (const char *)value);
        }
    }
    else if (state == -1)
        return CL_EPARSE;

    state = xmlTextReaderIsEmptyElement(reader);
    if (state == 1) {
        /* no content and no end tag to wait for */
        state = xmlTextReaderNext(reader);
        check_state(state);
        return CL_SUCCESS;
    }
    else if (state == -1)
        return CL_EPARSE;

    /* advance to first content node */
    state = xmlTextReaderRead(reader);
    check_state(state);

    /* parse until the end element tag */
    while (!endtag) {
        if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
            return CL_ETIMEOUT;
        }

        node_type = xmlTextReaderNodeType(reader);
        if (node_type == -1)
            return CL_EPARSE;

        switch (node_type) {
        case XML_READER_TYPE_ELEMENT:
            ret = ooxml_parse_element(ctx, reader, thisjobj, rlvl+1, root);
            if (ret != CL_SUCCESS) {
                return ret;
            }
            break;

        case XML_READER_TYPE_END_ELEMENT:
            cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl);
            node_name = xmlTextReaderConstLocalName(reader);
            if (!node_name) {
                cli_dbgmsg("ooxml_parse_element: element end tag node nameless\n");
                return CL_EPARSE; /* no name, nameless */
            }

            end_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
            if (!end_tag) {
                cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", node_name);
                return CL_EFORMAT; /* unrecognized element tag */
            }
            /* full comparison: a prefix test would let "Characters" be
             * closed by "CharactersWithSpaces" */
            if (strcmp(element_tag, end_tag)) {
                cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n");
                return CL_EFORMAT;
            }

            /* advance to next element tag */
            state = xmlTextReaderRead(reader);
            check_state(state);

            endtag = 1;
            break;

        case XML_READER_TYPE_TEXT:
            node_value = xmlTextReaderConstValue(reader);
            if (!node_value) {
                cli_dbgmsg("ooxml_parse_element: text node without a value\n");
                return CL_EPARSE;
            }

            ret = ooxml_parse_value(thisjobj, "Value", node_value);
            if (ret != CL_SUCCESS)
                return ret;

            cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value);

            /* advance to next element tag */
            state = xmlTextReaderRead(reader);
            check_state(state);

            break;

        default:
#if OOXML_DEBUG
            node_name = xmlTextReaderConstLocalName(reader);
            node_value = xmlTextReaderConstValue(reader);

            cli_dbgmsg("ooxml_parse_element: unhandled xml node %s [%d]: %s\n", node_name, node_type, node_value);
#endif
            /* NOTE(review): this returns from the element without waiting
             * for its end tag; kept as in the original - confirm intent */
            state = xmlTextReaderNext(reader);
            check_state(state);
            return CL_SUCCESS;
        }
    }

    return CL_SUCCESS;
}
|
|
|
|
/* Entry count of the ooxml_keys table, passed to cli_msxml_parse_document(). */
static size_t num_ooxml_keys = sizeof(ooxml_keys) / sizeof(ooxml_keys[0]);
|
|
|
|
|
|
|
|
|
static int ooxml_updatelimits(int fd, cli_ctx *ctx) |
|
|
|
|
{ |
|
|
|
@ -433,14 +126,7 @@ static int ooxml_parse_document(int fd, cli_ctx *ctx) |
|
|
|
|
return CL_SUCCESS; // internal error from libxml2
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* move reader to first element */ |
|
|
|
|
if (xmlTextReaderRead(reader) != 1) { |
|
|
|
|
xmlTextReaderClose(reader); |
|
|
|
|
xmlFreeTextReader(reader); |
|
|
|
|
return CL_SUCCESS; /* libxml2 failed */ |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
ret = ooxml_parse_element(ctx, reader, ctx->wrkproperty, 0, NULL); |
|
|
|
|
ret = cli_msxml_parse_document(ctx, reader, ooxml_keys, num_ooxml_keys, 1); |
|
|
|
|
|
|
|
|
|
if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK) |
|
|
|
|
cli_warnmsg("ooxml_parse_document: encountered issue in parsing properties document\n"); |
|
|
|
@ -457,9 +143,9 @@ static int ooxml_core_cb(int fd, cli_ctx *ctx) |
|
|
|
|
cli_dbgmsg("in ooxml_core_cb\n"); |
|
|
|
|
ret = ooxml_parse_document(fd, ctx); |
|
|
|
|
if (ret == CL_EPARSE) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_XMLPARSER"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_XMLPARSER"); |
|
|
|
|
else if (ret == CL_EFORMAT) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_MALFORMED"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_MALFORMED"); |
|
|
|
|
|
|
|
|
|
return ret; |
|
|
|
|
} |
|
|
|
@ -471,9 +157,9 @@ static int ooxml_extn_cb(int fd, cli_ctx *ctx) |
|
|
|
|
cli_dbgmsg("in ooxml_extn_cb\n"); |
|
|
|
|
ret = ooxml_parse_document(fd, ctx); |
|
|
|
|
if (ret == CL_EPARSE) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_XMLPARSER"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_XMLPARSER"); |
|
|
|
|
else if (ret == CL_EFORMAT) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_MALFORMED"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_MALFORMED"); |
|
|
|
|
|
|
|
|
|
return ret; |
|
|
|
|
} |
|
|
|
@ -501,7 +187,7 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx) |
|
|
|
|
reader = xmlReaderForFd(fd, "[Content_Types].xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS); |
|
|
|
|
if (reader == NULL) { |
|
|
|
|
cli_dbgmsg("ooxml_content_cb: xmlReaderForFd error for ""[Content_Types].xml""\n"); |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_FD"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_FD"); |
|
|
|
|
|
|
|
|
|
ctx->scansize = sav_scansize; |
|
|
|
|
ctx->scannedfiles = sav_scannedfiles; |
|
|
|
@ -610,37 +296,37 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx) |
|
|
|
|
if (core) { |
|
|
|
|
cli_jsonint(ctx->wrkproperty, "CorePropertiesFileCount", core); |
|
|
|
|
if (core > 1) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CORE_PROPFILES"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CORE_PROPFILES"); |
|
|
|
|
} |
|
|
|
|
else if (!mcore) |
|
|
|
|
cli_dbgmsg("cli_process_ooxml: file does not contain core properties file\n"); |
|
|
|
|
if (mcore) { |
|
|
|
|
cli_jsonint(ctx->wrkproperty, "CorePropertiesMissingFileCount", mcore); |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CORE_PROPFILES"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CORE_PROPFILES"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (extn) { |
|
|
|
|
cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesFileCount", extn); |
|
|
|
|
if (extn > 1) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_EXTN_PROPFILES"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_EXTN_PROPFILES"); |
|
|
|
|
} |
|
|
|
|
else if (!mextn) |
|
|
|
|
cli_dbgmsg("cli_process_ooxml: file does not contain extended properties file\n"); |
|
|
|
|
if (mextn) { |
|
|
|
|
cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesMissingFileCount", mextn); |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_EXTN_PROPFILES"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_EXTN_PROPFILES"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (cust) { |
|
|
|
|
cli_jsonint(ctx->wrkproperty, "CustomPropertiesFileCount", cust); |
|
|
|
|
if (cust > 1) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CUSTOM_PROPFILES"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CUSTOM_PROPFILES"); |
|
|
|
|
} |
|
|
|
|
else if (!mcust) |
|
|
|
|
cli_dbgmsg("cli_process_ooxml: file does not contain custom properties file\n"); |
|
|
|
|
if (mcust) { |
|
|
|
|
cli_jsonint(ctx->wrkproperty, "CustomPropertiesMissingFileCount", mcust); |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CUST_PROPFILES"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CUST_PROPFILES"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (dsig) { |
|
|
|
@ -704,25 +390,25 @@ int cli_process_ooxml(cli_ctx *ctx) |
|
|
|
|
/* find "[Content Types].xml" */ |
|
|
|
|
tmp = unzip_search_single(ctx, "[Content_Types].xml", 18, &loff); |
|
|
|
|
if (tmp == CL_ETIMEOUT) { |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT"); |
|
|
|
|
return CL_ETIMEOUT; |
|
|
|
|
} |
|
|
|
|
else if (tmp != CL_VIRUS) { |
|
|
|
|
cli_dbgmsg("cli_process_ooxml: failed to find ""[Content_Types].xml""!\n"); |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES"); |
|
|
|
|
return CL_EFORMAT; |
|
|
|
|
} |
|
|
|
|
cli_dbgmsg("cli_process_ooxml: found ""[Content_Types].xml"" @ %x\n", loff); |
|
|
|
|
|
|
|
|
|
tmp = unzip_single_internal(ctx, loff, ooxml_content_cb); |
|
|
|
|
if (tmp == CL_ETIMEOUT) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT"); |
|
|
|
|
else if (tmp == CL_EMEM) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM"); |
|
|
|
|
else if (tmp == CL_EMAXSIZE) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE"); |
|
|
|
|
else if (tmp == CL_EMAXFILES) |
|
|
|
|
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES"); |
|
|
|
|
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES"); |
|
|
|
|
|
|
|
|
|
return tmp; |
|
|
|
|
#else |
|
|
|
|