ooxml: added checks for property keys

ooxml: added lists of property tags
ooxml: added the ability to skip invalid elements
pull/6/head
Kevin Lin 12 years ago
parent 546e168bb7
commit afd00a7207
  1. 296
      libclamav/ooxml.c

@ -45,7 +45,8 @@
#endif
/*
 * Limits for the OOXML property parser.
 *
 * OOXML_JSON_RECLEVEL_MAX used to be defined twice in a row (32, then 5),
 * which is an incompatible macro redefinition; only the newer value is kept.
 * NOTE(review): confirm 5 is the intended limit.
 */
#define OOXML_JSON_RECLEVEL 16
/* ooxml_parse_element() returns CL_EMAXREC at or beyond this depth. */
#define OOXML_JSON_RECLEVEL_MAX 5
/* Size of the scratch buffer in ooxml_check_key(), terminating NUL included. */
#define OOXML_JSON_STRLEN_MAX 100
static int ooxml_is_int(const char *value, size_t len, int32_t *val2)
{
@ -62,6 +63,293 @@ static int ooxml_is_int(const char *value, size_t len, int32_t *val2)
return 1;
}
/*
 * Lower-case XML tag names recognised by ooxml_check_key().
 *
 * The table is positional: entry i is translated to ooxml_json_keys[i],
 * and num_ooxml_keys holds the number of entries that are searched.  All
 * three must be kept in step when an entry is added, removed or disabled.
 */
static const char *ooxml_keys[] = {
    "coreproperties",
    "title",
    "subject",
    "creator",
    "keywords",
    "comments",
    "description",
    "lastmodifiedby",
    "revision",
    "created",
    "modified",
    "category",
    "contentstatus",

    "properties",
    "application",
    "appversion",
    "characters",
    "characterswithspaces",
    "company",
    "digsig",
    "docsecurity",
    /* "headingpairs",   (disabled) */
    "hiddenslides",
    "hlinks",
    "hyperlinkbase",
    "hyperlinkschanged",
    "lines",
    "linksuptodate",
    "manager",
    "mmclips",
    "notes",
    "pages",
    "paragraphs",
    "presentationformat",
    /*
     * Second "properties" entry: ooxml_check_key() returns the first match
     * of its linear search, so this slot is never selected.
     */
    "properties",
    "scalecrop",
    "shareddoc",
    "slides",
    "template",
    /* "titlesofparts",  (disabled) */
    "totaltime",
    "words"
};
/*
 * JSON property names handed back by ooxml_check_key().
 *
 * Positional companion of ooxml_keys: entry i is the name returned when
 * ooxml_keys[i] matches.  Keep the order and the set of disabled entries
 * identical in both tables.
 */
static const char *ooxml_json_keys[] = {
    "CoreProperties",
    "Title",
    "Subject",
    "Author",
    "Keywords",
    "Comments",
    "Description",
    "LastAuthor",
    "Revision",
    "Created",
    "Modified",
    "Category",
    "ContentStatus",

    "ExtendedProperties",
    "Application",
    "AppVersion",
    "Characters",
    "CharactersWithSpaces",
    "Company",
    "DigSig",
    "DocSecurity",
    /* "HeadingPairs",   (disabled) */
    "HiddenSlides",
    "HLinks",
    "HyperlinkBase",
    "HyperlinksChanged",
    "Lines",
    "LinksUpToDate",
    "Manager",
    "MMClips",
    "Notes",
    "Pages",
    "Paragraphs",
    "PresentationFormat",
    /*
     * Paired with the second "properties" entry of ooxml_keys, which the
     * first-match lookup in ooxml_check_key() never reaches.
     */
    "Properties",
    "ScaleCrop",
    "SharedDoc",
    "Slides",
    "Template",
    /* "TitlesOfParts",  (disabled) */
    "TotalTime",
    "Words"
};
/*
 * Number of entries ooxml_check_key() scans in ooxml_keys / ooxml_json_keys.
 * Hand-maintained: it is 40 with the two disabled entries commented out of
 * each table and would be 42 with them enabled.
 * NOTE(review): deriving this from sizeof(ooxml_keys) would remove the need
 * to update it by hand.
 */
static size_t num_ooxml_keys = 40; /* 42 with the disabled entries */
/*
 * Translate an XML tag name into the JSON property name used for it.
 *
 * key    : tag name; only the first keylen bytes are read, so it does not
 *          need to be NUL-terminated.
 * keylen : number of bytes in key.
 *
 * The tag is lower-cased (ASCII A-Z only) into a local buffer and compared
 * against the first num_ooxml_keys entries of ooxml_keys.  On the first
 * exact match the entry at the same index of ooxml_json_keys is returned.
 *
 * Returns NULL when the tag is unknown or when keylen does not leave room
 * for the terminating NUL in the OOXML_JSON_STRLEN_MAX-byte buffer.
 */
static const char *ooxml_check_key(const char* key, size_t keylen)
{
    char lowered[OOXML_JSON_STRLEN_MAX];
    unsigned pos, slot;

    if (keylen >= OOXML_JSON_STRLEN_MAX) {
        cli_dbgmsg("ooxml_check_key: key name too long\n");
        return NULL;
    }

    /* fold to lower case without depending on the locale */
    for (pos = 0; pos < keylen; pos++) {
        char ch = key[pos];

        if (ch >= 'A' && ch <= 'Z') {
            ch = ch - 'A' + 'a';
        }
        lowered[pos] = ch;
    }
    lowered[keylen] = '\0';

    /* linear scan; the first entry with equal length and bytes wins */
    for (slot = 0; slot < num_ooxml_keys; ++slot) {
        const char *candidate = ooxml_keys[slot];

        if (strlen(candidate) != keylen) {
            continue;
        }
        if (strncmp(lowered, candidate, keylen) == 0) {
            return ooxml_json_keys[slot];
        }
    }

    return NULL;
}
#if HAVE_LIBXML2
#if HAVE_JSON
/*
 * Recursively convert the XML element the reader is positioned on into JSON.
 *
 * reader : libxml2 text reader; its current node must be an element start.
 * wrkptr : JSON object that receives this element's value.  NULL forces
 *          skip mode.
 * rlvl   : current nesting depth; OOXML_JSON_RECLEVEL_MAX or more is refused.
 * skip   : non-zero to consume the element and its descendants without
 *          recording anything.  Skip mode is also entered when the tag is
 *          not known to ooxml_check_key().
 *
 * Text content is stored under the element's JSON key as an integer, a
 * boolean ("true"/"false") or a string.  Child elements are parsed into a
 * JSON object stored under the same key.
 *
 * Returns CL_SUCCESS when the element's end tag (or the end of the input)
 * has been consumed, CL_EMAXREC when nested too deeply, CL_EPARSE on
 * unexpected reader nodes, CL_EFORMAT on mismatched tags or conflicting
 * JSON types, CL_EMEM on allocation failure, or whatever the cli_json*
 * helpers report.
 */
static int ooxml_parse_element(xmlTextReaderPtr reader, json_object *wrkptr, int rlvl, int skip)
{
    const char *element_tag = NULL, *end_tag = NULL;
    const xmlChar *node_name = NULL, *node_value = NULL;
    json_object *njptr;
    int node_type, ret = CL_SUCCESS;
    int32_t val2;

    cli_dbgmsg("in ooxml_parse_element @ layer %d\n", rlvl);

    /* check recursion level */
    if (rlvl >= OOXML_JSON_RECLEVEL_MAX) {
        return CL_EMAXREC;
    }

    /* without a JSON parent there is nowhere to record anything */
    if (wrkptr == NULL) {
        skip = 1;
    }

    /* acquire element type */
    node_type = xmlTextReaderNodeType(reader);
    if (node_type != XML_READER_TYPE_ELEMENT) {
        cli_dbgmsg("ooxml_parse_element: first node typed %d, not %d\n", node_type, XML_READER_TYPE_ELEMENT);
        return CL_EPARSE; /* first type is not an element */
    }

    /* acquire element tag */
    node_name = xmlTextReaderConstLocalName(reader);
    if (!node_name) {
        cli_dbgmsg("ooxml_parse_element: element tag node nameless\n");
        return CL_EPARSE; /* no name, nameless */
    }
    element_tag = ooxml_check_key((const char *)node_name, (size_t)xmlStrlen(node_name));
    if (!element_tag) {
        cli_dbgmsg("ooxml_parse_element: invalid element tag [%s]\n", (const char *)node_name);
        skip = 1; /* skipping element */
    }

    /*
     * A self-closing element (<tag/>) has no content and the reader never
     * reports an end tag for it.  Return now, otherwise the loop below
     * would swallow the following siblings and the parent's end tag.
     */
    if (xmlTextReaderIsEmptyElement(reader) == 1) {
        cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl);
        return CL_SUCCESS;
    }

    /* handle attributes if you want */

    /* loop across all element contents */
    while (xmlTextReaderRead(reader) == 1) {
        node_type = xmlTextReaderNodeType(reader);
        switch (node_type) {
        case XML_READER_TYPE_ELEMENT:
            if (!skip) {
                /* children are collected in an object stored under this element's key */
                njptr = json_object_object_get(wrkptr, element_tag);
                if (!njptr) {
                    njptr = json_object_new_object();
                    if (NULL == njptr) {
                        cli_errmsg("ooxml_parse_element: no memory for json object.\n");
                        return CL_EMEM;
                    }
                    cli_dbgmsg("ooxml_parse_element: added json object [%s]\n", element_tag);
                    json_object_object_add(wrkptr, element_tag, njptr);
                }
                else if (!json_object_is_type(njptr, json_type_object)) {
                    cli_warnmsg("ooxml_parse_element: json object [%s] already exists as not an object\n", element_tag);
                    return CL_EFORMAT;
                }
            }
            else {
                njptr = NULL;
            }

            ret = ooxml_parse_element(reader, njptr, rlvl+1, skip);
            if (ret != CL_SUCCESS) {
                return ret;
            }
            break;

        case XML_READER_TYPE_END_ELEMENT:
            cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl);
            node_name = xmlTextReaderConstLocalName(reader);
            if (!node_name) {
                cli_dbgmsg("ooxml_parse_element: element end tag node nameless\n");
                return CL_EPARSE; /* no name, nameless */
            }

            if (!skip) {
                end_tag = ooxml_check_key((const char *)node_name, (size_t)xmlStrlen(node_name));
                if (!end_tag) {
                    cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", (const char *)node_name);
                    return CL_EFORMAT; /* unrecognized element tag */
                }
                /* full comparison: a prefix test would accept e.g. "Characters" vs "CharactersWithSpaces" */
                if (strcmp(element_tag, end_tag) != 0) {
                    cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n");
                    return CL_EFORMAT;
                }
            }
            return CL_SUCCESS;

        case XML_READER_TYPE_TEXT:
            node_value = xmlTextReaderConstValue(reader);
            if (!skip) {
                if (!node_value) {
                    cli_dbgmsg("ooxml_parse_element: text node without a value\n");
                    return CL_EPARSE;
                }

                njptr = json_object_object_get(wrkptr, element_tag);
                if (njptr) {
                    cli_warnmsg("ooxml_parse_element: json object [%s] already exists\n", element_tag);
                }

                if (ooxml_is_int((const char *)node_value, (size_t)xmlStrlen(node_value), &val2)) {
                    ret = cli_jsonint(wrkptr, element_tag, val2);
                }
                else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
                    ret = cli_jsonbool(wrkptr, element_tag, 1);
                }
                else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
                    ret = cli_jsonbool(wrkptr, element_tag, 0);
                }
                else {
                    ret = cli_jsonstr(wrkptr, element_tag, (const char *)node_value);
                }

                if (ret != CL_SUCCESS)
                    return ret;

                cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, (const char *)node_value);
            }
            else {
                node_name = xmlTextReaderConstLocalName(reader);
                cli_dbgmsg("ooxml_parse_element: not adding xml node %s [%d]: %s\n",
                           node_name ? (const char *)node_name : "(null)", node_type,
                           node_value ? (const char *)node_value : "(null)");
            }
            break;

        case XML_READER_TYPE_WHITESPACE:
        case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
        case XML_READER_TYPE_COMMENT:
            /* formatting between elements and comments carry no property data */
            break;

        default:
            node_name = xmlTextReaderConstLocalName(reader);
            node_value = xmlTextReaderConstValue(reader);
            /* either string may be NULL for some node types; never hand NULL to %s */
            cli_dbgmsg("ooxml_parse_element: unhandled xml node %s [%d]: %s\n",
                       node_name ? (const char *)node_name : "(null)", node_type,
                       node_value ? (const char *)node_value : "(null)");
            return CL_EPARSE;
        }
    }

    /* the reader stopped (end of input or read failure) before an end tag was seen */
    return CL_SUCCESS;
}
/*
 * Parse one XML document from fd into ctx->wrkproperty.
 *
 * fd  : descriptor the XML is read from.
 * ctx : scan context; ctx->wrkproperty is handed to ooxml_parse_element()
 *       as the JSON parent object.
 *
 * libxml2 failures (no reader, or no first node) are logged and reported
 * as CL_SUCCESS.  Otherwise the result of ooxml_parse_element() for the
 * first node is returned.  The reader is released on every path once it
 * has been created.
 */
static int ooxml_parse_document(int fd, cli_ctx *ctx)
{
    int ret = CL_SUCCESS;
    xmlTextReaderPtr reader = NULL;

    cli_dbgmsg("in ooxml_parse_document\n");

    reader = xmlReaderForFd(fd, "properties.xml", NULL, 0);
    if (reader == NULL) {
        cli_dbgmsg("ooxml_parse_document: xmlReaderForFd error\n");
        return CL_SUCCESS; /* internal error from libxml2 */
    }

    /* move reader to first element */
    if (xmlTextReaderRead(reader) == 1) {
        ret = ooxml_parse_element(reader, ctx->wrkproperty, 0, 0);
    }
    else {
        /* libxml2 failed; ret stays CL_SUCCESS, but the reader must still be freed */
        cli_dbgmsg("ooxml_parse_document: xmlTextReaderRead error\n");
    }

    xmlTextReaderClose(reader);
    xmlFreeTextReader(reader);
    return ret;
}
#endif
#endif
static int ooxml_basic_json(int fd, cli_ctx *ctx, const char *key)
{
int ret = CL_SUCCESS;
@ -179,13 +467,15 @@ static int ooxml_basic_json(int fd, cli_ctx *ctx, const char *key)
/*
 * Callback that parses the XML part available on fd with
 * ooxml_parse_document() and returns its result.
 *
 * The block previously held two consecutive return statements, which left
 * the ooxml_parse_document() call unreachable behind the older
 * ooxml_basic_json(fd, ctx, "CoreProperties") call; only the newer call is
 * kept.
 * NOTE(review): ooxml_parse_document() is defined only when HAVE_LIBXML2
 * and HAVE_JSON are set - confirm this callback is built under the same
 * conditions.
 */
static int ooxml_core_cb(int fd, cli_ctx *ctx)
{
    cli_dbgmsg("in ooxml_core_cb\n");
    return ooxml_parse_document(fd, ctx);
}
/*
 * Callback that parses the XML part available on fd with
 * ooxml_parse_document() and returns its result.
 *
 * The block previously held two consecutive return statements, which left
 * the ooxml_parse_document() call unreachable behind the older
 * ooxml_basic_json(fd, ctx, "ExtendedProperties") call; only the newer
 * call is kept.
 * NOTE(review): ooxml_parse_document() is defined only when HAVE_LIBXML2
 * and HAVE_JSON are set - confirm this callback is built under the same
 * conditions.
 */
static int ooxml_extn_cb(int fd, cli_ctx *ctx)
{
    cli_dbgmsg("in ooxml_extn_cb\n");
    return ooxml_parse_document(fd, ctx);
}
static int ooxml_content_cb(int fd, cli_ctx *ctx)

Loading…
Cancel
Save