clamav/libclamav/msxml.c

/*
 * Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
 * 
 * Copyright (C) 2015 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 * Copyright (C) 2007-2013 Sourcefire, Inc.
 * 
 * Authors: Kevin Lin
 * 
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License version 2 as published by the
 * Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 * 
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#include "clamav.h"
#include "others.h"
#include "conv.h"
#include "json_api.h"
#include "msxml.h"
#include "msxml_parser.h"

#if HAVE_LIBXML2
#include <libxml/xmlreader.h>

/*
 * Compile-time switch for the very chatty read-callback tracing.
 * When non-zero, cli_msxmlmsg() forwards its arguments to cli_dbgmsg();
 * when zero, every cli_msxmlmsg() call expands to nothing.
 */
#define MSXML_VERBIOSE 0
#if MSXML_VERBIOSE
#define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)
#else
#define cli_msxmlmsg(...)
#endif

/* Maximum number of bytes requested from the fmap for a single read window. */
#define MSXML_READBUFF SCANBUFF

/*
 * Element table handed to cli_msxml_parse_document() by cli_scanmsxml().
 *
 * Each entry holds, in order: the lower-case element name, the mixed-case
 * name, and a bitmask of MSXML_* flags.  The exact meaning of the names and
 * flags is defined by the shared parser (msxml_parser.h), not in this file.
 */
static const struct key_entry msxml_keys[] = {
    /* document roots */
    { "worddocument", "WordDocument", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
    { "workbook", "Workbook", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },

    /* embedded binary data and document properties */
    { "bindata", "BinaryData", MSXML_SCAN_B64 | MSXML_JSON_COUNT | MSXML_JSON_ROOT },
    { "documentproperties", "DocumentProperties", MSXML_JSON_ROOT },
    { "author", "Author", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "lastauthor", "LastAuthor", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "revision", "Revision", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "totaltime", "TotalTime", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "created", "Created", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "lastsaved", "LastSaved", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "pages", "Pages", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "words", "Words", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "characters", "Characters", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "lines", "Lines", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "paragraph", "Paragraph", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "version", "Version", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },

    { "allowpng", "AllowPNG", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },

    /* elements flagged MSXML_IGNORE_ELEM */
    { "fonts", "Fonts", MSXML_IGNORE_ELEM },
    { "styles", "Styles", MSXML_IGNORE_ELEM }
};

/* Number of entries in msxml_keys. */
static size_t num_msxml_keys = sizeof(msxml_keys) / sizeof(msxml_keys[0]);

/*
 * Move the callback's read window to the next chunk of the fmap.
 *
 * The new window starts right after the current one (mappos + winsize) and
 * spans at most MSXML_READBUFF bytes.  On success the window pointer, window
 * position, map position and window size in cbdata are all updated.
 *
 * Returns:
 *   > 0  number of bytes available in the newly acquired window
 *     0  end of the map; no further data (cbdata is reset to an empty window
 *        the first time the end is reached)
 *    -1  the fmap could not provide the requested range
 *
 * The return type is a signed int because -1 is a valid result and the
 * caller stores the value in an int and tests it for < 0; returning size_t
 * made that check depend on an implementation-defined conversion.  The byte
 * count is bounded by MSXML_READBUFF, and the caller already narrowed the
 * value to int.
 */
static inline int msxml_read_cb_new_window(struct msxml_cbdata *cbdata)
{
    const unsigned char *new_window = NULL;
    off_t new_mappos;
    size_t bytes;

    /* a previous call already consumed the whole map */
    if (cbdata->mappos == cbdata->map->len) {
        cli_msxmlmsg("msxml_read_cb: fmap REALLY EOF\n");
        return 0;
    }

    new_mappos = cbdata->mappos + cbdata->winsize;
    bytes = MIN(cbdata->map->len - new_mappos, MSXML_READBUFF);
    if (!bytes) {
        /* nothing left after the current window: mark the map as exhausted */
        cbdata->window = NULL;
        cbdata->winpos = 0;
        cbdata->mappos = cbdata->map->len;
        cbdata->winsize = 0;

        cli_msxmlmsg("msxml_read_cb: fmap EOF\n");
        return 0;
    }

    new_window = fmap_need_off_once(cbdata->map, new_mappos, bytes);
    if (!new_window) {
        cli_errmsg("msxml_read_cb: cannot acquire new window for fmap\n");
        return -1;
    }

    cbdata->window = new_window;
    cbdata->winpos = 0;
    cbdata->mappos = new_mappos;
    cbdata->winsize = bytes;

    cli_msxmlmsg("msxml_read_cb: acquired new window @ [%llu(+%llu)(max:%llu)]\n",
                 (long long unsigned)cbdata->mappos, (long long unsigned)(cbdata->mappos + cbdata->winsize),
                 (long long unsigned)cbdata->map->len);

    return (int)bytes;
}

/*
 * Read callback registered with xmlReaderForIO() in cli_scanmsxml().
 *
 * Copies up to len bytes of the document from the fmap (through a sliding
 * window kept in the msxml_cbdata passed as ctx) into buffer.  While
 * copying, a small state machine stored in cbdata->state follows numeric
 * character references ("&#" followed by decimal digits, or "&#x" followed
 * by hex digits).  When such a reference is followed by a byte other than
 * ';', a ';' is inserted into the output in front of that byte.  The state
 * persists in cbdata, so a reference may straddle windows and calls.
 *
 * ctx    - struct msxml_cbdata * describing the map and current window
 * buffer - destination for the (pre-processed) bytes
 * len    - capacity of buffer in bytes
 *
 * Returns the number of bytes written to buffer, 0 at end of input, or a
 * negative value on error (invalid arguments or window acquisition failure).
 */
int msxml_read_cb(void *ctx, char *buffer, int len)
{
    struct msxml_cbdata *cbdata = (struct msxml_cbdata *)ctx;
    size_t wbytes, rbytes, want;
    int winret;

    cli_msxmlmsg("msxml_read_cb called\n");

    /*
     * Reject unusable arguments up front.  A negative len in particular
     * would turn into a huge unsigned bound in the size_t comparisons below
     * and let the copy loop run past the end of buffer.
     */
    if (!cbdata || !buffer || len < 0)
        return -1;
    want = (size_t)len;

    /* initial iteration */
    if (!cbdata->window) {
        if ((winret = msxml_read_cb_new_window(cbdata)) <= 0)
            return winret;
    }

    cli_msxmlmsg("msxml_read_cb: requested %d bytes from offset %llu\n", len, (long long unsigned)(cbdata->mappos+cbdata->winpos));

    wbytes = 0;
    rbytes = cbdata->winsize - cbdata->winpos;

    /* copying loop with preprocessing */
    while (wbytes < want) {
        const unsigned char *read_from;
        char *write_to = buffer + wbytes;
        enum msxml_state *state;
#if MSXML_VERBIOSE
        size_t written;
#endif

        /* current window fully consumed: slide to the next one */
        if (!rbytes) {
            if ((winret = msxml_read_cb_new_window(cbdata)) < 0)
                return winret;
            if (winret == 0) {
                cli_msxmlmsg("msxml_read_cb: propagating fmap EOF [%llu]\n", (long long unsigned)wbytes);
                return (int)wbytes;
            }

            rbytes = cbdata->winsize;
        }

#if MSXML_VERBIOSE
        written = MIN(rbytes, want - wbytes);
        cli_msxmlmsg("msxml_read_cb: copying from window [%llu(+%llu)] %llu->~%llu\n",
                     (long long unsigned)(cbdata->winsize - rbytes), (long long unsigned)cbdata->winsize,
                     (long long unsigned)cbdata->winpos, (long long unsigned)(cbdata->winpos + written));
#endif

        read_from = cbdata->window + cbdata->winpos;
        state = &(cbdata->state);

        while (rbytes > 0 && wbytes < want) {
            /* advance the character-reference state machine on the next input byte */
            switch (*state) {
            case MSXML_STATE_NORMAL:
                if ((*read_from) == '&')
                    *state = MSXML_STATE_ENTITY_START_1;
                break;
            case MSXML_STATE_ENTITY_START_1:
                if ((*read_from) == '#')
                    *state = MSXML_STATE_ENTITY_START_2;
                else
                    *state = MSXML_STATE_NORMAL;
                break;
            case MSXML_STATE_ENTITY_START_2:
                if ((*read_from) == 'x')
                    *state = MSXML_STATE_ENTITY_HEX;
                else if (((*read_from) >= '0') && ((*read_from) <= '9'))
                    *state = MSXML_STATE_ENTITY_DEC;
                else
                    *state = MSXML_STATE_NORMAL;
                break;
            case MSXML_STATE_ENTITY_HEX:
                if ((((*read_from) >= '0') && ((*read_from) <= '9')) ||
                    (((*read_from) >= 'a') && ((*read_from) <= 'f')) ||
                    (((*read_from) >= 'A') && ((*read_from) <= 'F'))) {}
                else
                    *state = MSXML_STATE_ENTITY_CLOSE;
                break;
            case MSXML_STATE_ENTITY_DEC:
                if (((*read_from) >= '0') && ((*read_from) <= '9')) {}
                else
                    *state = MSXML_STATE_ENTITY_CLOSE;
                break;
            default:
                cli_errmsg("unknown *state: %d\n", *state);
            }

            if (*state == MSXML_STATE_ENTITY_CLOSE) {
                if ((*read_from) != ';') {
                    cli_msxmlmsg("msxml_read_cb: detected unterminated character entity @ winoff %d\n",
                                 (int)(read_from - cbdata->window));
                    (*write_to++) = ';';
                    wbytes++;
                }
                *state = MSXML_STATE_NORMAL;

                /*
                 * The inserted ';' may have filled the buffer.  The pending
                 * input byte is then left unconsumed and will be examined
                 * again, in NORMAL state, on the next call.
                 */
                if (wbytes >= want)
                    break;

                /*
                 * The byte that ended the reference is about to be copied
                 * without another pass through the switch, so give it its
                 * NORMAL-state treatment here: a '&' directly after an
                 * unterminated reference starts a new one.
                 */
                if ((*read_from) == '&')
                    *state = MSXML_STATE_ENTITY_START_1;
            }

            *(write_to++) = *(read_from++);
            rbytes--;
            wbytes++;
        }
    }

    /* remember how far into the current window this call got */
    cbdata->winpos = cbdata->winsize - rbytes;
    return (int)wbytes;
}
#endif

/*
 * Scan an MS XML document (e.g. MS Office 2003 XML) held in ctx's fmap.
 *
 * With libxml2 available, an xmlTextReader is created that pulls its input
 * through msxml_read_cb(), and the document is handed to
 * cli_msxml_parse_document() together with the msxml_keys table.
 *
 * Returns CL_ENULLARG if ctx is NULL; otherwise the result of
 * cli_msxml_parse_document().  If the reader cannot be created, the result
 * of cli_json_parse_error() is returned when built with HAVE_JSON, and
 * CL_SUCCESS otherwise.  Without libxml2 the function only logs a debug
 * message and returns CL_SUCCESS.
 */
int cli_scanmsxml(cli_ctx *ctx)
{
#if HAVE_LIBXML2
    struct msxml_cbdata cbdata;
    xmlTextReaderPtr reader = NULL;
    int ret = CL_SUCCESS;

    cli_dbgmsg("in cli_scanmsxml()\n");

    if (!ctx)
        return CL_ENULLARG;

    /* start with no window; the read callback acquires the first one lazily */
    memset(&cbdata, 0, sizeof(cbdata));
    cbdata.map = *ctx->fmap;

    reader = xmlReaderForIO(msxml_read_cb, NULL, &cbdata, "msxml.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
    if (!reader) {
        cli_dbgmsg("cli_scanmsxml: cannot initialize xmlReader\n");

#if HAVE_JSON
        ret = cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_IO");
#endif
        return ret; // libxml2 failed!
    }

    ret = cli_msxml_parse_document(ctx, reader, msxml_keys, num_msxml_keys, 1, NULL);

    xmlTextReaderClose(reader);
    xmlFreeTextReader(reader);
    return ret;
#else
    UNUSEDPARAM(ctx);
    cli_dbgmsg("in cli_scanmsxml()\n");
    cli_dbgmsg("cli_scanmsxml: scanning msxml documents requires libxml2!\n");

    return CL_SUCCESS;
#endif
}
added new source files to handle msxml files 10 years ago			`/*`
			`* Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)`
			`*`
mass update of copyright headers 10 years ago			`* Copyright (C) 2015 Cisco Systems, Inc. and/or its affiliates. All rights reserved.`
added new source files to handle msxml files 10 years ago			`* Copyright (C) 2007-2013 Sourcefire, Inc.`
			`*`
			`* Authors: Kevin Lin`
			`*`
			`* This program is free software; you can redistribute it and/or modify it under`
			`* the terms of the GNU General Public License version 2 as published by the`
			`* Free Software Foundation.`
			`*`
			`* This program is distributed in the hope that it will be useful, but WITHOUT`
			`* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or`
			`* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for`
			`* more details.`
			`*`
			`* You should have received a copy of the GNU General Public License along with`
			`* this program; if not, write to the Free Software Foundation, Inc., 51`
			`* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.`
			`*/`

added basic framework for msxml processing 10 years ago			`#if HAVE_CONFIG_H`
			`#include "clamav-config.h"`
			`#endif`

msxml: added extracting, converting, and scanning contained binary data 10 years ago			`#include <sys/types.h>`
			`#include <sys/stat.h>`
			`#include <fcntl.h>`

added new source files to handle msxml files 10 years ago			`#include "clamav.h"`
			`#include "others.h"`
msxml: added extracting, converting, and scanning contained binary data 10 years ago			`#include "conv.h"`
added basic framework for msxml processing 10 years ago			`#include "json_api.h"`
			`#include "msxml.h"`
added new source file for shared code between ooxml and msxml 10 years ago			`#include "msxml_parser.h"`
added basic framework for msxml processing 10 years ago
			`#if HAVE_LIBXML2`
			`#include <libxml/xmlreader.h>`

code optimiztions and clean-up 10 years ago			`#define MSXML_VERBIOSE 0`
msxml: added verbiose debugging mechanism 10 years ago			`#if MSXML_VERBIOSE`
			`#define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)`
			`#else`
			`#define cli_msxmlmsg(...)`
			`#endif`

msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`#define MSXML_READBUFF SCANBUFF`

added new source file for shared code between ooxml and msxml 10 years ago			`static const struct key_entry msxml_keys[] = {`
added key entries for parsing MS 2003 XML documents 10 years ago			`{ "worddocument", "WordDocument", MSXML_JSON_ROOT \| MSXML_JSON_ATTRIB },`
			`{ "workbook", "Workbook", MSXML_JSON_ROOT \| MSXML_JSON_ATTRIB },`

			`{ "bindata", "BinaryData", MSXML_SCAN_B64 \| MSXML_JSON_COUNT \| MSXML_JSON_ROOT },`
added new source file for shared code between ooxml and msxml 10 years ago			`{ "documentproperties", "DocumentProperties", MSXML_JSON_ROOT },`
added key entries for parsing MS 2003 XML documents 10 years ago			`{ "author", "Author", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "lastauthor", "LastAuthor", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "revision", "Revision", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "totaltime", "TotalTime", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "created", "Created", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "lastsaved", "LastSaved", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "pages", "Pages", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "words", "Words", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "characters", "Characters", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "lines", "Lines", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "paragraph", "Paragraph", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
			`{ "version", "Version", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`

set 'AllowPNG' in msxml documents to record JSON value 10 years ago			`{ "allowpng", "AllowPNG", MSXML_JSON_WRKPTR \| MSXML_JSON_VALUE },`
added key entries for parsing MS 2003 XML documents 10 years ago
			`{ "fonts", "Fonts", MSXML_IGNORE_ELEM },`
			`{ "styles", "Styles", MSXML_IGNORE_ELEM }`
added new source file for shared code between ooxml and msxml 10 years ago			`};`
			`static size_t num_msxml_keys = sizeof(msxml_keys) / sizeof(struct key_entry);`
added basic framework for msxml processing 10 years ago
msxml: clang compiler fix 10 years ago			`static inline size_t msxml_read_cb_new_window(struct msxml_cbdata *cbdata)`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`{`
			`const unsigned char *new_window = NULL;`
			`off_t new_mappos;`
			`size_t bytes;`

			`if (cbdata->mappos == cbdata->map->len) {`
msxml: added verbiose debugging mechanism 10 years ago			`cli_msxmlmsg("msxml_read_cb: fmap REALLY EOF\n");`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`return 0;`
			`}`

			`new_mappos = cbdata->mappos + cbdata->winsize;`
			`bytes = MIN(cbdata->map->len - new_mappos, MSXML_READBUFF);`
			`if (!bytes) {`
			`cbdata->window = NULL;`
			`cbdata->winpos = 0;`
			`cbdata->mappos = cbdata->map->len;`
			`cbdata->winsize = 0;`

msxml: added verbiose debugging mechanism 10 years ago			`cli_msxmlmsg("msxml_read_cb: fmap EOF\n");`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`return 0;`
			`}`

			`new_window = fmap_need_off_once(cbdata->map, new_mappos, bytes);`
			`if (!new_window) {`
			`cli_errmsg("msxml_read_cb: cannot acquire new window for fmap\n");`
			`return -1;`
			`}`

			`cbdata->window = new_window;`
			`cbdata->winpos = 0;`
			`cbdata->mappos = new_mappos;`
			`cbdata->winsize = bytes;`

msxml: added verbiose debugging mechanism 10 years ago			`cli_msxmlmsg("msxml_read_cb: acquired new window @ [%llu(+%llu)(max:%llu)]\n",`
			`(long long unsigned)cbdata->mappos, (long long unsigned)(cbdata->mappos + cbdata->winsize),`
			`(long long unsigned)cbdata->map->len);`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago
			`return bytes;`
			`}`

			`int msxml_read_cb(void ctx, char buffer, int len)`
			`{`
			`struct msxml_cbdata cbdata = (struct msxml_cbdata )ctx;`
fixed coverity ID 12110 12111 changed a the type of a value from unsigned to signed due to possible negative values 10 years ago			`size_t wbytes, rbytes;`
			`int winret;`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago
msxml: added verbiose debugging mechanism 10 years ago			`cli_msxmlmsg("msxml_read_cb called\n");`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago
			`/* initial iteration */`
			`if (!cbdata->window) {`
			`if ((winret = msxml_read_cb_new_window(cbdata)) <= 0)`
			`return winret;`
			`}`

msxml: added verbiose debugging mechanism 10 years ago			`cli_msxmlmsg("msxml_read_cb: requested %d bytes from offset %llu\n", len, (long long unsigned)(cbdata->mappos+cbdata->winpos));`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago
			`wbytes = 0;`
			`rbytes = cbdata->winsize - cbdata->winpos;`

xmlreader unterminated char entity pre-processing 10 years ago			`/* copying loop with preprocessing */`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`while (wbytes < len) {`
xmlreader unterminated char entity pre-processing 10 years ago			`const unsigned char *read_from;`
			`char *write_to = buffer + wbytes;`
			`enum msxml_state *state;`
			`#if MSXML_VERBIOSE`
			`size_t written;`
			`#endif`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago
			`if (!rbytes) {`
			`if ((winret = msxml_read_cb_new_window(cbdata)) < 0)`
			`return winret;`
			`if (winret == 0) {`
msxml: added verbiose debugging mechanism 10 years ago			`cli_msxmlmsg("msxml_read_cb: propagating fmap EOF [%llu]\n", (long long unsigned)wbytes);`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`return (int)wbytes;`
			`}`

			`rbytes = cbdata->winsize;`
			`}`

xmlreader unterminated char entity pre-processing 10 years ago			`#if MSXML_VERBIOSE`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`written = MIN(rbytes, len - wbytes);`
xmlreader unterminated char entity pre-processing 10 years ago			`cli_msxmlmsg("msxml_read_cb: copying from window [%llu(+%llu)] %llu->~%llu\n",`
msxml: added verbiose debugging mechanism 10 years ago			`(long long unsigned)(cbdata->winsize - rbytes), (long long unsigned)cbdata->winsize,`
			`(long long unsigned)cbdata->winpos, (long long unsigned)(cbdata->winpos + written));`
xmlreader unterminated char entity pre-processing 10 years ago			`#endif`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago
xmlreader unterminated char entity pre-processing 10 years ago			`read_from = cbdata->window + cbdata->winpos;`
			`state = &(cbdata->state);`

			`while (rbytes > 0 && wbytes < len) {`
			`switch (*state) {`
			`case MSXML_STATE_NORMAL:`
			`if ((*read_from) == '&')`
			`*state = MSXML_STATE_ENTITY_START_1;`
			`break;`
			`case MSXML_STATE_ENTITY_START_1:`
			`if ((*read_from) == '#')`
			`*state = MSXML_STATE_ENTITY_START_2;`
			`else`
			`*state = MSXML_STATE_NORMAL;`
			`break;`
			`case MSXML_STATE_ENTITY_START_2:`
			`if ((*read_from) == 'x')`
			`*state = MSXML_STATE_ENTITY_HEX;`
			`else if (((read_from) >= '0') && ((read_from) <= '9'))`
			`*state = MSXML_STATE_ENTITY_DEC;`
			`else`
			`*state = MSXML_STATE_NORMAL;`
			`break;`
			`case MSXML_STATE_ENTITY_HEX:`
			`if ((((read_from) >= '0') && ((read_from) <= '9')) \|\|`
			`(((read_from) >= 'a') && ((read_from) <= 'f')) \|\|`
			`(((read_from) >= 'A') && ((read_from) <= 'F'))) {}`
			`else`
			`*state = MSXML_STATE_ENTITY_CLOSE;`
			`break;`
			`case MSXML_STATE_ENTITY_DEC:`
			`if (((read_from) >= '0') && ((read_from) <= '9')) {}`
			`else`
			`*state = MSXML_STATE_ENTITY_CLOSE;`
			`break;`
			`default:`
			`cli_errmsg("unknown state: %d\n", state);`
			`}`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago
xmlreader unterminated char entity pre-processing 10 years ago			`if (*state == MSXML_STATE_ENTITY_CLOSE) {`
			`if ((*read_from) != ';') {`
			`cli_msxmlmsg("msxml_read_cb: detected unterminated character entity @ winoff %d\n",`
			`(int)(read_from - cbdata->window));`
			`(*write_to++) = ';';`
			`wbytes++;`
			`}`
			`*state = MSXML_STATE_NORMAL;`
			`if (wbytes >= len)`
			`break;`
			`}`

			`(write_to++) = (read_from++);`
			`rbytes--;`
			`wbytes++;`
			`}`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`}`

			`cbdata->winpos = cbdata->winsize - rbytes;`
			`return (int)wbytes;`
			`}`
msxml: basic framework for processing xml document 10 years ago			`#endif`

added basic framework for msxml processing 10 years ago			`int cli_scanmsxml(cli_ctx *ctx)`
added new source files to handle msxml files 10 years ago			`{`
added basic framework for msxml processing 10 years ago			`#if HAVE_LIBXML2`
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`struct msxml_cbdata cbdata;`
added basic framework for msxml processing 10 years ago			`xmlTextReaderPtr reader = NULL;`
			`int state, ret = CL_SUCCESS;`

			`cli_dbgmsg("in cli_scanmsxml()\n");`

msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`if (!ctx)`
			`return CL_ENULLARG;`

			`memset(&cbdata, 0, sizeof(cbdata));`
			`cbdata.map = *ctx->fmap;`
added basic framework for msxml processing 10 years ago
msxml: using piecewise parsing over contiguous parsing reason: big files could cause issues for fmap contiguous buffering 10 years ago			`reader = xmlReaderForIO(msxml_read_cb, NULL, &cbdata, "msxml.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);`
added basic framework for msxml processing 10 years ago			`if (!reader) {`
Spelling Adjustments (#30) * spelling: accessed * spelling: alignment * spelling: amalgamated * spelling: answers * spelling: another * spelling: acquisition * spelling: apitid * spelling: ascii * spelling: appending * spelling: appropriate * spelling: arbitrary * spelling: architecture * spelling: asynchronous * spelling: attachments * spelling: argument * spelling: authenticode * spelling: because * spelling: boundary * spelling: brackets * spelling: bytecode * spelling: calculation * spelling: cannot * spelling: changes * spelling: check * spelling: children * spelling: codegen * spelling: commands * spelling: container * spelling: concatenated * spelling: conditions * spelling: continuous * spelling: conversions * spelling: corresponding * spelling: corrupted * spelling: coverity * spelling: crafting * spelling: daemon * spelling: definition * spelling: delivered * spelling: delivery * spelling: delimit * spelling: dependencies * spelling: dependency * spelling: detection * spelling: determine * spelling: disconnects * spelling: distributed * spelling: documentation * spelling: downgraded * spelling: downloading * spelling: endianness * spelling: entities * spelling: especially * spelling: empty * spelling: expected * spelling: explicitly * spelling: existent * spelling: finished * spelling: flexibility * spelling: flexible * spelling: freshclam * spelling: functions * spelling: guarantee * spelling: hardened * spelling: headaches * spelling: heighten * spelling: improper * spelling: increment * spelling: indefinitely * spelling: independent * spelling: inaccessible * spelling: infrastructure Conflicts: docs/html/node68.html * spelling: initializing * spelling: inited * spelling: instream * spelling: installed * spelling: initialization * spelling: initialize * spelling: interface * spelling: intrinsics * spelling: interpreter * spelling: introduced * spelling: invalid * spelling: latency * spelling: lawyers * spelling: libclamav * spelling: likelihood * 
spelling: loop * spelling: maximum * spelling: million * spelling: milliseconds * spelling: minimum * spelling: minzhuan * spelling: multipart * spelling: misled * spelling: modifiers * spelling: notifying * spelling: objects * spelling: occurred * spelling: occurs * spelling: occurrences * spelling: optimization * spelling: original * spelling: originated * spelling: output * spelling: overridden * spelling: parenthesis * spelling: partition * spelling: performance * spelling: permission * spelling: phishing * spelling: portions * spelling: positives * spelling: preceded * spelling: properties * spelling: protocol * spelling: protos * spelling: quarantine * spelling: recursive * spelling: referring * spelling: reorder * spelling: reset * spelling: resources * spelling: resume * spelling: retrieval * spelling: rewrite * spelling: sanity * spelling: scheduled * spelling: search * spelling: section * spelling: separator * spelling: separated * spelling: specify * spelling: special * spelling: statement * spelling: streams * spelling: succession * spelling: suggests * spelling: superfluous * spelling: suspicious * spelling: synonym * spelling: temporarily * spelling: testfiles * spelling: transverse * spelling: turkish * spelling: typos * spelling: unable * spelling: unexpected * spelling: unexpectedly * spelling: unfinished * spelling: unfortunately * spelling: uninitialized * spelling: unlocking * spelling: unnecessary * spelling: unpack * spelling: unrecognized * spelling: unsupported * spelling: usable * spelling: wherever * spelling: wishlist * spelling: white * spelling: infrastructure * spelling: directories * spelling: overridden * spelling: permission * spelling: yesterday * spelling: initialization * spelling: intrinsics * space adjustment for spelling changes * minor modifications by klin 7 years ago			`cli_dbgmsg("cli_scanmsxml: cannot initialize xmlReader\n");`
json_api: added parse error reporting function msxml: added parsing error reporting to preclass json 10 years ago
			`#if HAVE_JSON`
			`ret = cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_IO");`
			`#endif`
			`return ret; // libxml2 failed!`
added basic framework for msxml processing 10 years ago			`}`

msxml_parser: add callback-based scanning mechanism 10 years ago			`ret = cli_msxml_parse_document(ctx, reader, msxml_keys, num_msxml_keys, 1, NULL);`
msxml: basic framework for processing xml document 10 years ago
			`xmlTextReaderClose(reader);`
			`xmlFreeTextReader(reader);`
added basic framework for msxml processing 10 years ago			`return ret;`
			`#else`
			`UNUSEDPARAM(ctx);`
			`cli_dbgmsg("in cli_scanmsxml()\n");`
added newlines for lack of libxml2 support 10 years ago			`cli_dbgmsg("cli_scanmsxml: scanning msxml documents requires libxml2!\n");`
added basic framework for msxml processing 10 years ago
added new source files to handle msxml files 10 years ago			`return CL_SUCCESS;`
added basic framework for msxml processing 10 years ago			`#endif`
added new source files to handle msxml files 10 years ago			`}`