Fixes to a handful of bugs identified during regression testing of PDF and UnRAR changes.

Fix a minor memory leak in fmap_dump_to_file(). Fix the PDF object stream logic to account for a realloc() issue when the only PDF object stream fails to parse, and for PDF objects in a stream that appear to extend beyond the size of the stream. Fix a memory leak when cleaning up the PDF object stream buffer in an error condition. Fix a bug in pdf_decodestream() wherein objects were found in an object stream, but the object stream could later be freed if the max scansize was exceeded, resulting in a NULL dereference. General cleanup of the pdf_decodestream()/pdf_decodestream_internal() exit code logic.
7 years ago · d77b8ae0fb
parent 24f225c21f
commit d77b8ae0fb
4 changed files with 115 additions and 80 deletions
--- a/libclamav/fmap.c
+++ b/libclamav/fmap.c
@ -750,7 +750,6 @@ cl_error_t fmap_dump_to_file(fmap_t* map, const char* filepath, const char* tmpd

    char* filebase = NULL;
    char* prefix = NULL;
-    uint32_t prefix_allocated = 0;

    char* tmpname = NULL;
    int tmpfd = -1;
@ -782,11 +781,13 @@ cl_error_t fmap_dump_to_file(fmap_t* map, const char* filepath, const char* tmpd
                return CL_EMEM;
            }
            snprintf(prefix, prefix_len, "%s.%zu-%zu", filebase, start_offset, end_offset);
-            prefix_allocated = 1;
+
+            free(filebase);
+            filebase = NULL;
        } else {
            /* Else if we're dumping the whole thing, use the filebase as the prefix */
            prefix = filebase;
-            prefix_allocated = 0;
+            filebase = NULL;
        }
    }

@ -794,18 +795,16 @@ cl_error_t fmap_dump_to_file(fmap_t* map, const char* filepath, const char* tmpd
    ret = cli_gentempfd_with_prefix(tmpdir, prefix, &tmpname, &tmpfd);
    if (ret != CL_SUCCESS) {
        cli_dbgmsg("fmap_dump_to_file: failed to generate temporary file.\n");
-        if ((NULL != prefix) && (prefix_allocated)) {
+        if (NULL != prefix) {
            free(prefix);
            prefix = NULL;
-            prefix_allocated = 0;
        }
        return ret;
    }

-    if ((NULL != prefix) && (prefix_allocated)) {
+    if (NULL != prefix) {
        free(prefix);
        prefix = NULL;
-        prefix_allocated = 0;
    }

    do {
--- a/libclamav/pdf.c
+++ b/libclamav/pdf.c
@ -397,8 +397,20 @@ int pdf_findobj_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm,
            status = CL_EPARSE;
            goto done;
        }
+        else if (next_objoff <= objoff) {
+            /* Failed to find obj offset for next obj */
+            cli_dbgmsg("pdf_findobj_in_objstm: Found next obj offset for obj in object stream but it's less than or equal to the current one!\n");
+            status = CL_EPARSE;
+            goto done;
+        }
+        else if (objstm->first + next_objoff > objstm->streambuf_len) {
+            /* Failed to find obj offset for next obj */
+            cli_dbgmsg("pdf_findobj_in_objstm: Found next obj offset for obj in object stream but it's further out than the size of the stream!\n");
+            status = CL_EPARSE;
+            goto done;
+        }

-        obj->size = objstm->first + next_objoff - obj->start;
+        obj->size = next_objoff - objoff;
    } 
    else 
    {
@ -1364,15 +1376,20 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
 {
    cli_ctx *ctx = pdf->ctx;
    char fullname[NAME_MAX + 1];
-    int fout;
-    ptrdiff_t sum = 0;
+    int fout = -1;
+    size_t sum = 0;
    cl_error_t rc = CL_SUCCESS;
    int dump = 1;

    cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id>>8, obj->id&0xff);

-    if (obj->objstm)
+    if (obj->objstm) {
        cli_dbgmsg("pdf_extract_obj: extracting obj found in objstm.\n");
+        if (obj->objstm->streambuf == NULL) {
+            cli_warnmsg("pdf_extract_obj: object in object stream has null stream buffer!\n");
+            return CL_EFORMAT;
+        }
+    }

    /* TODO: call bytecode hook here, allow override dumpability */
    if ((!(obj->flags & (1 << OBJ_STREAM)) || (obj->flags & (1 << OBJ_HASFILTERS))) && !(obj->flags & DUMP_MASK)) {
@ -1584,17 +1601,25 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                }

                sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, (uint32_t)length, xref, fout, &rc, objstm);
-                if (sum < 0) {
-                    /*
-                    * If we were expecting an objstm and there was a failure...
-                    *   discard the memory for last object stream.
-                    */
-                    if (NULL != objstm)
-                    {
+                if ((CL_SUCCESS != rc) && (CL_VIRUS != rc)) {
+                    cli_dbgmsg("Error decoding stream! Error code: %d\n", rc);
+
+                    /* It's ok if we couldn't decode the stream,
+                     *   make a best effort to keep parsing. */
+                    if (CL_EPARSE == rc)
+                        rc = CL_SUCCESS;
+
+                    if (NULL != objstm) {
+                        /*
+                         * If we were expecting an objstm and there was a failure...
+                         *   discard the memory for last object stream.
+                         */
                        if (NULL != pdf->objstms) {
                            if (NULL != pdf->objstms[pdf->nobjstms - 1]) {
-                                pdf->objstms[pdf->nobjstms - 1]->streambuf = NULL;
-
+                                if (NULL != pdf->objstms[pdf->nobjstms - 1]->streambuf) {
+                                    free(pdf->objstms[pdf->nobjstms - 1]->streambuf);
+                                    pdf->objstms[pdf->nobjstms - 1]->streambuf = NULL;
+                                }
                                free(pdf->objstms[pdf->nobjstms - 1]);
                                pdf->objstms[pdf->nobjstms - 1] = NULL;
                            }
@ -1602,11 +1627,16 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                            /* Pop the objstm off the end of the pdf->objstms array. */
                            if (pdf->nobjstms > 0) {
                                pdf->nobjstms--;
-                                pdf->objstms = cli_realloc2(pdf->objstms, sizeof(struct objstm_struct*) * pdf->nobjstms);
-
-                                if (!pdf->objstms) {
-                                    cli_warnmsg("pdf_extract_obj: out of memory when shrinking down objstm array\n");
-                                    return CL_EMEM;
+                                if (0 == pdf->nobjstms) {
+                                    free(pdf->objstms);
+                                    pdf->objstms = NULL;
+                                } else {
+                                    pdf->objstms = cli_realloc2(pdf->objstms, sizeof(struct objstm_struct*) * pdf->nobjstms);
+
+                                    if (!pdf->objstms) {
+                                        cli_warnmsg("pdf_extract_obj: out of memory when shrinking down objstm array\n");
+                                        return CL_EMEM;
+                                    }
                                }
                            } else {
                                /* hm.. this shouldn't happen */
@ -1619,7 +1649,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
                if (dparams)
                    pdf_free_dict(dparams);

-                if (sum < 0 || ((rc == CL_VIRUS) && !SCAN_ALLMATCHES)) {
+                if ((rc == CL_VIRUS) && !SCAN_ALLMATCHES) {
                    sum = 0; /* prevents post-filter scan */
                    break;
                }
--- a/libclamav/pdfdecode.c
+++ b/libclamav/pdfdecode.c
@ -76,7 +76,7 @@ struct pdf_token {
    uint8_t *content;  /* content stream */
 };

-static ptrdiff_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm);
+static size_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm);
 static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl);

 static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
@ -101,34 +101,36 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
 * @param fout      File descriptor to write to to be scanned.
 * @param[out] rc   Return code ()
 * @param objstm    (optional) Object stream context structure.
- * @return ptrdiff_t   The number of bytes written to 'fout' to be scanned. -1 if failed out.
+ * @return size_t   The number of bytes written to 'fout' to be scanned.
 */
-ptrdiff_t pdf_decodestream(
+size_t pdf_decodestream(
    struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
    const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status,
    struct objstm_struct *objstm)
 {
    struct pdf_token *token = NULL;
-    ptrdiff_t bytes_scanned = -1;
-    cl_error_t retval = CL_SUCCESS;
+    size_t bytes_scanned = 0;
+    cli_ctx *ctx = pdf->ctx;

    if (!status) {
        /* invalid args, and no way to pass back the status code */
-        return -1;
+        return 0;
    }

    if (!pdf || !obj) {
        /* Invalid args */
-        retval = CL_EARG;
+        *status = CL_EARG;
        goto done;
    }

    if (!stream || !streamlen || fout < 0) {
        cli_dbgmsg("pdf_decodestream: no filters or stream on obj %u %u\n", obj->id>>8, obj->id&0xff);
-        retval = CL_ENULLARG;
+        *status = CL_ENULLARG;
        goto done;
    }

+    *status = CL_SUCCESS;
+
 #if 0
    if (params)
        pdf_print_dict(params, 0);
@ -136,7 +138,7 @@ ptrdiff_t pdf_decodestream(

    token = cli_malloc(sizeof(struct pdf_token));
    if (!token) {
-        retval = CL_EMEM;
+        *status = CL_EMEM;
        goto done;
    }

@ -149,7 +151,7 @@ ptrdiff_t pdf_decodestream(
    token->content = cli_malloc(streamlen);
    if (!token->content) {
        free(token);
-        retval = CL_EMEM;
+        *status = CL_EMEM;
        goto done;
    }
    memcpy(token->content, stream, streamlen);
@ -157,38 +159,35 @@ ptrdiff_t pdf_decodestream(

    cli_dbgmsg("pdf_decodestream: detected %lu applied filters\n", (long unsigned)(obj->numfilters));

-    bytes_scanned = pdf_decodestream_internal(pdf, obj, params, token, fout, &retval, objstm);
-    /* 
-     * Pass back the return value, though we really only care
-     * if it is CV_VIRUS or CL_SUCCESS.
-     */
-    if (retval == CL_VIRUS)
-        retval = CL_VIRUS;
-    else
-        retval = CL_SUCCESS;
+    bytes_scanned = pdf_decodestream_internal(pdf, obj, params, token, fout, status, objstm);
+
+    if ((CL_VIRUS == *status) && !SCAN_ALLMATCHES) {
+        goto done;
+    }

-    if (!token->success) {
+    if (0 == token->success) {
        /*
-         * If it was successful, the internal() function calls cli_writen()
-         * However, in this case... no non-forced filter are decoded, 
-         *   so return the raw stream.
+         * Either:
+         *  a) it failed to decode any filters, or
+         *  b) there were no filters.
+         *
+         * Write out the raw stream to be scanned.
+         *
+         * Nota bene: If it did decode any filters, the internal() function would
+         *            have written out the decoded stream to be scanned.
         */
        if (!cli_checklimits("pdf", pdf->ctx, streamlen, 0, 0)) {
            cli_dbgmsg("pdf_decodestream: no non-forced filters decoded, returning raw stream\n");

            if (cli_writen(fout, stream, streamlen) != streamlen) {
-                cli_errmsg("pdf_decodestream: failed to write output file\n");
-                retval = CL_EWRITE;
-                bytes_scanned = -1;
-                goto done;
+                cli_errmsg("pdf_decodestream: failed to write raw stream to output file\n");
+            } else {
+                bytes_scanned = streamlen;
            }
-            bytes_scanned = streamlen;
        }
    }

 done:
-    *status = retval;
-
    /*
     * Free up the token, and token content, if any.
     */
@ -220,28 +219,30 @@ done:
 * @param objstm        (optional) Object stream context structure.
 * @return ptrdiff_t    The number of bytes we wrote to 'fout'. -1 if failed out.
 */
-static ptrdiff_t pdf_decodestream_internal(
+static size_t pdf_decodestream_internal(
    struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
    struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm)
 {
    cl_error_t vir = CL_CLEAN;
    cl_error_t retval = CL_SUCCESS;
-    ptrdiff_t bytes_scanned = -1;
+    size_t bytes_scanned = 0;
    cli_ctx *ctx = pdf->ctx;
    const char *filter = NULL;
    int i;

    if (!status) {
        /* invalid args, and no way to pass back the status code */
-        return -1;
+        return 0;
    }

    if (!pdf || !obj || !token) {
        /* Invalid args */
-        retval = CL_EARG;
+        *status = CL_EARG;
        goto done;
    }
    
+    *status = CL_SUCCESS;
+    
    /*
     * if pdf is decryptable, scan for CRYPT filter
     * if none, force a DECRYPT filter application
@ -253,6 +254,7 @@ static ptrdiff_t pdf_decodestream_internal(
            cli_dbgmsg("pdf_decodestream_internal: decoding => non-filter CRYPT\n");
            retval = filter_decrypt(pdf, obj, params, token, 1);
            if (retval != CL_SUCCESS) {
+                *status = CL_EPARSE;
                goto done;
            }
        }
@ -323,12 +325,15 @@ static ptrdiff_t pdf_decodestream_internal(

                switch (retval) {
                case CL_VIRUS:
+                    *status = CL_VIRUS;
                    reason = "detection";
                    break;
                case CL_BREAK:
+                    *status = CL_SUCCESS;
                    reason = "decoding break";
                    break;
                default:
+                    *status = CL_EPARSE;
                    reason = "decoding error";
                    break;
                }
@ -341,31 +346,35 @@ static ptrdiff_t pdf_decodestream_internal(

        /* Dump the stream content to a text file if keeptmp is enabled. */
        if (pdf->ctx->engine->keeptmp) {
-            retval = pdf_decode_dump(pdf, obj, token, i+1);
-            if (retval != CL_SUCCESS) {
-                goto done;
+            if (CL_SUCCESS != pdf_decode_dump(pdf, obj, token, i+1)) {
+                cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to temp file\n");
            }
        }
    }

    if (token->success > 0) {
        /*
-         * Looks like we successfully decoded the stream, so lets write it out.
-         *   In the failure case, the caller will deal with the raw stream.
+         * Looks like we successfully decoded some or all of the stream filters,
+         * so lets write it out to a file descriptor we scan.
+         *
+         * In the event that we didn't decode any filters (or maybe there
+         * weren't any filters), the calling function will do the same with
+         * the raw stream.
         */
-        if (!cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
+        if (CL_SUCCESS == cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
            if (cli_writen(fout, token->content, token->length) != token->length) {
-                cli_errmsg("pdf_decodestream_internal: failed to write output file\n");
-                retval = CL_EWRITE;
-                bytes_scanned = -1;
-                goto done;
+                cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to output file\n");
+            } else {
+                bytes_scanned = token->length;
            }
-            bytes_scanned = token->length;
        }
    }

-    if (NULL != objstm)
+    if ((NULL != objstm) &&
+        ((CL_SUCCESS == *status) || ((CL_VIRUS == *status) && SCAN_ALLMATCHES)))
    {
+        int objs_found = pdf->nobjs;
+
        /*
         * The caller indicated that the decoded data is an object stream.
         * Perform experimental object stream parsing to extract objects from the stream.
@ -377,7 +386,9 @@ static ptrdiff_t pdf_decodestream_internal(
        token->content = NULL;
        token->length = 0;

-        int objs_found = pdf->nobjs;
+        /* Don't store the result. It's ok if some or all objects failed to parse.
+           It would be far worse to add objects from a stream to the list, and then free
+           the stream buffer due to an "error". */
        if (CL_SUCCESS != pdf_find_and_parse_objs_in_objstm(pdf, objstm))
        {
            cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm failed!\n");
@ -392,14 +403,9 @@ static ptrdiff_t pdf_decodestream_internal(

 done:

-    *status = retval;
-
    if (vir == CL_VIRUS)
        *status = CL_VIRUS;

-    if (*status == CL_BREAK)
-        *status = CL_SUCCESS;
-
    return bytes_scanned;
 }

--- a/libclamav/pdfdecode.h
+++ b/libclamav/pdfdecode.h
@ -51,9 +51,9 @@
 * @param fout      File descriptor to write to a temp file.
 * @param[out] rc   Return code ()
 * @param objstm    Object stream context structure.
- * @return ptrdiff_t 
+ * @return size_t   The number of bytes written to fout to be scanned.
 */
-ptrdiff_t pdf_decodestream(
+size_t pdf_decodestream(
    struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
    const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status,
    struct objstm_struct *objstm);