Fixes to a handful of bugs identified during regression testing of PDF and UnRAR changes.

Fix for minor memory leak in fmap_dump_to_file().
Fix to PDF object stream logic, accounting for a realloc() issue when the only PDF object stream fails to parse, and for the case where PDF objects in a stream appear to extend beyond the end of the stream.
Fix for a memory leak when cleaning up the PDF object stream buffer in an error condition.
Fix for a bug in pdf_decodestream() wherein objects were found in an object stream, but the object stream could later be freed if the max scansize was exceeded, resulting in a NULL dereference.
General cleanup of pdf_decodestream/pdf_decodestream_internal exit code logic.
pull/51/head
Micah Snyder 7 years ago
parent 24f225c21f
commit d77b8ae0fb
  1. 13
      libclamav/fmap.c
  2. 68
      libclamav/pdf.c
  3. 110
      libclamav/pdfdecode.c
  4. 4
      libclamav/pdfdecode.h

@ -750,7 +750,6 @@ cl_error_t fmap_dump_to_file(fmap_t* map, const char* filepath, const char* tmpd
char* filebase = NULL;
char* prefix = NULL;
uint32_t prefix_allocated = 0;
char* tmpname = NULL;
int tmpfd = -1;
@ -782,11 +781,13 @@ cl_error_t fmap_dump_to_file(fmap_t* map, const char* filepath, const char* tmpd
return CL_EMEM;
}
snprintf(prefix, prefix_len, "%s.%zu-%zu", filebase, start_offset, end_offset);
prefix_allocated = 1;
free(filebase);
filebase = NULL;
} else {
/* Else if we're dumping the whole thing, use the filebase as the prefix */
prefix = filebase;
prefix_allocated = 0;
filebase = NULL;
}
}
@ -794,18 +795,16 @@ cl_error_t fmap_dump_to_file(fmap_t* map, const char* filepath, const char* tmpd
ret = cli_gentempfd_with_prefix(tmpdir, prefix, &tmpname, &tmpfd);
if (ret != CL_SUCCESS) {
cli_dbgmsg("fmap_dump_to_file: failed to generate temporary file.\n");
if ((NULL != prefix) && (prefix_allocated)) {
if (NULL != prefix) {
free(prefix);
prefix = NULL;
prefix_allocated = 0;
}
return ret;
}
if ((NULL != prefix) && (prefix_allocated)) {
if (NULL != prefix) {
free(prefix);
prefix = NULL;
prefix_allocated = 0;
}
do {

@ -397,8 +397,20 @@ int pdf_findobj_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm,
status = CL_EPARSE;
goto done;
}
else if (next_objoff <= objoff) {
/* Failed to find obj offset for next obj */
cli_dbgmsg("pdf_findobj_in_objstm: Found next obj offset for obj in object stream but it's less than or equal to the current one!\n");
status = CL_EPARSE;
goto done;
}
else if (objstm->first + next_objoff > objstm->streambuf_len) {
/* Failed to find obj offset for next obj */
cli_dbgmsg("pdf_findobj_in_objstm: Found next obj offset for obj in object stream but it's further out than the size of the stream!\n");
status = CL_EPARSE;
goto done;
}
obj->size = objstm->first + next_objoff - obj->start;
obj->size = next_objoff - objoff;
}
else
{
@ -1364,15 +1376,20 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
{
cli_ctx *ctx = pdf->ctx;
char fullname[NAME_MAX + 1];
int fout;
ptrdiff_t sum = 0;
int fout = -1;
size_t sum = 0;
cl_error_t rc = CL_SUCCESS;
int dump = 1;
cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id>>8, obj->id&0xff);
if (obj->objstm)
if (obj->objstm) {
cli_dbgmsg("pdf_extract_obj: extracting obj found in objstm.\n");
if (obj->objstm->streambuf == NULL) {
cli_warnmsg("pdf_extract_obj: object in object stream has null stream buffer!\n");
return CL_EFORMAT;
}
}
/* TODO: call bytecode hook here, allow override dumpability */
if ((!(obj->flags & (1 << OBJ_STREAM)) || (obj->flags & (1 << OBJ_HASFILTERS))) && !(obj->flags & DUMP_MASK)) {
@ -1584,17 +1601,25 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
}
sum = pdf_decodestream(pdf, obj, dparams, start + p_stream, (uint32_t)length, xref, fout, &rc, objstm);
if (sum < 0) {
/*
* If we were expecting an objstm and there was a failure...
* discard the memory for last object stream.
*/
if (NULL != objstm)
{
if ((CL_SUCCESS != rc) && (CL_VIRUS != rc)) {
cli_dbgmsg("Error decoding stream! Error code: %d\n", rc);
/* It's ok if we couldn't decode the stream,
* make a best effort to keep parsing. */
if (CL_EPARSE == rc)
rc = CL_SUCCESS;
if (NULL != objstm) {
/*
* If we were expecting an objstm and there was a failure...
* discard the memory for last object stream.
*/
if (NULL != pdf->objstms) {
if (NULL != pdf->objstms[pdf->nobjstms - 1]) {
pdf->objstms[pdf->nobjstms - 1]->streambuf = NULL;
if (NULL != pdf->objstms[pdf->nobjstms - 1]->streambuf) {
free(pdf->objstms[pdf->nobjstms - 1]->streambuf);
pdf->objstms[pdf->nobjstms - 1]->streambuf = NULL;
}
free(pdf->objstms[pdf->nobjstms - 1]);
pdf->objstms[pdf->nobjstms - 1] = NULL;
}
@ -1602,11 +1627,16 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
/* Pop the objstm off the end of the pdf->objstms array. */
if (pdf->nobjstms > 0) {
pdf->nobjstms--;
pdf->objstms = cli_realloc2(pdf->objstms, sizeof(struct objstm_struct*) * pdf->nobjstms);
if (!pdf->objstms) {
cli_warnmsg("pdf_extract_obj: out of memory when shrinking down objstm array\n");
return CL_EMEM;
if (0 == pdf->nobjstms) {
free(pdf->objstms);
pdf->objstms = NULL;
} else {
pdf->objstms = cli_realloc2(pdf->objstms, sizeof(struct objstm_struct*) * pdf->nobjstms);
if (!pdf->objstms) {
cli_warnmsg("pdf_extract_obj: out of memory when shrinking down objstm array\n");
return CL_EMEM;
}
}
} else {
/* hm.. this shouldn't happen */
@ -1619,7 +1649,7 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
if (dparams)
pdf_free_dict(dparams);
if (sum < 0 || ((rc == CL_VIRUS) && !SCAN_ALLMATCHES)) {
if ((rc == CL_VIRUS) && !SCAN_ALLMATCHES) {
sum = 0; /* prevents post-filter scan */
break;
}

@ -76,7 +76,7 @@ struct pdf_token {
uint8_t *content; /* content stream */
};
static ptrdiff_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm);
static size_t pdf_decodestream_internal(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm);
static cl_error_t pdf_decode_dump(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token, int lvl);
static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_token *token);
@ -101,34 +101,36 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
* @param fout File descriptor to write to to be scanned.
* @param[out] rc Return code ()
* @param objstm (optional) Object stream context structure.
* @return ptrdiff_t The number of bytes written to 'fout' to be scanned. -1 if failed out.
* @return size_t The number of bytes written to 'fout' to be scanned.
*/
ptrdiff_t pdf_decodestream(
size_t pdf_decodestream(
struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status,
struct objstm_struct *objstm)
{
struct pdf_token *token = NULL;
ptrdiff_t bytes_scanned = -1;
cl_error_t retval = CL_SUCCESS;
size_t bytes_scanned = 0;
cli_ctx *ctx = pdf->ctx;
if (!status) {
/* invalid args, and no way to pass back the status code */
return -1;
return 0;
}
if (!pdf || !obj) {
/* Invalid args */
retval = CL_EARG;
*status = CL_EARG;
goto done;
}
if (!stream || !streamlen || fout < 0) {
cli_dbgmsg("pdf_decodestream: no filters or stream on obj %u %u\n", obj->id>>8, obj->id&0xff);
retval = CL_ENULLARG;
*status = CL_ENULLARG;
goto done;
}
*status = CL_SUCCESS;
#if 0
if (params)
pdf_print_dict(params, 0);
@ -136,7 +138,7 @@ ptrdiff_t pdf_decodestream(
token = cli_malloc(sizeof(struct pdf_token));
if (!token) {
retval = CL_EMEM;
*status = CL_EMEM;
goto done;
}
@ -149,7 +151,7 @@ ptrdiff_t pdf_decodestream(
token->content = cli_malloc(streamlen);
if (!token->content) {
free(token);
retval = CL_EMEM;
*status = CL_EMEM;
goto done;
}
memcpy(token->content, stream, streamlen);
@ -157,38 +159,35 @@ ptrdiff_t pdf_decodestream(
cli_dbgmsg("pdf_decodestream: detected %lu applied filters\n", (long unsigned)(obj->numfilters));
bytes_scanned = pdf_decodestream_internal(pdf, obj, params, token, fout, &retval, objstm);
/*
* Pass back the return value, though we really only care
* if it is CV_VIRUS or CL_SUCCESS.
*/
if (retval == CL_VIRUS)
retval = CL_VIRUS;
else
retval = CL_SUCCESS;
bytes_scanned = pdf_decodestream_internal(pdf, obj, params, token, fout, status, objstm);
if ((CL_VIRUS == *status) && !SCAN_ALLMATCHES) {
goto done;
}
if (!token->success) {
if (0 == token->success) {
/*
* If it was successful, the internal() function calls cli_writen()
* However, in this case... no non-forced filter are decoded,
* so return the raw stream.
* Either:
* a) it failed to decode any filters, or
* b) there were no filters.
*
* Write out the raw stream to be scanned.
*
* Nota bene: If it did decode any filters, the internal() function would
* have written out the decoded stream to be scanned.
*/
if (!cli_checklimits("pdf", pdf->ctx, streamlen, 0, 0)) {
cli_dbgmsg("pdf_decodestream: no non-forced filters decoded, returning raw stream\n");
if (cli_writen(fout, stream, streamlen) != streamlen) {
cli_errmsg("pdf_decodestream: failed to write output file\n");
retval = CL_EWRITE;
bytes_scanned = -1;
goto done;
cli_errmsg("pdf_decodestream: failed to write raw stream to output file\n");
} else {
bytes_scanned = streamlen;
}
bytes_scanned = streamlen;
}
}
done:
*status = retval;
/*
* Free up the token, and token content, if any.
*/
@ -220,28 +219,30 @@ done:
* @param objstm (optional) Object stream context structure.
* @return ptrdiff_t The number of bytes we wrote to 'fout'. -1 if failed out.
*/
static ptrdiff_t pdf_decodestream_internal(
static size_t pdf_decodestream_internal(
struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
struct pdf_token *token, int fout, cl_error_t *status, struct objstm_struct *objstm)
{
cl_error_t vir = CL_CLEAN;
cl_error_t retval = CL_SUCCESS;
ptrdiff_t bytes_scanned = -1;
size_t bytes_scanned = 0;
cli_ctx *ctx = pdf->ctx;
const char *filter = NULL;
int i;
if (!status) {
/* invalid args, and no way to pass back the status code */
return -1;
return 0;
}
if (!pdf || !obj || !token) {
/* Invalid args */
retval = CL_EARG;
*status = CL_EARG;
goto done;
}
*status = CL_SUCCESS;
/*
* if pdf is decryptable, scan for CRYPT filter
* if none, force a DECRYPT filter application
@ -253,6 +254,7 @@ static ptrdiff_t pdf_decodestream_internal(
cli_dbgmsg("pdf_decodestream_internal: decoding => non-filter CRYPT\n");
retval = filter_decrypt(pdf, obj, params, token, 1);
if (retval != CL_SUCCESS) {
*status = CL_EPARSE;
goto done;
}
}
@ -323,12 +325,15 @@ static ptrdiff_t pdf_decodestream_internal(
switch (retval) {
case CL_VIRUS:
*status = CL_VIRUS;
reason = "detection";
break;
case CL_BREAK:
*status = CL_SUCCESS;
reason = "decoding break";
break;
default:
*status = CL_EPARSE;
reason = "decoding error";
break;
}
@ -341,31 +346,35 @@ static ptrdiff_t pdf_decodestream_internal(
/* Dump the stream content to a text file if keeptmp is enabled. */
if (pdf->ctx->engine->keeptmp) {
retval = pdf_decode_dump(pdf, obj, token, i+1);
if (retval != CL_SUCCESS) {
goto done;
if (CL_SUCCESS != pdf_decode_dump(pdf, obj, token, i+1)) {
cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to temp file\n");
}
}
}
if (token->success > 0) {
/*
* Looks like we successfully decoded the stream, so lets write it out.
* In the failure case, the caller will deal with the raw stream.
* Looks like we successfully decoded some or all of the stream filters,
* so lets write it out to a file descriptor we scan.
*
* In the event that we didn't decode any filters (or maybe there
* weren't any filters), the calling function will do the same with
* the raw stream.
*/
if (!cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
if (CL_SUCCESS == cli_checklimits("pdf", pdf->ctx, token->length, 0, 0)) {
if (cli_writen(fout, token->content, token->length) != token->length) {
cli_errmsg("pdf_decodestream_internal: failed to write output file\n");
retval = CL_EWRITE;
bytes_scanned = -1;
goto done;
cli_errmsg("pdf_decodestream_internal: failed to write decoded stream content to output file\n");
} else {
bytes_scanned = token->length;
}
bytes_scanned = token->length;
}
}
if (NULL != objstm)
if ((NULL != objstm) &&
((CL_SUCCESS == *status) || ((CL_VIRUS == *status) && SCAN_ALLMATCHES)))
{
int objs_found = pdf->nobjs;
/*
* The caller indicated that the decoded data is an object stream.
* Perform experimental object stream parsing to extract objects from the stream.
@ -377,7 +386,9 @@ static ptrdiff_t pdf_decodestream_internal(
token->content = NULL;
token->length = 0;
int objs_found = pdf->nobjs;
/* Don't store the result. It's ok if some or all objects failed to parse.
It would be far worse to add objects from a stream to the list, and then free
the stream buffer due to an "error". */
if (CL_SUCCESS != pdf_find_and_parse_objs_in_objstm(pdf, objstm))
{
cli_dbgmsg("pdf_decodestream_internal: pdf_find_and_parse_objs_in_objstm failed!\n");
@ -392,14 +403,9 @@ static ptrdiff_t pdf_decodestream_internal(
done:
*status = retval;
if (vir == CL_VIRUS)
*status = CL_VIRUS;
if (*status == CL_BREAK)
*status = CL_SUCCESS;
return bytes_scanned;
}

@ -51,9 +51,9 @@
* @param fout File descriptor to write to a temp file.
* @param[out] rc Return code ()
* @param objstm Object stream context structure.
* @return ptrdiff_t
* @return size_t The number of bytes written to fout to be scanned.
*/
ptrdiff_t pdf_decodestream(
size_t pdf_decodestream(
struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params,
const char *stream, uint32_t streamlen, int xref, int fout, cl_error_t *status,
struct objstm_struct *objstm);

Loading…
Cancel
Save