Add PDF object extraction recursion limits

Adds recursion limits for object extraction, as well as for parsing
strings, arrays, and dictionaries during extraction.
The limit is set to 25.

Places the recursion-depth variable in the PDF parse context structure.
pull/730/head
Micah Snyder 3 years ago committed by Micah Snyder
parent 35ccb49f38
commit e9f7fe2a80
  1. 41
      libclamav/pdf.c
  2. 4
      libclamav/pdf.h
  3. 47
      libclamav/pdfng.c

@ -1439,6 +1439,11 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id >> 8, obj->id & 0xff);
if (PDF_OBJECT_RECURSION_LIMIT < pdf->parse_recursion_depth) {
cli_dbgmsg("pdf_extract_obj: Recursion limit reached.\n");
return CL_SUCCESS;
}
if (obj->objstm) {
cli_dbgmsg("pdf_extract_obj: extracting obj found in objstm.\n");
if (obj->objstm->streambuf == NULL) {
@ -1582,10 +1587,13 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
dict_len--;
}
if (dict_len > 4)
if (dict_len > 4) {
pdf->parse_recursion_depth++;
dparams = pdf_parse_dict(pdf, obj, obj->size, (char *)pstr, NULL);
else
pdf->parse_recursion_depth--;
} else {
cli_dbgmsg("pdf_extract_obj: failed to locate DecodeParms dictionary start\n");
}
}
/*
@ -3418,7 +3426,9 @@ static cl_error_t pdf_find_and_extract_objs(struct pdf_struct *pdf)
goto done;
}
pdf->parse_recursion_depth++;
status = pdf_extract_obj(pdf, obj, PDF_EXTRACT_OBJ_SCAN);
pdf->parse_recursion_depth--;
switch (status) {
case CL_EFORMAT:
/* Don't halt on one bad object */
@ -4109,7 +4119,10 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
pdf->stats.author = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.author))
return;
pdf->parse_recursion_depth++;
pdf->stats.author->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Author", NULL, &(pdf->stats.author->meta));
pdf->parse_recursion_depth--;
}
}
#endif
@ -4134,7 +4147,10 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
pdf->stats.creator = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.creator))
return;
pdf->parse_recursion_depth++;
pdf->stats.creator->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Creator", NULL, &(pdf->stats.creator->meta));
pdf->parse_recursion_depth--;
}
}
#endif
@ -4159,7 +4175,10 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str
pdf->stats.modificationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.modificationdate))
return;
pdf->parse_recursion_depth++;
pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/ModDate", NULL, &(pdf->stats.modificationdate->meta));
pdf->parse_recursion_depth--;
}
}
#endif
@ -4184,7 +4203,10 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct
pdf->stats.creationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.creationdate))
return;
pdf->parse_recursion_depth++;
pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/CreationDate", NULL, &(pdf->stats.creationdate->meta));
pdf->parse_recursion_depth--;
}
}
#endif
@ -4209,7 +4231,10 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
pdf->stats.producer = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.producer))
return;
pdf->parse_recursion_depth++;
pdf->stats.producer->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Producer", NULL, &(pdf->stats.producer->meta));
pdf->parse_recursion_depth--;
}
}
#endif
@ -4234,7 +4259,10 @@ static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
pdf->stats.title = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.title))
return;
pdf->parse_recursion_depth++;
pdf->stats.title->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Title", NULL, &(pdf->stats.title->meta));
pdf->parse_recursion_depth--;
}
}
#endif
@ -4259,7 +4287,10 @@ static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
pdf->stats.keywords = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.keywords))
return;
pdf->parse_recursion_depth++;
pdf->stats.keywords->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Keywords", NULL, &(pdf->stats.keywords->meta));
pdf->parse_recursion_depth--;
}
}
#endif
@ -4284,7 +4315,10 @@ static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
pdf->stats.subject = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.subject))
return;
pdf->parse_recursion_depth++;
pdf->stats.subject->data = pdf_parse_string(pdf, obj, objstart, obj->size, "/Subject", NULL, &(pdf->stats.subject->meta));
pdf->parse_recursion_depth--;
}
}
#endif
@ -4360,7 +4394,10 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
begin += 5;
pdf->parse_recursion_depth++;
array = pdf_parse_array(pdf, obj, obj->size, (char *)begin, NULL);
pdf->parse_recursion_depth--;
if (!(array)) {
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
return;

@ -24,6 +24,8 @@
#include "others.h"
#define PDF_FILTERLIST_MAX 64
#define PDF_OBJECT_RECURSION_LIMIT 25
struct objstm_struct {
uint32_t first; // offset of first obj
uint32_t current; // offset of current obj
@ -167,6 +169,7 @@ struct pdf_struct {
struct pdf_stats stats;
struct objstm_struct **objstms;
uint32_t nobjstms;
uint32_t parse_recursion_depth;
};
#define OBJ_FLAG_PDFNAME_NONE 0x0
@ -191,6 +194,7 @@ char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const cha
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta);
struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar);
struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar);
int is_object_reference(char *begin, char **endchar, uint32_t *id);
void pdf_free_dict(struct pdf_dict *dict);
void pdf_free_array(struct pdf_array *array);

@ -382,6 +382,11 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
char *res = NULL;
uint32_t objid;
if (PDF_OBJECT_RECURSION_LIMIT < pdf->parse_recursion_depth) {
cli_dbgmsg("pdf_parse_string: Recursion limit reached.\n");
return NULL;
}
if (obj->objstm) {
if (objsize > (size_t)(obj->objstm->streambuf_len - (objstart - obj->objstm->streambuf))) {
/* Possible attempt to exploit bb11980 */
@ -439,6 +444,7 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
p2 = (char *)(q + objsize);
if (is_object_reference(p1, &p2, &objid)) {
cl_error_t ret;
struct pdf_obj *newobj;
char *begin, *p3;
STATBUF sb;
@ -464,8 +470,12 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
objflags = newobj->flags;
newobj->flags |= (1 << OBJ_FORCEDUMP);
if (pdf_extract_obj(pdf, newobj, PDF_EXTRACT_OBJ_NONE) != CL_SUCCESS)
pdf->parse_recursion_depth++;
ret = pdf_extract_obj(pdf, newobj, PDF_EXTRACT_OBJ_NONE);
pdf->parse_recursion_depth--;
if (ret != CL_SUCCESS) {
return NULL;
}
newobj->flags = objflags;
@ -517,7 +527,9 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
switch (*p3) {
case '(':
case '<':
pdf->parse_recursion_depth++;
res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, meta);
pdf->parse_recursion_depth--;
break;
default:
res = pdf_finalize_string(pdf, obj, begin, objsize2);
@ -664,6 +676,11 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
if (!(pdf) || !(obj) || !(begin))
return NULL;
if (PDF_OBJECT_RECURSION_LIMIT < pdf->parse_recursion_depth) {
cli_dbgmsg("pdf_parse_dict: Recursion limit reached\n");
return NULL;
}
objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
: (const char *)(obj->start + pdf->map);
@ -810,23 +827,31 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
switch (begin[0]) {
case '(':
val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &p1, NULL);
pdf->parse_recursion_depth++;
val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &p1, NULL);
pdf->parse_recursion_depth--;
begin = p1 + 2;
break;
case '[':
arr = pdf_parse_array(pdf, obj, end - objstart, begin, &p1);
pdf->parse_recursion_depth++;
arr = pdf_parse_array(pdf, obj, end - objstart, begin, &p1);
pdf->parse_recursion_depth--;
begin = p1 + 1;
break;
case '<':
if ((size_t)(begin - objstart) < objsize - 2) {
if (begin[1] == '<') {
dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &p1);
pdf->parse_recursion_depth++;
dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &p1);
pdf->parse_recursion_depth--;
begin = p1 + 2;
break;
}
}
val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &p1, NULL);
pdf->parse_recursion_depth++;
val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &p1, NULL);
pdf->parse_recursion_depth--;
begin = p1 + 2;
break;
default:
@ -933,6 +958,11 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
if (!(pdf) || !(obj) || !(begin))
return NULL;
if (PDF_OBJECT_RECURSION_LIMIT < pdf->parse_recursion_depth) {
cli_dbgmsg("pdf_parse_array: Recursion limit reached\n");
return NULL;
}
objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
: (const char *)(obj->start + pdf->map);
@ -1002,7 +1032,9 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
switch (begin[0]) {
case '<':
if ((size_t)(begin - objstart) < objsize - 2 && begin[1] == '<') {
pdf->parse_recursion_depth++;
dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &begin);
pdf->parse_recursion_depth--;
begin += 2;
break;
}
@ -1010,12 +1042,15 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
/* Not a dictionary. Intentionally fall through. */
/* fall-through */
case '(':
pdf->parse_recursion_depth++;
val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &begin, NULL);
pdf->parse_recursion_depth--;
begin += 2;
break;
case '[':
/* XXX We should have a recursion counter here */
pdf->parse_recursion_depth++;
arr = pdf_parse_array(pdf, obj, end - objstart, begin, &begin);
pdf->parse_recursion_depth--;
begin += 1;
break;
default:

Loading…
Cancel
Save