|
|
|
@ -83,6 +83,7 @@ static const char *pdf_nextobject(const char *ptr, size_t len); |
|
|
|
|
static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar); |
|
|
|
|
static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar); |
|
|
|
|
static struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar); |
|
|
|
|
static int is_object_reference(char *begin, char **endchar, uint32_t *id); |
|
|
|
|
static void pdf_free_dict(struct pdf_dict *dict); |
|
|
|
|
static void pdf_free_array(struct pdf_array *array); |
|
|
|
|
static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags); |
|
|
|
@ -2901,12 +2902,23 @@ static char *pdf_convert_utf(char *begin, size_t sz) |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int is_object_reference(char *begin, char **endchar) |
|
|
|
|
static int is_object_reference(char *begin, char **endchar, uint32_t *id) |
|
|
|
|
{ |
|
|
|
|
char *end = *endchar; |
|
|
|
|
char *p1=begin, *p2; |
|
|
|
|
unsigned long n; |
|
|
|
|
uint32_t t=0; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Object references are always this format: |
|
|
|
|
* XXXX YYYY R |
|
|
|
|
* Where XXXX is the object ID and YYYY is the revision ID of the object. |
|
|
|
|
* The letter R signifies that this is a reference. |
|
|
|
|
* |
|
|
|
|
* In between each item can be an arbitrary amount of whitespace. |
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
/* Skip whitespace */ |
|
|
|
|
while (p1 < end && isspace(p1[0])) |
|
|
|
|
p1++; |
|
|
|
|
|
|
|
|
@ -2916,6 +2928,7 @@ static int is_object_reference(char *begin, char **endchar) |
|
|
|
|
if (!isnumber(p1[0])) |
|
|
|
|
return 0; |
|
|
|
|
|
|
|
|
|
/* Ensure strtoul() isn't going to go past our buffer */ |
|
|
|
|
p2 = p1+1; |
|
|
|
|
while (p2 < end && !isspace(p2[0])) |
|
|
|
|
p2++; |
|
|
|
@ -2927,6 +2940,9 @@ static int is_object_reference(char *begin, char **endchar) |
|
|
|
|
if (n == ULONG_MAX && errno) |
|
|
|
|
return 0; |
|
|
|
|
|
|
|
|
|
t = n<<8; |
|
|
|
|
|
|
|
|
|
/* Skip more whitespace */ |
|
|
|
|
p1 = p2; |
|
|
|
|
while (p1 < end && isspace(p1[0])) |
|
|
|
|
p1++; |
|
|
|
@ -2937,6 +2953,7 @@ static int is_object_reference(char *begin, char **endchar) |
|
|
|
|
if (!isnumber(p1[0])) |
|
|
|
|
return 0; |
|
|
|
|
|
|
|
|
|
/* Ensure strtoul() is going to go past our buffer */ |
|
|
|
|
p2 = p1+1; |
|
|
|
|
while (p2 < end && !isspace(p2[0])) |
|
|
|
|
p2++; |
|
|
|
@ -2948,6 +2965,8 @@ static int is_object_reference(char *begin, char **endchar) |
|
|
|
|
if (n == ULONG_MAX && errno) |
|
|
|
|
return 0; |
|
|
|
|
|
|
|
|
|
t |= (n&0xff); |
|
|
|
|
|
|
|
|
|
p1 = p2; |
|
|
|
|
while (p1 < end && isspace(p1[0])) |
|
|
|
|
p1++; |
|
|
|
@ -2957,6 +2976,9 @@ static int is_object_reference(char *begin, char **endchar) |
|
|
|
|
|
|
|
|
|
if (p1[0] == 'R') { |
|
|
|
|
*endchar = p1+1; |
|
|
|
|
if (id) |
|
|
|
|
*id = t; |
|
|
|
|
|
|
|
|
|
return 1; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -2971,6 +2993,7 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
|
|
|
|
char *buf, *outbuf, *res; |
|
|
|
|
int likelyutf = 0; |
|
|
|
|
unsigned int i; |
|
|
|
|
uint32_t objid; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Yes, all of this is required to find the start and end of a potentially UTF-* string |
|
|
|
@ -3011,45 +3034,14 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
|
|
|
|
/* We should be at the start of the string, minus 1 */ |
|
|
|
|
|
|
|
|
|
p2 = q + objsize; |
|
|
|
|
if (is_object_reference(p1, &p2)) { |
|
|
|
|
unsigned long objnum, revnum; |
|
|
|
|
if (is_object_reference(p1, &p2, &objid)) { |
|
|
|
|
struct pdf_obj *newobj; |
|
|
|
|
char *end, *begin; |
|
|
|
|
STATBUF sb; |
|
|
|
|
uint32_t objflags; |
|
|
|
|
int fd; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This is kind of sketchy... This string says it points to another object. |
|
|
|
|
* Try to get/parse the object and return the decoded value as an ASCII/UTF-8 string. |
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
/* Get the object number */ |
|
|
|
|
objnum = strtoul(p1, &end, 10); |
|
|
|
|
if ((end - p1) == 0) |
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
|
|
/* Skip whitespace and get the revision number */ |
|
|
|
|
p1 = end+1; |
|
|
|
|
while (p1 - q < objsize && isspace(p1[0])) |
|
|
|
|
p1++; |
|
|
|
|
|
|
|
|
|
if (p1 - q == objsize) |
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
|
|
revnum = strtoul(p1, &end, 10); |
|
|
|
|
|
|
|
|
|
p1 = end+1; |
|
|
|
|
while (p1 - q < objsize && isspace(p1[0])) |
|
|
|
|
p1++; |
|
|
|
|
|
|
|
|
|
if (p1 - q == objsize) |
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
|
|
if (p1[0] != 'R') |
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
|
|
newobj = find_obj(pdf, obj, (objnum<<8) | (revnum & 0xff)); |
|
|
|
|
newobj = find_obj(pdf, obj, objid); |
|
|
|
|
if (!(newobj)) |
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
|
@ -3131,7 +3123,7 @@ static char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const |
|
|
|
|
newobj->path = NULL; |
|
|
|
|
|
|
|
|
|
if (endchar) |
|
|
|
|
*endchar = p1; |
|
|
|
|
*endchar = p2; |
|
|
|
|
|
|
|
|
|
return res; |
|
|
|
|
} |
|
|
|
@ -3368,7 +3360,7 @@ static struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *o |
|
|
|
|
p1++; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
is_object_reference(begin, &p1); |
|
|
|
|
is_object_reference(begin, &p1, NULL); |
|
|
|
|
|
|
|
|
|
val = cli_calloc((p1 - begin) + 2, 1); |
|
|
|
|
if (!(val)) |
|
|
|
@ -3517,7 +3509,7 @@ static struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj |
|
|
|
|
default: |
|
|
|
|
/* This should just be a number or the letter R */ |
|
|
|
|
p1 = end; |
|
|
|
|
if (!is_object_reference(begin, &p1)) { |
|
|
|
|
if (!is_object_reference(begin, &p1, NULL)) { |
|
|
|
|
p1 = begin+1; |
|
|
|
|
while (p1 < end && !isspace(p1[0])) |
|
|
|
|
p1++; |
|
|
|
@ -3639,12 +3631,14 @@ static void pdf_print_dict(struct pdf_dict *dict, unsigned long depth) |
|
|
|
|
struct pdf_dict_node *node; |
|
|
|
|
|
|
|
|
|
for (node = dict->nodes; node != NULL; node = node->next) { |
|
|
|
|
if (node->type == PDF_DICT_STRING) |
|
|
|
|
if (node->type == PDF_DICT_STRING) { |
|
|
|
|
cli_errmsg("dict[%lu][%s]: %s\n", depth, node->key, (char *)(node->value)); |
|
|
|
|
else if (node->type == PDF_DICT_ARRAY) |
|
|
|
|
} else if (node->type == PDF_DICT_ARRAY) { |
|
|
|
|
cli_errmsg("dict[%lu][%s]: Array =>\n", depth, node->key); |
|
|
|
|
pdf_print_array((struct pdf_array *)(node->value), depth); |
|
|
|
|
else if (node->type == PDF_DICT_DICT) |
|
|
|
|
} else if (node->type == PDF_DICT_DICT) { |
|
|
|
|
pdf_print_dict((struct pdf_dict *)(node->value), depth+1); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -3939,8 +3933,11 @@ static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_act |
|
|
|
|
return; |
|
|
|
|
|
|
|
|
|
dict = pdf_parse_dict(pdf, obj, objsz, begin, NULL); |
|
|
|
|
if (dict) |
|
|
|
|
if (dict) { |
|
|
|
|
cli_errmsg("==== ==== ==== ====\n"); |
|
|
|
|
pdf_print_dict(dict, 0); |
|
|
|
|
pdf_free_dict(dict); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
begin = cli_memstr(objstart, objsz, "/Kids", 5); |
|
|
|
|
if (!(begin)) |
|
|
|
|