Add explanitory comments to the new pdf_parse_string function

pull/6/head
Shawn Webb 11 years ago
parent 6606d05000
commit 76aa0fc55a
  1. 36
      libclamav/pdf.c

@ -2832,7 +2832,14 @@ static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size
checklen = strlen(str);
/* Yes, all of this is required to find the start and end of a potentially UTF-* string */
/* Yes, all of this is required to find the start and end of a potentially UTF-* string
*
* First, find the key of the key/value pair we're looking for in this object.
* Second, determine whether the value points to another object (NOTE: this is sketchy behavior)
* Third, attempt to determine if we're ASCII or UTF-*
* If we're ASCII, just copy the ASCII string into a new heap-allocated string and return that
* Fourth, Attempt to decode from UTF-* to ASCII
*/
for (p1=(char *)q; (p1 - q) < objsize-checklen; p1++)
if (!strncmp(p1, str, checklen))
@ -2853,21 +2860,44 @@ static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size
if ((p1 - q) == objsize)
return NULL;
/* We should be at the start of the string, minus 1 */
if (isdigit(p1[0])) {
unsigned long objnum;
unsigned long objnum, revnum;
char *end;
/*
* This is kind of sketchy... This string says it points to another object.
* Try to get/parse the object and return the decoded value as an ASCII string.
*/
/* Get the object number */
objnum = strtoul(p1, &end, 10);
if ((end - p1) == 0)
return NULL;
/* Skip whitespaces */
p1 = end;
while ((p1 - q) < objsize && isspace(p1[0]))
p1++;
if (p1 - q == objsize)
return NULL;
/* Get revision number */
revnum = strtoul(p1, &end, 10);
if ((end - p1) == 0)
return NULL;
if (objnum > pdf->nobjs)
return NULL;
/* TODO: Parse the object, decode it if necessary, and return the string */
res = NULL;
return res;
}
/* Make a best effort to find the end of the string and determine if UTF-* */
p2 = ++p1;
while (1) {
int shouldbreak=1;
@ -2905,6 +2935,7 @@ static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size
len = inlen = outlen = (size_t)(p2 - p1) + 1;
if (likelyutf == 0) {
/* We're not UTF-*, so just make a copy of the string and return that */
res = cli_calloc(1, len);
if (!(res))
return NULL;
@ -2914,6 +2945,7 @@ static char *pdf_parse_string(struct pdf_struct *pdf, const char *objstart, size
}
#if HAVE_ICONV
/* We should be UTF-* from this point on, try to decode */
buf = cli_calloc(1, inlen);
if (!(buf))
return NULL;

Loading…
Cancel
Save