diff --git a/clamav-devel/libclamav/pdf.c b/clamav-devel/libclamav/pdf.c index 0c503d041..19161639b 100644 --- a/clamav-devel/libclamav/pdf.c +++ b/clamav-devel/libclamav/pdf.c @@ -15,7 +15,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -static char const rcsid[] = "$Id: pdf.c,v 1.19 2005/05/24 18:44:03 nigelhorne Exp $"; +static char const rcsid[] = "$Id: pdf.c,v 1.20 2005/05/25 13:37:33 nigelhorne Exp $"; #if HAVE_CONFIG_H #include "clamav-config.h" @@ -66,10 +66,12 @@ cli_pdf(const char *dir, int desc) return CL_CLEAN; #else struct stat statb; - off_t size; + off_t size; /* total number of bytes in the file */ long bytesleft, trailerlength; - char *buf; + char *buf; /* start of memory mapped area */ const char *p, *q, *trailerstart; + const char *xrefstart; /* cross reference table */ + size_t xreflength; int rc = CL_CLEAN; cli_dbgmsg("in cli_pdf()\n"); @@ -89,9 +91,7 @@ cli_pdf(const char *dir, int desc) if(buf == MAP_FAILED) return CL_EMEM; - bytesleft = (long)size; - - cli_dbgmsg("cli_pdf: scanning %lu bytes\n", bytesleft); + cli_dbgmsg("cli_pdf: scanning %lu bytes\n", size); /* Lines are terminated by \r, \n or both */ @@ -101,16 +101,16 @@ cli_pdf(const char *dir, int desc) return CL_EFORMAT; } - q = pdf_nextlinestart(p, bytesleft); + q = pdf_nextlinestart(p, size); if(q == NULL) { munmap(buf, size); return CL_EFORMAT; } - bytesleft -= (int)(q - p); + bytesleft = size - (long)(q - p); p = q; /* Find the file trailer */ - for(q = &p[bytesleft - 1]; q > p; --q) + for(q = &p[bytesleft - 6]; q > p; --q) if(memcmp(q, "%%EOF", 5) == 0) break; @@ -131,31 +131,47 @@ cli_pdf(const char *dir, int desc) /* * This tends to mean that the file is, in effect, read-only */ - cli_warnmsg("Encrypted PDF files not yet supported\n"); munmap(buf, size); - + cli_warnmsg("Encrypted PDF files not yet supported\n"); return CL_EFORMAT; } bytesleft -= trailerlength; + for(xrefstart = trailerstart; xrefstart > p; --xrefstart) + if(memcmp(xrefstart, "xref", 4) == 0) + break; + + if(xrefstart == p) { + munmap(buf, size); + return CL_EFORMAT; + } + + xreflength = (size_t)(trailerstart - xrefstart); + + bytesleft -= xreflength; + + /* + * For each object in the body section + */ while((q = cli_pmemstr(p, bytesleft, " obj", 4)) != NULL) { int is_ascii85decode, is_flatedecode, fout, len; - const char *s, *streamstart, *u, *v, *objstart; + const char *objstart, *objend, *streamstart, *streamend; size_t length, objlen, streamlen; char fullname[NAME_MAX + 1]; bytesleft -= (q - p) + 4; objstart = p = &q[4]; - q = cli_pmemstr(p, bytesleft, "endobj", 6); - if(q == NULL) { + objend = cli_pmemstr(p, bytesleft, "endobj", 6); + if(objend == NULL) { cli_dbgmsg("No matching endobj"); break; } - bytesleft -= (q - p) + 6; - p = &q[6]; - objlen = (size_t)(q - objstart); + bytesleft -= (objend - p) + 6; + p = &objend[6]; + objlen = (size_t)(objend - objstart); + /* Is this object a stream? */ streamstart = cli_pmemstr(objstart, objlen, "stream", 6); if(streamstart == NULL) continue; @@ -164,43 +180,41 @@ cli_pdf(const char *dir, int desc) /* * TODO: handle F and FFilter? */ - for(s = objstart; s < streamstart; s++) - if(*s == '/') { - if(strncmp(++s, "Length ", 7) == 0) { - s += 7; - length = atoi(s); - while(isdigit(*s)) - s++; - } else if((strncmp(s, "FlateDecode ", 12) == 0) || - (strncmp(s, "FlateDecode\n", 12) == 0)) { + for(q = objstart; q < streamstart; q++) + if(*q == '/') { + if(strncmp(++q, "Length ", 7) == 0) { + q += 7; + length = atoi(q); + while(isdigit(*q)) + q++; + q--; + } else if(strncmp(q, "FlateDecode", 11) == 0) { is_flatedecode = 1; - s += 12; - } else if((strncmp(s, "ASCII85Decode ", 13) == 0) || - (strncmp(s, "ASCII85Decode\n", 13) == 0)) { + q += 12; + } else if(strncmp(q, "ASCII85Decode", 12) == 0) { is_ascii85decode = 1; - s += 12; + q += 13; } } - /* q points to the end of the object (objend) */ - streamstart += 6; - len = (int)(q - streamstart); - u = pdf_nextlinestart(streamstart, len); - if(u == NULL) + /* objend points to the end of the object (start of "endobj") */ + streamstart += 6; /* go past the word "stream" */ + len = (int)(objend - streamstart); + q = pdf_nextlinestart(streamstart, len); + if(q == NULL) break; - len -= (int)(u - streamstart); - streamstart = u; - u = cli_pmemstr(streamstart, len, "endstream\n", 10); - if(u == NULL) { - u = cli_pmemstr(streamstart, len, "endstream\r", 10); - if(u == NULL) { + len -= (int)(q - streamstart); + streamstart = q; + streamend = cli_pmemstr(streamstart, len, "endstream\n", 10); + if(streamend == NULL) { + streamend = cli_pmemstr(streamstart, len, "endstream\r", 10); + if(streamend == NULL) { cli_dbgmsg("No endstream"); break; } } - v = u; - while(strchr("\r\n", *--v)) - --u; + /*while(strchr("\r\n", *--streamend)) + ;*/ snprintf(fullname, sizeof(fullname), "%s/pdfXXXXXX", dir); #if defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN) fout = mkstemp(fullname); @@ -215,9 +229,9 @@ cli_pdf(const char *dir, int desc) break; } - streamlen = (int)(u - streamstart) + 1; + streamlen = (int)(streamend - streamstart) + 1; - /*cli_dbgmsg("length %d, streamlen %d\n", length, streamlen);*/ + cli_dbgmsg("length %d, streamlen %d\n", length, streamlen); #if 0 /* FIXME: this isn't right... */ @@ -285,11 +299,6 @@ flatedecode(const unsigned char *buf, size_t len, int fout) cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", len); - while(strchr("\r\n", *buf)) { - len--; - buf++; - } - stream.zalloc = (alloc_func)Z_NULL; stream.zfree = (free_func)Z_NULL; stream.opaque = (void *)NULL; @@ -352,7 +361,7 @@ ascii85decode(const char *buf, size_t len, unsigned char *output) byte = EOF; if(byte >= '!' && byte <= 'u') { - sum = sum * 85 + ((unsigned long)byte - '!'); + sum = sum * 85 + ((uint32_t)byte - '!'); if(++quintet == 5) { *output++ = sum >> 24; *output++ = (sum >> 16) & 0xFF; @@ -380,8 +389,7 @@ ascii85decode(const char *buf, size_t len, unsigned char *output) cli_warnmsg("ascii85Decode: only 1 byte in last quintet\n"); return -1; } - for(i = 0; i < 5 - quintet; i++) - sum *= 85; + sum *= 85 * (5 - quintet); if(quintet > 1) sum += (0xFFFFFF >> ((quintet - 2) * 8)); ret += quintet; @@ -389,9 +397,10 @@ ascii85decode(const char *buf, size_t len, unsigned char *output) *output++ = (sum >> (24 - 8 * i)) & 0xFF; quintet = 0; } + len = 0; break; } else if(!isspace(byte)) { - cli_warnmsg("ascii85Decode: invalid character 0x%x, len %lu\n", byte, len); + cli_warnmsg("ascii85Decode: invalid character 0x%x, len %lu\n", byte & 0xFF, len); return -1; } }