mirror of https://github.com/Cisco-Talos/clamav
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
859 lines
21 KiB
859 lines
21 KiB
/*
|
|
* Copyright (C) 2005-2007 Nigel Horne <njh@bandsman.co.uk>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*
|
|
* TODO: Embedded fonts
|
|
* TODO: Predictor image handling
|
|
*/
|
|
static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
|
|
|
|
#if HAVE_CONFIG_H
|
|
#include "clamav-config.h"
|
|
#endif
|
|
|
|
#ifdef HAVE_MMAP
|
|
#include <stdio.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <ctype.h>
|
|
#include <string.h>
|
|
#include <fcntl.h>
|
|
#include <stdlib.h>
|
|
#include <errno.h>
|
|
#ifdef HAVE_LIMITS_H
|
|
#include <limits.h>
|
|
#endif
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#ifdef HAVE_SYS_MMAN_H
|
|
#include <sys/mman.h>
|
|
#endif
|
|
|
|
#ifdef HAVE_ZLIB_H
|
|
#include <zlib.h>
|
|
#endif
|
|
|
|
#ifdef C_WINDOWS
|
|
#include <io.h>
|
|
#endif
|
|
|
|
#include "clamav.h"
|
|
#include "others.h"
|
|
#include "mbox.h"
|
|
#include "pdf.h"
|
|
|
|
#ifdef CL_DEBUG
|
|
/*#define SAVE_TMP /* Save the file being worked on in tmp */
|
|
#endif
|
|
|
|
static int try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx);
|
|
static int flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx);
|
|
static int ascii85decode(const char *buf, off_t len, unsigned char *output);
|
|
static const char *pdf_nextlinestart(const char *ptr, size_t len);
|
|
static const char *pdf_nextobject(const char *ptr, size_t len);
|
|
static const char *cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);
|
|
|
|
/*
|
|
* TODO: handle embedded URLs if (options&CL_SCAN_MAILURL)
|
|
*/
|
|
int
|
|
cli_pdf(const char *dir, int desc, cli_ctx *ctx)
|
|
{
|
|
off_t size; /* total number of bytes in the file */
|
|
off_t bytesleft, trailerlength;
|
|
char *buf; /* start of memory mapped area */
|
|
const char *p, *q, *trailerstart;
|
|
const char *xrefstart; /* cross reference table */
|
|
/*size_t xreflength;*/
|
|
table_t *md5table;
|
|
int printed_predictor_message, printed_embedded_font_message, ret, rc;
|
|
unsigned int files;
|
|
struct stat statb;
|
|
|
|
cli_dbgmsg("in cli_pdf(%s)\n", dir);
|
|
|
|
if(fstat(desc, &statb) < 0) {
|
|
cli_errmsg("cli_pdf: fstat() failed\n");
|
|
return CL_EOPEN;
|
|
}
|
|
|
|
size = statb.st_size;
|
|
|
|
if(size <= 7) /* doesn't even include the file header */
|
|
return CL_CLEAN;
|
|
|
|
p = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
|
|
if(buf == MAP_FAILED) {
|
|
cli_errmsg("cli_pdf: mmap() failed\n");
|
|
return CL_EMEM;
|
|
}
|
|
|
|
cli_dbgmsg("cli_pdf: scanning %lu bytes\n", (unsigned long)size);
|
|
|
|
/* Lines are terminated by \r, \n or both */
|
|
|
|
/* File Header */
|
|
if(memcmp(p, "%PDF-1.", 7) != 0) {
|
|
munmap(buf, size);
|
|
cli_dbgmsg("cli_pdf: file header not found\n");
|
|
return CL_CLEAN;
|
|
}
|
|
|
|
#if 0
|
|
q = pdf_nextlinestart(&p[6], size - 6);
|
|
if(q == NULL) {
|
|
munmap(buf, size);
|
|
return CL_CLEAN;
|
|
}
|
|
bytesleft = size - (long)(q - p);
|
|
p = q;
|
|
#else
|
|
p = &p[6];
|
|
bytesleft = size - 6;
|
|
#endif
|
|
|
|
/* Find the file trailer */
|
|
for(q = &p[bytesleft - 6]; q > p; --q)
|
|
if(memcmp(q, "%%EOF", 5) == 0)
|
|
break;
|
|
|
|
if(q <= p) {
|
|
munmap(buf, size);
|
|
cli_dbgmsg("cli_pdf: trailer not found\n");
|
|
return CL_CLEAN;
|
|
}
|
|
|
|
for(trailerstart = &q[-7]; trailerstart > p; --trailerstart)
|
|
if(memcmp(trailerstart, "trailer", 7) == 0)
|
|
break;
|
|
|
|
/*
|
|
* q points to the end of the trailer section
|
|
*/
|
|
trailerlength = (long)(q - trailerstart);
|
|
if(cli_pmemstr(trailerstart, trailerlength, "Encrypt", 7)) {
|
|
/*
|
|
* This tends to mean that the file is, in effect, read-only
|
|
* http://www.cs.cmu.edu/~dst/Adobe/Gallery/anon21jul01-pdf-encryption.txt
|
|
* http://www.adobe.com/devnet/pdf/
|
|
*/
|
|
munmap(buf, size);
|
|
cli_dbgmsg("cli_pdf: Encrypted PDF files not yet supported\n");
|
|
return CL_CLEAN;
|
|
}
|
|
|
|
/*
|
|
* not true, since edits may put data after the trailer
|
|
bytesleft -= trailerlength;
|
|
*/
|
|
|
|
/*
|
|
* FIXME: Handle more than one xref section in the xref table
|
|
*/
|
|
for(xrefstart = trailerstart; xrefstart > p; --xrefstart)
|
|
if(memcmp(xrefstart, "xref", 4) == 0)
|
|
/*
|
|
* Make sure it's the start of the line, not a startxref
|
|
* token
|
|
*/
|
|
if((xrefstart[-1] == '\n') || (xrefstart[-1] == '\r'))
|
|
break;
|
|
|
|
if(xrefstart == p) {
|
|
munmap(buf, size);
|
|
cli_dbgmsg("cli_pdf: xref not found\n");
|
|
return CL_CLEAN;
|
|
}
|
|
|
|
printed_predictor_message = printed_embedded_font_message = 0;
|
|
|
|
md5table = tableCreate();
|
|
/*
|
|
* not true, since edits may put data after the trailer
|
|
xreflength = (size_t)(trailerstart - xrefstart);
|
|
bytesleft -= xreflength;
|
|
*/
|
|
|
|
files = 0;
|
|
|
|
/*
|
|
* The body section consists of a sequence of indirect objects
|
|
*/
|
|
while((p < xrefstart) && (cli_checklimits("cli_pdf", ctx, 0, 0, 0)==CL_CLEAN) &&
|
|
((q = pdf_nextobject(p, bytesleft)) != NULL)) {
|
|
int is_ascii85decode, is_flatedecode, fout, len, has_cr;
|
|
/*int object_number, generation_number;*/
|
|
const char *objstart, *objend, *streamstart, *streamend;
|
|
unsigned char *md5digest;
|
|
unsigned long length, objlen, real_streamlen, calculated_streamlen;
|
|
int is_embedded_font, predictor;
|
|
char fullname[NAME_MAX + 1];
|
|
|
|
rc=CL_CLEAN;
|
|
if(q == xrefstart)
|
|
break;
|
|
if(memcmp(q, "xref", 4) == 0)
|
|
break;
|
|
|
|
/*object_number = atoi(q);*/
|
|
bytesleft -= (off_t)(q - p);
|
|
p = q;
|
|
|
|
if(memcmp(q, "endobj", 6) == 0)
|
|
continue;
|
|
if(!isdigit(*q)) {
|
|
cli_dbgmsg("cli_pdf: Object number missing\n");
|
|
break;
|
|
}
|
|
q = pdf_nextobject(p, bytesleft);
|
|
if((q == NULL) || !isdigit(*q)) {
|
|
cli_dbgmsg("cli_pdf: Generation number missing\n");
|
|
break;
|
|
}
|
|
/*generation_number = atoi(q);*/
|
|
bytesleft -= (off_t)(q - p);
|
|
p = q;
|
|
|
|
q = pdf_nextobject(p, bytesleft);
|
|
if((q == NULL) || (memcmp(q, "obj", 3) != 0)) {
|
|
cli_dbgmsg("cli_pdf: Indirect object missing \"obj\"\n");
|
|
break;
|
|
}
|
|
|
|
bytesleft -= (off_t)((q - p) + 3);
|
|
objstart = p = &q[3];
|
|
objend = cli_pmemstr(p, bytesleft, "endobj", 6);
|
|
if(objend == NULL) {
|
|
cli_dbgmsg("cli_pdf: No matching endobj\n");
|
|
break;
|
|
}
|
|
bytesleft -= (off_t)((objend - p) + 6);
|
|
p = &objend[6];
|
|
objlen = (unsigned long)(objend - objstart);
|
|
|
|
/* Is this object a stream? */
|
|
streamstart = cli_pmemstr(objstart, objlen, "stream", 6);
|
|
if(streamstart == NULL)
|
|
continue;
|
|
|
|
is_embedded_font = length = is_ascii85decode =
|
|
is_flatedecode = 0;
|
|
predictor = 1;
|
|
|
|
/*
|
|
* TODO: handle F and FFilter?
|
|
*/
|
|
q = objstart;
|
|
while(q < streamstart) {
|
|
if(*q == '/') { /* name object */
|
|
/*cli_dbgmsg("Name object %8.8s\n", q+1, q+1);*/
|
|
if(strncmp(++q, "Length ", 7) == 0) {
|
|
q += 7;
|
|
length = atoi(q);
|
|
while(isdigit(*q))
|
|
q++;
|
|
/*
|
|
* Note: incremental updates are not
|
|
* supported
|
|
*/
|
|
if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) {
|
|
const char *r;
|
|
char b[14];
|
|
|
|
q += 4;
|
|
cli_dbgmsg("cli_pdf: Length is in indirect obj %lu\n",
|
|
length);
|
|
snprintf(b, sizeof(b),
|
|
"\n%lu 0 obj", length);
|
|
length = (unsigned long)strlen(b);
|
|
r = cli_pmemstr(buf, size, b, length);
|
|
if(r == NULL) {
|
|
b[0] = '\r';
|
|
r = cli_pmemstr(buf, size, b, length);
|
|
}
|
|
if(r) {
|
|
r += length - 1;
|
|
r = pdf_nextobject(r, bytesleft - (r - q));
|
|
if(r) {
|
|
length = atoi(r);
|
|
while(isdigit(*r))
|
|
r++;
|
|
cli_dbgmsg("cli_pdf: length in '%s' %lu\n",
|
|
&b[1],
|
|
length);
|
|
}
|
|
} else
|
|
cli_dbgmsg("cli_pdf: Couldn't find '%s'\n",
|
|
&b[1]);
|
|
}
|
|
q--;
|
|
} else if(strncmp(q, "Length2 ", 8) == 0)
|
|
is_embedded_font = 1;
|
|
else if(strncmp(q, "Predictor ", 10) == 0) {
|
|
q += 10;
|
|
predictor = atoi(q);
|
|
while(isdigit(*q))
|
|
q++;
|
|
q--;
|
|
} else if(strncmp(q, "FlateDecode", 11) == 0) {
|
|
is_flatedecode = 1;
|
|
q += 11;
|
|
} else if(strncmp(q, "ASCII85Decode", 13) == 0) {
|
|
is_ascii85decode = 1;
|
|
q += 13;
|
|
}
|
|
}
|
|
q = pdf_nextobject(q, (size_t)(streamstart - q));
|
|
if(q == NULL)
|
|
break;
|
|
}
|
|
|
|
if(is_embedded_font) {
|
|
/*
|
|
* Need some documentation, the only I can find a
|
|
* reference to is not free, if some kind soul wishes
|
|
* to donate a copy, please contact me!
|
|
* (http://safari.adobepress.com/0321304748)
|
|
*/
|
|
if(!printed_embedded_font_message) {
|
|
cli_dbgmsg("cli_pdf: Embedded fonts not yet supported\n");
|
|
printed_embedded_font_message = 1;
|
|
}
|
|
continue;
|
|
}
|
|
if(predictor > 1) {
|
|
/*
|
|
* Needs some thought
|
|
*/
|
|
if(!printed_predictor_message) {
|
|
cli_dbgmsg("cli_pdf: Predictor %d not honoured for embedded image\n",
|
|
predictor);
|
|
printed_predictor_message = 1;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/* objend points to the end of the object (start of "endobj") */
|
|
streamstart += 6; /* go past the word "stream" */
|
|
len = (int)(objend - streamstart);
|
|
q = pdf_nextlinestart(streamstart, len);
|
|
if(q == NULL)
|
|
break;
|
|
len -= (int)(q - streamstart);
|
|
streamstart = q;
|
|
streamend = cli_pmemstr(streamstart, len, "endstream\n", 10);
|
|
if(streamend == NULL) {
|
|
streamend = cli_pmemstr(streamstart, len, "endstream\r", 10);
|
|
if(streamend == NULL) {
|
|
cli_dbgmsg("cli_pdf: No endstream\n");
|
|
break;
|
|
}
|
|
has_cr = 1;
|
|
} else
|
|
has_cr = 0;
|
|
snprintf(fullname, sizeof(fullname), "%s/pdfXXXXXX", dir);
|
|
#if defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN)
|
|
fout = mkstemp(fullname);
|
|
#elif defined(C_WINDOWS)
|
|
if(_mktemp(fullname) == NULL) {
|
|
/* mktemp only allows 26 files */
|
|
char *name = cli_gentemp(dir);
|
|
if(name == NULL)
|
|
fout = -1;
|
|
else {
|
|
strcpy(fullname, name);
|
|
free(name);
|
|
fout = open(fullname,
|
|
O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
|
|
}
|
|
} else
|
|
fout = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
|
|
#else
|
|
mktemp(fullname);
|
|
fout = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
|
|
#endif
|
|
|
|
if(fout < 0) {
|
|
cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, strerror(errno));
|
|
rc = CL_ETMPFILE;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Calculate the length ourself, the Length parameter is often
|
|
* wrong
|
|
*/
|
|
if((*--streamend != '\n') && (*streamend != '\r'))
|
|
streamend++;
|
|
else if(has_cr && (*--streamend != '\r'))
|
|
streamend++;
|
|
|
|
if(streamend <= streamstart) {
|
|
close(fout);
|
|
cli_dbgmsg("cli_pdf: Empty stream\n");
|
|
unlink(fullname);
|
|
continue;
|
|
}
|
|
calculated_streamlen = (int)(streamend - streamstart);
|
|
real_streamlen = length;
|
|
|
|
if(calculated_streamlen != real_streamlen)
|
|
cli_dbgmsg("cli_pdf: Incorrect Length field in file attempting to recover\n");
|
|
|
|
cli_dbgmsg("cli_pdf: length %lu, calculated_streamlen %lu isFlate %d isASCII85 %d\n",
|
|
length, calculated_streamlen,
|
|
is_flatedecode, is_ascii85decode);
|
|
|
|
#if 0
|
|
/* FIXME: this isn't right... */
|
|
if(length)
|
|
/*streamlen = (is_flatedecode) ? length : MIN(length, streamlen);*/
|
|
streamlen = MIN(length, streamlen);
|
|
#endif
|
|
|
|
if(is_ascii85decode) {
|
|
unsigned char *tmpbuf;
|
|
int ret = cli_checklimits("cli_pdf", ctx, calculated_streamlen * 5, calculated_streamlen, real_streamlen);
|
|
|
|
if(ret != CL_CLEAN) {
|
|
close(fout);
|
|
unlink(fullname);
|
|
continue;
|
|
}
|
|
|
|
tmpbuf = cli_malloc(calculated_streamlen * 5);
|
|
|
|
if(tmpbuf == NULL) {
|
|
close(fout);
|
|
unlink(fullname);
|
|
continue;
|
|
}
|
|
|
|
ret = ascii85decode(streamstart, calculated_streamlen, tmpbuf);
|
|
|
|
if(ret == -1) {
|
|
free(tmpbuf);
|
|
close(fout);
|
|
unlink(fullname);
|
|
continue;
|
|
}
|
|
if(ret) {
|
|
unsigned char *t;
|
|
|
|
real_streamlen = ret;
|
|
/* free unused trailing bytes */
|
|
t = (unsigned char *)cli_realloc(tmpbuf,calculated_streamlen);
|
|
if(t == NULL) {
|
|
free(tmpbuf);
|
|
close(fout);
|
|
unlink(fullname);
|
|
continue;
|
|
}
|
|
tmpbuf = t;
|
|
/*
|
|
* Note that it will probably be both
|
|
* ascii85encoded and flateencoded
|
|
*/
|
|
|
|
if(is_flatedecode)
|
|
rc = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx);
|
|
else
|
|
rc = cli_writen(fout, (const char *)streamstart, real_streamlen)==real_streamlen ? CL_CLEAN : CL_EIO;
|
|
}
|
|
free(tmpbuf);
|
|
} else if(is_flatedecode) {
|
|
rc = try_flatedecode((unsigned char *)streamstart, real_streamlen, calculated_streamlen, fout, ctx);
|
|
|
|
} else {
|
|
cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n",
|
|
(unsigned long)real_streamlen);
|
|
if((rc = cli_checklimits("cli_pdf", ctx, real_streamlen, 0, 0))==CL_CLEAN)
|
|
rc = cli_writen(fout, (const char *)streamstart, real_streamlen) == real_streamlen ? CL_CLEAN : CL_EIO;
|
|
}
|
|
|
|
if (rc == CL_CLEAN) {
|
|
cli_dbgmsg("cli_pdf: extracted file %u to %s\n", ++files, fullname);
|
|
|
|
lseek(fout, 0, SEEK_SET);
|
|
md5digest = cli_md5digest(fout);
|
|
|
|
if(tableFind(md5table, md5digest) >= 0) {
|
|
cli_dbgmsg("cli_pdf: not scanning duplicate embedded file '%s'\n", fullname);
|
|
free(md5digest);
|
|
close(fout);
|
|
unlink(fullname);
|
|
continue;
|
|
} else
|
|
tableInsert(md5table, md5digest, 1);
|
|
|
|
free(md5digest);
|
|
|
|
lseek(fout, 0, SEEK_SET);
|
|
rc = cli_magic_scandesc(fout, ctx);
|
|
}
|
|
close(fout);
|
|
if(!cli_leavetemps_flag) unlink(fullname);
|
|
if(rc != CL_CLEAN) break;
|
|
}
|
|
|
|
munmap(buf, size);
|
|
|
|
tableDestroy(md5table);
|
|
|
|
cli_dbgmsg("cli_pdf: returning %d\n", rc);
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
* flate inflation - returns clamAV status, e.g CL_SUCCESS, CL_EZIP
|
|
*/
|
|
static int
|
|
try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx)
|
|
{
|
|
int ret = cli_checklimits("cli_pdf", ctx, real_len, 0, 0);
|
|
|
|
if (ret==CL_CLEAN && flatedecode(buf, real_len, fout, ctx) == CL_SUCCESS)
|
|
return CL_CLEAN;
|
|
|
|
if(real_len == calculated_len) {
|
|
/*
|
|
* Nothing more we can do to inflate
|
|
*/
|
|
cli_dbgmsg("cli_pdf: Bad compression in flate stream\n");
|
|
return CL_CLEAN;
|
|
}
|
|
|
|
if(cli_checklimits("cli_pdf", ctx, calculated_len, 0, 0)!=CL_CLEAN)
|
|
return CL_CLEAN;
|
|
|
|
ret = flatedecode(buf, calculated_len, fout, ctx);
|
|
if(ret == CL_CLEAN)
|
|
return CL_CLEAN;
|
|
|
|
/* i.e. the PDF file is broken :-( */
|
|
cli_dbgmsg("cli_pdf: Bad compressed block length in flate stream\n");
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx)
|
|
{
|
|
int zstat, ret;
|
|
off_t nbytes;
|
|
z_stream stream;
|
|
unsigned char output[BUFSIZ];
|
|
#ifdef SAVE_TMP
|
|
char tmpfilename[16];
|
|
int tmpfd;
|
|
#endif
|
|
|
|
cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", (unsigned long)len);
|
|
|
|
if(len == 0) {
|
|
cli_dbgmsg("cli_pdf: flatedecode len == 0\n");
|
|
return CL_CLEAN;
|
|
}
|
|
|
|
#ifdef SAVE_TMP
|
|
/*
|
|
* Copy the embedded area for debugging, so that if it falls over
|
|
* we have a copy of the offending data. This is debugging code
|
|
* that you shouldn't of course install in a live environment. I am
|
|
* not interested in hearing about security issues with this section
|
|
* of the parser.
|
|
*/
|
|
strcpy(tmpfilename, "/tmp/pdfXXXXXX");
|
|
tmpfd = mkstemp(tmpfilename);
|
|
if(tmpfd < 0) {
|
|
perror(tmpfilename);
|
|
cli_errmsg("cli_pdf: Can't make debugging file\n");
|
|
} else {
|
|
FILE *tmpfp = fdopen(tmpfd, "w");
|
|
|
|
if(tmpfp) {
|
|
fwrite(buf, sizeof(char), len, tmpfp);
|
|
fclose(tmpfp);
|
|
cli_dbgmsg("cli_pdf: flatedecode: debugging file is %s\n",
|
|
tmpfilename);
|
|
} else
|
|
cli_errmsg("cli_pdf: can't fdopen debugging file\n");
|
|
}
|
|
#endif
|
|
stream.zalloc = (alloc_func)Z_NULL;
|
|
stream.zfree = (free_func)Z_NULL;
|
|
stream.opaque = (void *)NULL;
|
|
stream.next_in = (Bytef *)buf;
|
|
stream.avail_in = len;
|
|
stream.next_out = output;
|
|
stream.avail_out = sizeof(output);
|
|
|
|
zstat = inflateInit(&stream);
|
|
if(zstat != Z_OK) {
|
|
cli_warnmsg("cli_pdf: inflateInit failed");
|
|
return CL_EMEM;
|
|
}
|
|
|
|
nbytes = 0;
|
|
|
|
while(stream.avail_in) {
|
|
zstat = inflate(&stream, Z_NO_FLUSH); /* zlib */
|
|
switch(zstat) {
|
|
case Z_OK:
|
|
if(stream.avail_out == 0) {
|
|
int written;
|
|
if ((written=cli_writen(fout, output, sizeof(output)))!=sizeof(output)) {
|
|
cli_errmsg("cli_pdf: failed to write output file\n");
|
|
inflateEnd(&stream);
|
|
return CL_EIO;
|
|
}
|
|
nbytes += written;
|
|
|
|
if((ret=cli_checklimits("cli_pdf", ctx, nbytes, 0, 0))!=CL_CLEAN) {
|
|
inflateEnd(&stream);
|
|
return ret;
|
|
}
|
|
stream.next_out = output;
|
|
stream.avail_out = sizeof(output);
|
|
}
|
|
continue;
|
|
case Z_STREAM_END:
|
|
break;
|
|
default:
|
|
if(stream.msg)
|
|
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n",
|
|
(unsigned long)nbytes,
|
|
stream.msg);
|
|
else
|
|
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF attachment\n",
|
|
(unsigned long)nbytes, zstat);
|
|
inflateEnd(&stream);
|
|
return CL_CLEAN;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if(stream.avail_out != sizeof(output)) {
|
|
if(cli_writen(fout, output, sizeof(output) - stream.avail_out) < 0) {
|
|
cli_errmsg("cli_pdf: failed to write output file\n");
|
|
inflateEnd(&stream);
|
|
return CL_EIO;
|
|
}
|
|
}
|
|
|
|
#ifdef SAVE_TMP
|
|
unlink(tmpfilename);
|
|
#endif
|
|
inflateEnd(&stream);
|
|
return CL_CLEAN;
|
|
}
|
|
|
|
/*
 * ascii85 inflation, returns number of bytes in output, -1 for error
 *
 * buf		the encoded text; it need not be NUL terminated
 * len		number of bytes at buf
 * output	receives the decoded bytes. No size is passed in, so the
 *		caller must supply enough room: every input byte yields at
 *		most four output bytes (the 'z' shorthand)
 *
 * Decoding stops at the "~>" end marker or after len bytes, whichever comes
 * first. White space is skipped; any other byte outside the alphabet is an
 * error. A group still incomplete when the input runs out without a marker
 * is discarded.
 *
 * See http://www.piclist.com/techref/method/encode.htm (look for base85)
 */
static int
ascii85decode(const char *buf, off_t len, unsigned char *output)
{
	const char *ptr;
	uint32_t sum = 0;
	int quintet = 0;
	int ret = 0;

	if(cli_pmemstr(buf, len, "~>", 2) == NULL)
		cli_dbgmsg("cli_pdf: ascii85decode: no EOF marker found\n");

	ptr = buf;

	cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);

	while(len > 0) {
		/*
		 * Go through unsigned char so that bytes >= 0x80 neither
		 * look like EOF (-1) nor reach isspace() as negative values
		 */
		int byte = (int)(unsigned char)*ptr++;

		len--;

		/* only peek at the next byte if there is one */
		if((byte == '~') && (len > 0) && (*ptr == '>'))
			byte = EOF;

		if(byte >= '!' && byte <= 'u') {
			sum = (sum * 85) + ((uint32_t)byte - '!');
			if(++quintet == 5) {
				*output++ = (unsigned char)(sum >> 24);
				*output++ = (unsigned char)((sum >> 16) & 0xFF);
				*output++ = (unsigned char)((sum >> 8) & 0xFF);
				*output++ = (unsigned char)(sum & 0xFF);
				ret += 4;
				quintet = 0;
				sum = 0;
			}
		} else if(byte == 'z') {
			/* shorthand for four zero bytes, only valid between groups */
			if(quintet) {
				cli_dbgmsg("ascii85decode: unexpected 'z'\n");
				return -1;
			}
			*output++ = '\0';
			*output++ = '\0';
			*output++ = '\0';
			*output++ = '\0';
			ret += 4;
		} else if(byte == EOF) {
			cli_dbgmsg("ascii85decode: quintet %d\n", quintet);
			if(quintet) {
				int i;

				if(quintet == 1) {
					cli_dbgmsg("ascii85Decode: only 1 byte in last quintet\n");
					return -1;
				}
				/* pad the short group up to five digits */
				for(i = quintet; i < 5; i++)
					sum *= 85;

				if(quintet > 1)
					sum += (0xFFFFFF >> ((quintet - 2) * 8));
				/* n digits carry n - 1 bytes, count what is written */
				ret += quintet - 1;
				for(i = 0; i < quintet - 1; i++)
					*output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
				quintet = 0;
			}
			len = 0;
			break;
		} else if(!isspace(byte)) {
			cli_dbgmsg("ascii85Decode: invalid character 0x%x, len %lu\n",
				byte & 0xFF, (unsigned long)len);
			return -1;
		}
	}
	return ret;
}
|
|
|
|
/*
 * Find the start of the next line
 *
 * ptr	where to start looking
 * len	number of bytes available at ptr
 *
 * Skips to the first line terminator at or after ptr, then over the run of
 * terminators, and returns the address of the byte that follows. Returns
 * NULL if that byte is not within the len bytes, or if len is 0.
 *
 * Note that strchr() also matches the NUL that ends its first argument, so
 * a NUL byte in the data is treated like \r and \n here.
 */
static const char *
pdf_nextlinestart(const char *ptr, size_t len)
{
	/* nothing to look at; without this --len below would wrap around */
	if(len == 0)
		return NULL;

	/* skip the rest of the current line */
	while(strchr("\r\n", *ptr) == NULL) {
		if(--len == 0L)
			return NULL;
		ptr++;
	}
	/* skip the line terminator(s) */
	while(strchr("\r\n", *ptr) != NULL) {
		if(--len == 0L)
			return NULL;
		ptr++;
	}
	return ptr;
}
|
|
|
|
/*
 * Return the start of the next PDF object.
 * This assumes that we're not in a stream.
 *
 * ptr is taken to be inside a token: the rest of that token is skipped,
 * then any separators (white space, '[' and '<'), line ends and '%'
 * comments, and the address of the first byte after them is returned.
 * A '/' (start of a name object) is returned as soon as it is seen.
 * Returns NULL when the len bytes run out first.
 */
static const char *
pdf_nextobject(const char *ptr, size_t len)
{
	const char *cursor = ptr;
	size_t remaining = len;
	int in_token = 1;	/* still inside the token we started in */

	while(remaining != 0) {
		const char c = *cursor;

		if(c == '/')	/* Start of a name object */
			return cursor;

		if((c == '\n') || (c == '\r') || (c == '%')) {
			/* end of line or a comment: resume on the next line */
			const char *next = pdf_nextlinestart(cursor, remaining);

			if(next == NULL)
				return NULL;
			remaining -= (size_t)(next - cursor);
			cursor = next;
			in_token = 0;
			continue;
		}

		if((c == ' ') || (c == '\t') || (c == '\v') || (c == '\f') ||
		   (c == '[') ||	/* Start of an array object */
		   (c == '<'))		/* Start of a dictionary object */
			in_token = 0;
		else if(!in_token)
			/* TODO: parse and return object type */
			return cursor;

		cursor++;
		remaining--;
	}
	return NULL;
}
|
|
|
|
/*
 * like cli_memstr - but returns the location of the match
 *
 * Looks for the ns bytes at needle within the hs bytes at haystack and
 * returns the address of the first occurrence, or NULL if there is none.
 * Neither buffer needs to be NUL terminated. An empty needle matches at
 * the start of the haystack.
 *
 * FIXME: need a case insensitive version
 */
static const char *
cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
{
	const char *candidate, *limit;

	if(haystack == needle)
		return haystack;

	if(ns > hs)
		return NULL;

	if(ns == 0)
		return haystack;

	/* a match cannot start at or beyond limit, it would not fit */
	limit = haystack + (hs - ns) + 1;

	for(candidate = haystack; candidate < limit; candidate++) {
		/* jump to the next place where the first byte matches */
		candidate = memchr(candidate, needle[0], (size_t)(limit - candidate));
		if(candidate == NULL)
			return NULL;

		if(memcmp(candidate, needle, ns) == 0)
			return candidate;
	}

	return NULL;
}
|
|
#else /*!HAVE_MMAP*/
|
|
|
|
#include "clamav.h"
|
|
#include "others.h"
|
|
#include "pdf.h"
|
|
|
|
int
|
|
cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
|
|
{
|
|
cli_dbgmsg("File not decoded - PDF decoding needs mmap() (for now)\n");
|
|
return CL_CLEAN;
|
|
}
|
|
#endif
|
|
|