Merge branch 'master' of ssh://git.clam.sourcefire.com/var/lib/git/clamav-devel

remotes/push_mirror/guardrails
Tomasz Kojm 14 years ago
commit 5e2b47791c
  1. 4
      libclamav/bytecode_api.h
  2. 197
      libclamav/pdf.c

@ -130,7 +130,9 @@ enum pdf_objflags {
OBJ_TRUNCATED,
OBJ_FORCEDUMP,
OBJ_FILTER_STANDARD,
OBJ_LAUNCHACTION
OBJ_LAUNCHACTION,
OBJ_PAGE,
OBJ_CONTENTS
};
#ifdef __CLAMBC__

@ -56,6 +56,8 @@ static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
#include "arc4.h"
#include "rijndael.h"
#include "sha256.h"
#include "textnorm.h"
#ifdef CL_DEBUG
/*#define SAVE_TMP
@ -121,6 +123,17 @@ struct pdf_struct {
unsigned keylen;
};
/* define this to be noisy about things that we can't parse properly */
/*#define NOISY*/
#ifdef NOISY
#define noisy_msg(pdf, ...) cli_infomsg(pdf->ctx, __VA_ARGS__)
#define noisy_warnmsg cli_warnmsg
#else
#define noisy_msg (void)
#define noisy_warnmsg (void)
#endif
static const char *findNextNonWSBack(const char *q, const char *start)
{
while (q > start &&
@ -415,10 +428,18 @@ static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj,
else
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
(unsigned long)nbytes, zstat, obj->id>>8, obj->id&0xff);
if(stream.msg)
noisy_warnmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n",
(unsigned long)nbytes,
stream.msg, obj->id>>8, obj->id&0xff);
else
noisy_warnmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
(unsigned long)nbytes, zstat, obj->id>>8, obj->id&0xff);
/* mark stream as bad only if not encrypted */
inflateEnd(&stream);
if (!nbytes) {
cli_dbgmsg("cli_pdf: dumping raw stream (probably encrypted)\n");
noisy_warnmsg("cli_pdf: dumping raw stream, probably encrypted and we failed to decrypt'n");
if (filter_writen(pdf, obj, fout, buf, len, sum) != len) {
cli_errmsg("cli_pdf: failed to write output file\n");
return CL_EWRITE;
@ -514,7 +535,7 @@ static int find_length(struct pdf_struct *pdf,
return length;
}
#define DUMP_MASK ((1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_A85) | (1 << OBJ_EMBEDDED_FILE) | (1 << OBJ_JAVASCRIPT) | (1 << OBJ_OPENACTION) | (1 << OBJ_LAUNCHACTION))
#define DUMP_MASK ((1 << OBJ_CONTENTS) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_A85) | (1 << OBJ_EMBEDDED_FILE) | (1 << OBJ_JAVASCRIPT) | (1 << OBJ_OPENACTION) | (1 << OBJ_LAUNCHACTION))
static int obj_size(struct pdf_struct *pdf, struct pdf_obj *obj, int binary)
{
@ -583,6 +604,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
unsigned char iv[16];
unsigned len = *length;
unsigned char pad, i;
int nrounds;
cli_dbgmsg("cli_pdf: aes_decrypt: key length: %d, data length: %d\n", key_n, *length);
if (key_n > 32) {
@ -591,6 +613,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
}
if (len < 32) {
cli_dbgmsg("cli_pdf: aes_decrypt: len is <32: %d\n", len);
noisy_warnmsg("cli_pdf: aes_decrypt: len is <32: %d\n", len);
return;
}
if (has_iv) {
@ -600,7 +623,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
} else
memset(iv, 0, sizeof(iv));
int nrounds = rijndaelSetupDecrypt(rk, key, key_n*8);
nrounds = rijndaelSetupDecrypt(rk, key, key_n*8);
while (len >= 16) {
unsigned i;
rijndaelDecrypt(rk, nrounds, in, q);
@ -616,6 +639,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
pad = q[-1];
if (pad > 0x10) {
cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %d)\n", pad, len-16);
noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %d)\n", pad, len-16);
*length -= len;
return;
}
@ -623,6 +647,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
for (i=1;i<pad;i++) {
if (q[i] != pad) {
cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
*length -= len;
return;
}
@ -642,14 +667,18 @@ static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, of
cli_md5_ctx md5;
struct arc4_state arc4;
if (!length || !*length || !in)
if (!length || !*length || !in) {
noisy_warnmsg("decrypt failed for obj %u %u\n", id>>8, id&0xff);
return NULL;
}
n = pdf->keylen + 5;
if (enc_method == ENC_AESV2)
n += 4;
key = cli_malloc(n);
if (!key)
if (!key) {
noisy_warnmsg("decrypt_any: malloc failed\n");
return NULL;
}
memcpy(key, pdf->key, pdf->keylen);
q = key + pdf->keylen;
@ -670,8 +699,10 @@ static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, of
n = 16;
q = cli_malloc(*length);
if (!q)
if (!q) {
noisy_warnmsg("decrypt_any: malloc failed\n");
return NULL;
}
switch (enc_method) {
case ENC_V2:
@ -679,26 +710,33 @@ static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, of
memcpy(q, in, *length);
arc4_init(&arc4, result, n);
arc4_apply(&arc4, q, *length);
noisy_msg(pdf, "decrypted ARC4 data\n");
break;
case ENC_AESV2:
cli_dbgmsg("cli_pdf: enc is aesv2\n");
aes_decrypt(in, length, q, result, n, 1);
noisy_msg(pdf, "decrypted AES(v2) data\n");
break;
case ENC_AESV3:
cli_dbgmsg("cli_pdf: enc is aesv3\n");
aes_decrypt(in, length, q, pdf->key, pdf->keylen, 1);
noisy_msg(pdf, "decrypted AES(v3) data\n");
break;
case ENC_IDENTITY:
cli_dbgmsg("cli_pdf: enc is identity\n");
memcpy(q, in, *length);
noisy_msg(pdf, "identity encryption\n");
break;
case ENC_NONE:
cli_dbgmsg("cli_pdf: enc is none\n");
noisy_msg(pdf, "encryption is none\n");
free(q);
return NULL;
case ENC_UNKNOWN:
cli_dbgmsg("cli_pdf: enc is unknown\n");
free(q);
noisy_warnmsg("decrypt_any: unknown encryption method for obj %u %u\n",
id>>8,id&0xff);
return NULL;
}
return q;
@ -713,6 +751,79 @@ static enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *ob
return pdf->enc_method_string;
}
enum cstate {
CSTATE_NONE,
CSTATE_TJ,
CSTATE_TJ_PAROPEN
};
static void process(struct text_norm_state *s, enum cstate *st, const char *buf, int length, int fout)
{
do {
switch (*st) {
case CSTATE_NONE:
if (*buf == '[') *st = CSTATE_TJ;
else {
const char *nl = memchr(buf, '\n', length);
if (!nl)
return;
length -= nl - buf;
buf = nl;
}
break;
case CSTATE_TJ:
if (*buf == '(') *st = CSTATE_TJ_PAROPEN;
break;
case CSTATE_TJ_PAROPEN:
if (*buf == ')') *st = CSTATE_TJ;
else {
if (text_normalize_buffer(s, buf, 1) != 1) {
cli_writen(fout, s->out, s->out_pos);
text_normalize_reset(s);
}
}
break;
}
buf++;
length--;
} while (length > 0);
}
static int pdf_scan_contents(int fd, struct pdf_struct *pdf)
{
struct text_norm_state s;
char fullname[1024];
char outbuff[BUFSIZ];
char inbuf[BUFSIZ];
int fout, n, rc;
enum cstate st = CSTATE_NONE;
snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u_c", pdf->dir, (pdf->files-1));
fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
if (fout < 0) {
char err[128];
cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
return CL_ETMPFILE;
}
text_normalize_init(&s, outbuff, sizeof(outbuff));
while (1) {
n = cli_readn(fd, inbuf, sizeof(inbuf));
if (n <= 0)
break;
process(&s, &st, inbuf, n, fout);
}
cli_writen(fout, s.out, s.out_pos);
lseek(fout, 0, SEEK_SET);
rc = cli_magic_scandesc(fout, pdf->ctx);
close(fout);
if (!pdf->ctx->engine->keeptmp)
if (cli_unlink(fullname) && rc != CL_VIRUS)
rc = CL_EUNLINK;
return rc;
}
static const char *pdf_getdict(const char *q0, int* len, const char *key);
static char *pdf_readval(const char *q, int len, const char *key);
static enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def);
@ -777,6 +888,8 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
if (length > pdf->size || obj->start + p_stream + length > pdf->size) {
cli_dbgmsg("cli_pdf: length out of file: %ld + %ld > %ld\n",
p_stream, length, pdf->size);
noisy_warnmsg("length out of file, truncated: %ld + %ld > %ld\n",
p_stream, length, pdf->size);
length = pdf->size - (obj->start + p_stream);
}
if (!(obj->flags & (1 << OBJ_FILTER_FLATE)) && length <= 0) {
@ -828,9 +941,7 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
else {
decrypted = decrypt_any(pdf, obj->id, flate_in, &length,
enc);
if (!decrypted)
cli_warnmsg("cli_pdf:decrypt_any: malloc failed\n");
else
if (decrypted)
flate_in = decrypted;
}
}
@ -879,7 +990,9 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
if (filter_writen(pdf, obj, fout, flate_in, ascii_decoded_size, &sum) != ascii_decoded_size)
rc = CL_EWRITE;
}
}
} else
noisy_warnmsg("cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff);
} else if (obj->flags & (1 << OBJ_JAVASCRIPT)) {
const char *q2;
const char *q = pdf->map+obj->start;
@ -931,8 +1044,10 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
cli_dbgmsg("cli_pdf: encrypted string\n");
decrypted = decrypt_any(pdf, obj->id, out, &n,
pdf->enc_method_string);
if (decrypted)
if (decrypted) {
noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff);
out = decrypted;
}
}
if (filter_writen(pdf, obj, fout, out, n, &sum) != n) {
rc = CL_EWRITE;
@ -954,8 +1069,10 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
out++;
n--;
decrypted = decrypt_any(pdf, obj->id, out, &n, pdf->enc_method_string);
if (decrypted)
if (decrypted) {
noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff);
out = decrypted;
}
}
decoded = cli_malloc(n);
if (!decoded) {
@ -992,6 +1109,14 @@ static int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj)
if (rc2 == CL_VIRUS)
rc = rc2;
}
if (rc == CL_CLEAN && (obj->flags & (1 << OBJ_CONTENTS))) {
lseek(fout, 0, SEEK_SET);
cli_dbgmsg("cli_pdf: dumping contents %u %u\n", obj->id>>8, obj->id&0xff);
rc2 = pdf_scan_contents(fout, pdf);
if (rc2 == CL_VIRUS)
rc = rc2;
noisy_msg(pdf, "extracted text from obj %u %u\n", obj->id>>8, obj->id&0xff);
}
}
close(fout);
free(ascii_decoded);
@ -1010,6 +1135,7 @@ enum objstate {
STATE_OPENACTION,
STATE_LINEARIZED,
STATE_LAUNCHACTION,
STATE_CONTENTS,
STATE_ANY /* for actions table below */
};
@ -1051,7 +1177,9 @@ static struct pdfname_action pdfname_actions[] = {
{"S", OBJ_DICT, STATE_NONE, STATE_S},
{"Type", OBJ_DICT, STATE_NONE, STATE_NONE},
{"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION},
{"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION}
{"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION},
{"Page", OBJ_PAGE, STATE_NONE, STATE_NONE},
{"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS}
};
#define KNOWN_FILTERS ((1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_RL) | (1 << OBJ_FILTER_A85) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_LZW) | (1 << OBJ_FILTER_FAX) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_JPX) | (1 << OBJ_FILTER_CRYPT))
@ -1164,7 +1292,7 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
{
/* enough to hold common pdf names, we don't need all the names */
char pdfname[64];
const char *q2, *q3;
const char *q2, *q3, *q4;
const char *q = obj->start + pdf->map;
const char *dict, *start;
off_t dict_length;
@ -1202,8 +1330,23 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
bytesleft--;
q = q2;
} while (!q3 || q3[1] != '>');
q = q3 + 2;
q4 = NULL;
/* find real end of dictionary (in case of nested one)*/
do {
q2 = pdf_nextobject(q, bytesleft);
bytesleft -= q2 -q;
if (!q2 || bytesleft < 0) {
break;
}
q4 = memchr(q-1, '>', q2-q+1);
q2++;
bytesleft--;
q = q2;
} while (!q4 || q4[1] != '>');
if (!q4) q4 = q3;
obj->flags |= 1 << OBJ_DICT;
dict_length = q3 - dict;
dict_length = q4 - dict;
/* process pdf names */
for (q = dict;dict_length > 0;) {
@ -1252,8 +1395,10 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
}
if (objstate == STATE_LAUNCHACTION)
pdfobj_flag(pdf, obj, HAS_LAUNCHACTION);
if (dict_length > 0 && (objstate == STATE_JAVASCRIPT ||
objstate == STATE_OPENACTION)) {
if (dict_length > 0 &&
(objstate == STATE_JAVASCRIPT ||
objstate == STATE_OPENACTION ||
objstate == STATE_CONTENTS)) {
if (objstate == STATE_OPENACTION)
pdfobj_flag(pdf, obj, HAS_OPENACTION);
q2 = pdf_nextobject(q, dict_length);
@ -1271,8 +1416,10 @@ static void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
objid >> 8, objid&0xff);
obj2 = find_obj(pdf, obj, objid);
if (obj2) {
enum pdf_objflags flag = objstate == STATE_JAVASCRIPT ?
OBJ_JAVASCRIPT : OBJ_OPENACTION;
enum pdf_objflags flag =
objstate == STATE_JAVASCRIPT ? OBJ_JAVASCRIPT :
objstate == STATE_OPENACTION ? OBJ_OPENACTION :
OBJ_CONTENTS;
obj2->flags |= 1 << flag;
obj->flags &= ~(1 << flag);
} else {
@ -1535,6 +1682,7 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
n = UE ? strlen(UE) : 0;
if (n != 32) {
cli_dbgmsg("cli_pdf: UE length is not 32: %d\n", n);
noisy_warnmsg("cli_pdf: UE length is not 32: %d\n", n);
} else {
pdf->keylen = 32;
pdf->key = cli_malloc(32);
@ -1609,15 +1757,18 @@ static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
password_empty = 1;
} else {
cli_dbgmsg("cli_pdf: invalid revision %d\n", R);
noisy_warnmsg("cli_pdf: invalid revision %d\n", R);
}
}
if (password_empty) {
cli_dbgmsg("cli_pdf: user password is empty\n");
noisy_msg(pdf, "cli_pdf: encrypted PDF found, user password is empty, will attempt to decrypt\n");
/* The key we computed above is the key used to encrypt the streams.
* We could decrypt it now if we wanted to */
pdf->flags |= 1 << DECRYPTABLE_PDF;
} else {
cli_dbgmsg("cli_pdf: user/owner password would be required for decryption\n");
noisy_warnmsg("cli_pdf: encrypted PDF found, user password is NOT empty, cannot decrypt!\n");
/* the key is not valid, we would need the user or the owner password to
* decrypt */
}
@ -1660,11 +1811,13 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
return;
if (!pdf->fileID) {
cli_dbgmsg("cli_pdf: pdf_handle_enc no file ID\n");
noisy_warnmsg("cli_pdf: pdf_handle_enc no file ID\n");
return;
}
obj = find_obj(pdf, pdf->objs, pdf->enc_objid);
if (!obj) {
cli_dbgmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
noisy_warnmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
return;
}
len = obj_size(pdf, obj, 1);
@ -1679,12 +1832,14 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
P = pdf_readint(q, len, "/P");
if (P == ~0u) {
cli_dbgmsg("cli_pdf: invalid P\n");
noisy_warnmsg("cli_pdf: invalid P\n");
break;
}
q2 = cli_memstr(q, len, "/Standard", 9);
if (!q2) {
cli_dbgmsg("cli_pdf: /Standard not found\n");
noisy_warnmsg("cli_pdf: /Standard not found\n");
break;
}
/* we can have both of these:
@ -1702,6 +1857,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
R = pdf_readint(q, len, "/R");
if (R == ~0u) {
cli_dbgmsg("cli_pdf: invalid R\n");
noisy_warnmsg("cli_pdf: invalid R\n");
break;
}
@ -1747,6 +1903,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
n = 0;
O = pdf_readstring(q, len, "/O", &n);
if (!O || n < oulen) {
cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
if (O)
dbg_printhex("invalid O", O, n);
@ -1758,6 +1915,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
break;
if (i != n) {
dbg_printhex("too long O", O, n);
noisy_warnmsg("too long O", O, n);
break;
}
}
@ -1766,6 +1924,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
U = pdf_readstring(q, len, "/U", &n);
if (!U || n < oulen) {
cli_dbgmsg("cli_pdf: invalid U: %d\n", n);
noisy_warnmsg("cli_pdf: invalid U: %d\n", n);
if (U)
dbg_printhex("invalid U", U, n);
break;
@ -1782,6 +1941,7 @@ static void pdf_handle_enc(struct pdf_struct *pdf)
cli_dbgmsg("cli_pdf: Encrypt R: %d, P %x, length: %d\n", R, P, length);
if (length % 8) {
cli_dbgmsg("cli_pdf: wrong key length, not multiple of 8\n");
noisy_warnmsg("cli_pdf: wrong key length, not multiple of 8\n");
break;
}
check_user_password(pdf, R, O, U, P, EM, UE, length, oulen);
@ -1820,6 +1980,7 @@ int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
pdfver = cli_memstr(pdfver, versize, "%PDF-", 5);
if (!pdfver) {
cli_dbgmsg("cli_pdf: no PDF- header found\n");
noisy_warnmsg("cli_pdf: no PDF- header found\n");
return CL_SUCCESS;
}
/* Check for PDF-1.[0-9]. Although 1.7 is highest now, allow for future

Loading…
Cancel
Save