Force PDF JSON strings to be UTF-8 or base64-encoded

remotes/push_mirror/klin/msxml
Kevin Lin 10 years ago
parent e098bf4bd9
commit 0e7442f11e
  1. 96
      libclamav/pdf.c
  2. 46
      libclamav/str.c
  3. 2
      libclamav/str.h

@ -3511,22 +3511,86 @@ static void pdf_export_json(struct pdf_struct *pdf)
goto cleanup;
}
if (pdf->stats.author)
cli_jsonstr(pdfobj, "Author", pdf->stats.author);
if (pdf->stats.creator)
cli_jsonstr(pdfobj, "Creator", pdf->stats.creator);
if (pdf->stats.producer)
cli_jsonstr(pdfobj, "Producer", pdf->stats.producer);
if (pdf->stats.modificationdate)
cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate);
if (pdf->stats.creationdate)
cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate);
if (pdf->stats.title)
cli_jsonstr(pdfobj, "Title", pdf->stats.title);
if (pdf->stats.subject)
cli_jsonstr(pdfobj, "Subject", pdf->stats.subject);
if (pdf->stats.keywords)
cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords);
if (pdf->stats.author) {
if (cli_isutf8(pdf->stats.author, strlen(pdf->stats.author)))
cli_jsonstr(pdfobj, "Author", pdf->stats.author);
else {
char *b64 = (char *)cl_base64_encode(pdf->stats.author, strlen(pdf->stats.author));
cli_jsonstr(pdfobj, "Author", b64);
cli_jsonbool(pdfobj, "Author_base64", 1);
free(b64);
}
}
if (pdf->stats.creator) {
if (cli_isutf8(pdf->stats.creator, strlen(pdf->stats.creator)))
cli_jsonstr(pdfobj, "Creator", pdf->stats.creator);
else {
char *b64 = (char *)cl_base64_encode(pdf->stats.creator, strlen(pdf->stats.creator));
cli_jsonstr(pdfobj, "Creator", b64);
cli_jsonbool(pdfobj, "Creator_base64", 1);
free(b64);
}
}
if (pdf->stats.producer) {
if (cli_isutf8(pdf->stats.producer, strlen(pdf->stats.producer)))
cli_jsonstr(pdfobj, "Producer", pdf->stats.producer);
else {
char *b64 = (char *)cl_base64_encode(pdf->stats.producer, strlen(pdf->stats.producer));
cli_jsonstr(pdfobj, "Producer", b64);
cli_jsonbool(pdfobj, "Producer_base64", 1);
free(b64);
}
}
if (pdf->stats.modificationdate) {
if (cli_isutf8(pdf->stats.modificationdate, strlen(pdf->stats.modificationdate)))
cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate);
else {
char *b64 = (char *)cl_base64_encode(pdf->stats.modificationdate, strlen(pdf->stats.modificationdate));
cli_jsonstr(pdfobj, "ModificationDate", b64);
cli_jsonbool(pdfobj, "ModificationDate_base64", 1);
free(b64);
}
}
if (pdf->stats.creationdate) {
if (cli_isutf8(pdf->stats.creationdate, strlen(pdf->stats.creationdate)))
cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate);
else {
char *b64 = (char *)cl_base64_encode(pdf->stats.creationdate, strlen(pdf->stats.creationdate));
cli_jsonstr(pdfobj, "CreationDate", b64);
cli_jsonbool(pdfobj, "CreationDate_base64", 1);
free(b64);
}
}
if (pdf->stats.title) {
if (cli_isutf8(pdf->stats.title, strlen(pdf->stats.title)))
cli_jsonstr(pdfobj, "Title", pdf->stats.title);
else {
char *b64 = (char *)cl_base64_encode(pdf->stats.title, strlen(pdf->stats.title));
cli_jsonstr(pdfobj, "Title", b64);
cli_jsonbool(pdfobj, "Title_base64", 1);
free(b64);
}
}
if (pdf->stats.subject) {
if (cli_isutf8(pdf->stats.subject, strlen(pdf->stats.subject)))
cli_jsonstr(pdfobj, "Subject", pdf->stats.subject);
else {
char *b64 = (char *)cl_base64_encode(pdf->stats.subject, strlen(pdf->stats.subject));
cli_jsonstr(pdfobj, "Subject", b64);
cli_jsonbool(pdfobj, "Subject_base64", 1);
free(b64);
}
}
if (pdf->stats.keywords) {
if (cli_isutf8(pdf->stats.keywords, strlen(pdf->stats.keywords)))
cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords);
else {
char *b64 = (char *)cl_base64_encode(pdf->stats.keywords, strlen(pdf->stats.keywords));
cli_jsonstr(pdfobj, "Keywords", b64);
cli_jsonbool(pdfobj, "Keywords_base64", 1);
free(b64);
}
}
if (pdf->stats.ninvalidobjs)
cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs);
if (pdf->stats.njs)

@ -666,3 +666,49 @@ char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type)
s2[j] = '\0';
return s2;
}
/*
 * Check whether a byte buffer is well-formed UTF-8 as defined by RFC 3629.
 *
 * buf: bytes to examine; it is not required to be NUL-terminated and is
 *      never read at or beyond buf[len].
 * len: number of bytes in buf.
 *
 * Returns 1 when every byte belongs to a valid sequence (an empty buffer
 * is valid), 0 otherwise.
 *
 * Beyond checking the lead/continuation bit patterns, the following are
 * rejected because conforming decoders refuse them:
 *   - overlong encodings (lead bytes 0xC0/0xC1, 0xE0 0x80..0x9F,
 *     0xF0 0x80..0x8F),
 *   - UTF-16 surrogates U+D800..U+DFFF (0xED 0xA0..0xBF),
 *   - code points above U+10FFFF (0xF4 0x90.. and lead bytes 0xF5..0xF7),
 *   - the obsolete 5- and 6-byte forms (lead bytes 0xF8..0xFD),
 *   - truncated sequences and stray continuation bytes.
 */
int cli_isutf8(const unsigned char *buf, unsigned int len)
{
    unsigned int i = 0;

    while (i < len) {
        const unsigned char lead = buf[i++];
        unsigned int following;
        unsigned int j;
        /* Allowed range for the first continuation byte; narrowing it for
         * particular lead bytes is what excludes overlongs, surrogates and
         * values above U+10FFFF. */
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;

        if (lead < 0x80) {
            /* 0xxxxxxx is plain ASCII */
            continue;
        } else if (lead >= 0xC2 && lead <= 0xDF) {
            /* 110xxxxx; 0xC0 and 0xC1 could only encode overlong ASCII */
            following = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            /* 1110xxxx */
            following = 2;
            if (lead == 0xE0)
                lo = 0xA0; /* below this is an overlong 2-byte value */
            else if (lead == 0xED)
                hi = 0x9F; /* above this is a UTF-16 surrogate */
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            /* 11110xxx */
            following = 3;
            if (lead == 0xF0)
                lo = 0x90; /* below this is an overlong 3-byte value */
            else if (lead == 0xF4)
                hi = 0x8F; /* above this exceeds U+10FFFF */
        } else {
            /* stray continuation byte (10xxxxxx) or an invalid lead byte */
            return 0;
        }

        /* i <= len here, so the subtraction cannot wrap */
        if (following > len - i)
            return 0; /* sequence is cut off by the end of the buffer */

        if (buf[i] < lo || buf[i] > hi)
            return 0;
        i++;

        /* remaining continuation bytes must all be 10xxxxxx */
        for (j = 1; j < following; j++, i++) {
            if ((buf[i] & 0xC0) != 0x80)
                return 0;
        }
    }

    return 1;
}

@ -58,5 +58,7 @@ typedef enum {
} utf16_type;
char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type);
/* Checks whether the first len bytes of buf are UTF-8 encoded text.
 * Returns 1 if they are, 0 otherwise. The length is passed explicitly,
 * so buf does not need to be NUL-terminated. */
int cli_isutf8(const unsigned char *buf, unsigned int len);
size_t cli_strlcat(char *dst, const char *src, size_t sz); /* libclamav/strlcat.c */
#endif

Loading…
Cancel
Save