2
* Copyright (C) 2007-2014 Sourcefire, Inc.
3
* Copyright (C) 2014 Cisco and/or its affiliates. All rights reserved.
5
* Authors: Nigel Horne, Török Edvin
7
* Also based on Matt Olney's pdf parser in snort-nrt.
9
* This program is free software; you can redistribute it and/or modify
10
* it under the terms of the GNU General Public License version 2 as
11
* published by the Free Software Foundation.
13
* This program is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
18
* You should have received a copy of the GNU General Public License
19
* along with this program; if not, write to the Free Software
20
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23
* TODO: Embedded fonts
24
* TODO: Predictor image handling
28
#include "clamav-config.h"
32
#include <sys/types.h>
58
#include "bytecode_api.h"
66
*Save the file being worked on in tmp */
71
/*
 * Forward declarations of the filter decoders and scanning helpers used
 * by the object extraction code further down.
 *
 * asciihexdecode: decodes `len` bytes of ASCIIHex-filtered data from
 * `buf` into `output`. pdf_extract_obj stores the return value as the
 * decoded size and treats a negative value as a decode failure; it
 * allocates length/2 + 1 bytes before the call (presumably the buffer
 * handed in as `output` -- the argument list is not fully visible here).
 */
static int asciihexdecode(const char *buf, off_t len, char *output);
72
/*
 * ascii85decode: decodes `len` bytes of ASCII85-filtered data from `buf`
 * into `output`. pdf_extract_obj passes a buffer of length*5 bytes and,
 * as above, treats a negative return value as a decode failure.
 */
static int ascii85decode(const char *buf, off_t len, unsigned char *output);
73
/*
 * pdf_nextlinestart: filter_flatedecode calls this to "skip till EOL"
 * before retrying inflate. NOTE(review): the return value on "no further
 * line" is not visible in this section -- confirm at the definition.
 */
static const char *pdf_nextlinestart(const char *ptr, size_t len);
74
/*
 * pdf_nextobject: scans at most `len` bytes starting at `ptr` for the
 * next object/token. Callers log "next object not found" after the call,
 * so it presumably returns NULL when nothing is found -- confirm at the
 * definition.
 */
static const char *pdf_nextobject(const char *ptr, size_t len);
76
/* PDF statistics callbacks and related */
77
struct pdfname_action;
80
static void pdf_export_json(struct pdf_struct *);
82
/*
 * Every *_cb function declared from here through XFA_cb has the same
 * signature: (pdf state, object being examined, matching action entry).
 * That signature matches the pdf_stats_cb function-pointer member of
 * struct pdfname_action, and the callbacks appear as the last
 * initializer of the entries in the pdfname_actions table.
 */
static void ASCIIHexDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
83
static void ASCII85Decode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
84
static void EmbeddedFile_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
85
static void FlateDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
86
static void Image_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
87
static void LZWDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
88
static void RunLengthDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
89
static void CCITTFaxDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
90
static void JBIG2Decode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
91
static void DCTDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
92
static void JPXDecode_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
93
static void Crypt_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
94
static void Standard_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
95
static void Sig_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
96
static void JavaScript_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
97
static void OpenAction_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
98
static void Launch_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
99
static void Page_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
100
static void Author_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
101
static void Creator_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
102
static void Producer_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
103
static void CreationDate_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
104
static void ModificationDate_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
105
static void Title_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
106
static void Subject_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
107
static void Keywords_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
108
static void Pages_cb(struct pdf_struct *, struct pdf_obj *, struct pdfname_action *);
109
static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
110
static void RichMedia_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
111
static void AcroForm_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
112
static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
114
/* End PDF statistics callbacks and related */
116
static int xrefCheck(const char *xref, const char *eof)
120
while (xref < eof && (*xref == ' ' || *xref == '\n' || *xref == '\r'))
126
if (!memcmp(xref, "xref", 4)) {
127
cli_dbgmsg("cli_pdf: found xref\n");
131
/* could be xref stream */
132
for (q=xref; q+5 < eof; q++) {
133
if (!memcmp(q,"/XRef",4)) {
134
cli_dbgmsg("cli_pdf: found /XRef\n");
142
/* define this to be noisy about things that we can't parse properly */
146
/*
 * "Noisy" variants: noisy_msg forwards its arguments to cli_infomsg using
 * the scan context stored in pdf->ctx; noisy_warnmsg is a plain alias for
 * cli_warnmsg.
 * NOTE(review): the preprocessor conditional that chooses between this
 * pair and the pair below is not visible in this section.
 */
#define noisy_msg(pdf, ...) cli_infomsg(pdf->ctx, __VA_ARGS__)
147
#define noisy_warnmsg cli_warnmsg
149
/*
 * "Quiet" variants: both names expand to a bare (void) cast, so a call
 * such as noisy_warnmsg("fmt", a, b) becomes (void)("fmt", a, b) -- a
 * comma expression whose value is discarded. Nothing is printed, but the
 * argument expressions are still evaluated.
 */
#define noisy_msg (void)
150
#define noisy_warnmsg (void)
153
static const char *findNextNonWSBack(const char *q, const char *start)
155
while (q > start && (*q == 0 || *q == 9 || *q == 0xa || *q == 0xc || *q == 0xd || *q == 0x20))
161
static int find_stream_bounds(const char *start, off_t bytesleft, off_t bytesleft2, off_t *stream, off_t *endstream, int newline_hack)
164
if ((q2 = cli_memstr(start, bytesleft, "stream", 6))) {
166
bytesleft -= q2 - start;
170
if (bytesleft >= 2 && q2[0] == '\xd' && q2[1] == '\xa') {
172
if (newline_hack && (bytesleft > 2) && q2[0] == '\xa')
174
} else if (bytesleft && q2[0] == '\xa') {
178
*stream = q2 - start;
179
bytesleft2 -= q2 - start;
184
q2 = cli_memstr(q, bytesleft2, "endstream", 9);
186
q2 = q + bytesleft2-9; /* till EOF */
188
*endstream = q2 - start;
189
if (*endstream < *stream)
190
*endstream = *stream;
198
/* Expected returns: 1 if success, 0 if no more objects, -1 if error */
199
int pdf_findobj(struct pdf_struct *pdf)
201
const char *start, *q, *q2, *q3, *eof;
204
unsigned genid, objid;
207
pdf->objs = cli_realloc2(pdf->objs, sizeof(*pdf->objs)*pdf->nobjs);
209
cli_warnmsg("cli_pdf: out of memory parsing objects (%u)\n", pdf->nobjs);
213
obj = &pdf->objs[pdf->nobjs-1];
214
memset(obj, 0, sizeof(*obj));
215
start = pdf->map+pdf->offset;
216
bytesleft = pdf->size - pdf->offset;
217
while (bytesleft > 0) {
218
q2 = cli_memstr(start, bytesleft, "obj", 3);
220
return 0;/* no more objs */
223
bytesleft -= q2 - start;
224
if (*q2 != 0 && *q2 != 9 && *q2 != 0xa && *q2 != 0xc && *q2 != 0xd && *q2 != 0x20) {
236
q = findNextNonWSBack(q2-1, start);
237
while (q > start && isdigit(*q))
241
q = findNextNonWSBack(q-1,start);
242
while (q > start && isdigit(*q))
246
obj->id = (objid << 8) | (genid&0xff);
247
obj->start = q2+4 - pdf->map;
250
eof = pdf->map + pdf->size;
251
q = pdf->map + obj->start;
253
while (q < eof && bytesleft > 0) {
254
off_t p_stream, p_endstream;
255
q2 = pdf_nextobject(q, bytesleft);
257
q2 = pdf->map + pdf->size;
260
if (find_stream_bounds(q-1, q2-q, bytesleft + (q2-q), &p_stream, &p_endstream, 1)) {
261
obj->flags |= 1 << OBJ_STREAM;
262
q2 = q-1 + p_endstream + 9;
263
bytesleft -= q2 - q + 1;
266
obj->flags |= 1 << OBJ_TRUNCATED;
267
pdf->offset = pdf->size;
268
return 1;/* truncated */
270
} else if ((q3 = cli_memstr(q-1, q2-q+1, "endobj", 6))) {
272
pdf->offset = q2 - pdf->map;
273
return 1; /* obj found and offset positioned */
282
obj->flags |= 1 << OBJ_TRUNCATED;
283
pdf->offset = pdf->size;
285
return 1;/* truncated */
288
static int filter_writen(struct pdf_struct *pdf, struct pdf_obj *obj, int fout, const char *buf, off_t len, off_t *sum)
292
if (cli_checklimits("pdf", pdf->ctx, *sum, 0, 0))
293
return len; /* pretend it was a successful write to suppress CL_EWRITE */
297
return cli_writen(fout, buf, len);
300
static void pdfobj_flag(struct pdf_struct *pdf, struct pdf_obj *obj, enum pdf_flag flag)
303
pdf->flags |= 1 << flag;
308
case UNTERMINATED_OBJ_DICT:
309
s = "dictionary not terminated";
311
case ESCAPED_COMMON_PDFNAME:
312
/* like /JavaScript */
313
s = "escaped common pdfname";
315
case BAD_STREAM_FILTERS:
316
s = "duplicate stream filters";
318
case BAD_PDF_VERSION:
319
s = "bad pdf version";
321
case BAD_PDF_HEADERPOS:
322
s = "bad pdf header position";
324
case BAD_PDF_TRAILER:
325
s = "bad pdf trailer";
327
case BAD_PDF_TOOMANYOBJS:
328
s = "too many pdf objs";
331
s = "bad deflate stream";
334
s = "bad deflate stream start";
336
case BAD_STREAMSTART:
337
s = "bad stream start";
340
s = "unknown filter used";
342
case BAD_ASCIIDECODE:
343
s = "bad ASCII decode";
346
s = "hex javascript";
349
s = "referencing nonexistent obj";
352
s = "has /OpenAction";
354
case HAS_LAUNCHACTION:
355
s = "has /LaunchAction";
358
s = "bad /Length, too small";
361
s = "PDF is encrypted";
364
s = "linearized PDF";
367
s = "more than 2 filters per obj";
369
case DECRYPTABLE_PDF:
370
s = "decryptable PDF";
374
cli_dbgmsg("cli_pdf: %s flagged in object %u %u\n", s, obj->id>>8, obj->id&0xff);
377
static int filter_flatedecode(struct pdf_struct *pdf, struct pdf_obj *obj, const char *buf, off_t len, int fout, off_t *sum)
391
pdfobj_flag(pdf, obj, BAD_STREAMSTART);
392
/* PDF spec says stream is followed by \r\n or \n, but not \r alone.
393
* Sample 0015315109, it has \r followed by zlib header.
394
* Flag pdf as suspicious, and attempt to extract by skipping the \r.
400
memset(&stream, 0, sizeof(stream));
401
stream.next_in = (Bytef *)buf;
402
stream.avail_in = len;
403
stream.next_out = (Bytef *)output;
404
stream.avail_out = sizeof(output);
406
zstat = inflateInit(&stream);
408
cli_warnmsg("cli_pdf: inflateInit failed\n");
413
while(stream.avail_in) {
415
zstat = inflate(&stream, Z_NO_FLUSH); /* zlib */
418
if(stream.avail_out == 0) {
419
if ((written=filter_writen(pdf, obj, fout, output, sizeof(output), sum))!=sizeof(output)) {
420
cli_errmsg("cli_pdf: failed to write output file\n");
426
stream.next_out = (Bytef *)output;
427
stream.avail_out = sizeof(output);
433
written = sizeof(output) - stream.avail_out;
434
if (!written && !nbytes && !skipped) {
435
/* skip till EOL, and try inflating from there, sometimes
436
* PDFs contain extra whitespace */
437
const char *q = pdf_nextlinestart(buf, len);
444
stream.next_in = (Bytef *)buf;
445
stream.avail_in = len;
446
stream.next_out = (Bytef *)output;
447
stream.avail_out = sizeof(output);
448
zstat = inflateInit(&stream);
451
cli_warnmsg("cli_pdf: inflateInit failed\n");
455
pdfobj_flag(pdf, obj, BAD_FLATESTART);
460
if (filter_writen(pdf, obj, fout, output, written, sum)!=written) {
461
cli_errmsg("cli_pdf: failed to write output file\n");
467
stream.next_out = (Bytef *)output;
468
stream.avail_out = sizeof(output);
469
if (zstat == Z_STREAM_END)
473
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n",
474
(unsigned long)nbytes,
475
stream.msg, obj->id>>8, obj->id&0xff);
477
cli_dbgmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
478
(unsigned long)nbytes, zstat, obj->id>>8, obj->id&0xff);
481
noisy_warnmsg("cli_pdf: after writing %lu bytes, got error \"%s\" inflating PDF stream in %u %u obj\n",
482
(unsigned long)nbytes,
483
stream.msg, obj->id>>8, obj->id&0xff);
485
noisy_warnmsg("cli_pdf: after writing %lu bytes, got error %d inflating PDF stream in %u %u obj\n",
486
(unsigned long)nbytes, zstat, obj->id>>8, obj->id&0xff);
488
/* mark stream as bad only if not encrypted */
491
pdfobj_flag(pdf, obj, BAD_FLATESTART);
492
cli_dbgmsg("filter_flatedecode: No bytes, returning CL_EFORMAT for this stream.\n");
496
pdfobj_flag(pdf, obj, BAD_FLATE);
505
if(stream.avail_out != sizeof(output)) {
506
if(filter_writen(pdf, obj, fout, output, sizeof(output) - stream.avail_out, sum) < 0) {
507
cli_errmsg("cli_pdf: failed to write output file\n");
520
struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid)
525
/* search starting at previous obj (if exists) */
526
i = (obj != pdf->objs) ? obj - pdf->objs : 0;
528
for (j=i;j<pdf->nobjs;j++) {
530
if (obj->id == objid)
534
/* restart search from beginning if not found */
537
if (obj->id == objid)
544
static int find_length(struct pdf_struct *pdf, struct pdf_obj *obj, const char *start, off_t len)
549
q = cli_memstr(start, len, "/Length", 7);
555
start = pdf_nextobject(q, len);
559
/* len -= start - q; */
573
if (q[0] == ' ' && q[1] == 'R') {
574
cli_dbgmsg("cli_pdf: length is in indirect object %u %u\n", length, genid);
576
obj = find_obj(pdf, obj, (length << 8) | (genid&0xff));
578
cli_dbgmsg("cli_pdf: indirect object not found\n");
582
q = pdf_nextobject(pdf->map+obj->start, pdf->size - obj->start);
584
cli_dbgmsg("cli_pdf: next object not found\n");
593
if (start - pdf->map + length+5 > pdf->size)
594
length = pdf->size - (start - pdf->map)-5;
599
/*
 * Bitmask of object flags (contents, Flate/DCT/ASCIIHex/ASCII85 filters,
 * embedded file, JavaScript, OpenAction, LaunchAction). pdf_extract_obj
 * tests obj->flags against this mask in its "don't dump all streams"
 * check: an object that is not a stream, or that has filters, is only
 * considered further when at least one of these bits is set.
 */
#define DUMP_MASK ((1 << OBJ_CONTENTS) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_A85) | (1 << OBJ_EMBEDDED_FILE) | (1 << OBJ_JAVASCRIPT) | (1 << OBJ_OPENACTION) | (1 << OBJ_LAUNCHACTION))
601
static int obj_size(struct pdf_struct *pdf, struct pdf_obj *obj, int binary)
603
unsigned i = obj - pdf->objs;
606
if (i < pdf->nobjs) {
607
int s = pdf->objs[i].start - obj->start - 4;
610
const char *p = pdf->map + obj->start;
611
const char *q = p + s;
613
while (q > p && (isspace(*q) || isdigit(*q)))
616
if (q > p+5 && !memcmp(q-5,"endobj",6))
619
q = findNextNonWSBack(q, p);
630
return pdf->size - obj->start;
632
return pdf->offset - obj->start - 6;
635
static int run_pdf_hooks(struct pdf_struct *pdf, enum pdf_phase phase, int fd, int dumpid)
638
struct cli_bc_ctx *bc_ctx;
639
cli_ctx *ctx = pdf->ctx;
644
bc_ctx = cli_bytecode_context_alloc();
646
cli_errmsg("cli_pdf: can't allocate memory for bc_ctx");
652
map = fmap(fd, 0, 0);
654
cli_warnmsg("can't mmap pdf extracted obj\n");
660
cli_bytecode_context_setpdf(bc_ctx, phase, pdf->nobjs, pdf->objs, &pdf->flags, pdf->size, pdf->startoff);
661
cli_bytecode_context_setctx(bc_ctx, ctx);
662
ret = cli_bytecode_runhook(ctx, ctx->engine, bc_ctx, BC_PDF, map);
663
cli_bytecode_context_destroy(bc_ctx);
671
static void dbg_printhex(const char *msg, const char *hex, unsigned len);
673
static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q, char *key, unsigned key_n, int has_iv)
675
unsigned long rk[RKLENGTH(256)];
676
unsigned char iv[16];
677
unsigned len = *length;
678
unsigned char pad, i;
681
cli_dbgmsg("cli_pdf: aes_decrypt: key length: %d, data length: %d\n", key_n, (int)*length);
683
cli_dbgmsg("cli_pdf: aes_decrypt: key length is %d!\n", key_n*8);
688
cli_dbgmsg("cli_pdf: aes_decrypt: len is <32: %d\n", len);
689
noisy_warnmsg("cli_pdf: aes_decrypt: len is <32: %d\n", len);
698
memset(iv, 0, sizeof(iv));
701
cli_dbgmsg("aes_decrypt: Calling rijndaelSetupDecrypt\n");
702
nrounds = rijndaelSetupDecrypt(rk, (const unsigned char *)key, key_n*8);
703
cli_dbgmsg("aes_decrypt: Beginning rijndaelDecrypt\n");
708
rijndaelDecrypt(rk, nrounds, in, q);
723
cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %d)\n", pad, len-16);
724
noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x (extra len: %d)\n", pad, len-16);
730
for (i=1;i<pad;i++) {
732
cli_dbgmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
733
noisy_warnmsg("cli_pdf: aes_decrypt: bad pad: %x != %x\n",q[i],pad);
745
cli_dbgmsg("cli_pdf: aes_decrypt: length is %d\n", (int)*length);
749
static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method)
751
unsigned char *key, *q, result[16];
753
struct arc4_state arc4;
755
if (!length || !*length || !in) {
756
noisy_warnmsg("decrypt failed for obj %u %u\n", id>>8, id&0xff);
761
if (enc_method == ENC_AESV2)
766
noisy_warnmsg("decrypt_any: malloc failed\n");
770
memcpy(key, pdf->key, pdf->keylen);
771
q = key + pdf->keylen;
777
if (enc_method == ENC_AESV2)
778
memcpy(q, "sAlT", 4);
780
cl_hash_data("md5", key, n, result, NULL);
787
q = cli_malloc(*length);
789
noisy_warnmsg("decrypt_any: malloc failed\n");
793
switch (enc_method) {
795
cli_dbgmsg("cli_pdf: enc is v2\n");
796
memcpy(q, in, *length);
797
arc4_init(&arc4, result, n);
798
arc4_apply(&arc4, q, *length);
800
noisy_msg(pdf, "decrypted ARC4 data\n");
804
cli_dbgmsg("cli_pdf: enc is aesv2\n");
805
aes_decrypt((const unsigned char *)in, length, q, (char *)result, n, 1);
807
noisy_msg(pdf, "decrypted AES(v2) data\n");
811
cli_dbgmsg("cli_pdf: enc is aesv3\n");
812
if (pdf->keylen == 0) {
813
cli_dbgmsg("cli_pdf: no key\n");
817
aes_decrypt((const unsigned char *)in, length, q, pdf->key, pdf->keylen, 1);
819
noisy_msg(pdf, "decrypted AES(v3) data\n");
823
cli_dbgmsg("cli_pdf: enc is identity\n");
824
memcpy(q, in, *length);
826
noisy_msg(pdf, "identity encryption\n");
830
cli_dbgmsg("cli_pdf: enc is none\n");
832
noisy_msg(pdf, "encryption is none\n");
837
cli_dbgmsg("cli_pdf: enc is unknown\n");
840
noisy_warnmsg("decrypt_any: unknown encryption method for obj %u %u\n",
849
/*
 * Pick the encryption method that applies to `obj`, based on its flags.
 *
 * pdf: PDF state holding the three per-category encryption methods.
 * obj: object whose flags are inspected.
 *
 * Returns pdf->enc_method_embeddedfile when OBJ_EMBEDDED_FILE is set,
 * otherwise pdf->enc_method_stream when OBJ_STREAM is set, otherwise
 * pdf->enc_method_string. The embedded-file test comes first, so it wins
 * for an object that has both flags set.
 */
static enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj)
851
/* embedded files take precedence over the generic stream setting */
if (obj->flags & (1 << OBJ_EMBEDDED_FILE))
852
return pdf->enc_method_embeddedfile;
854
if (obj->flags & (1 << OBJ_STREAM))
855
return pdf->enc_method_stream;
857
/* neither flag set: fall back to the string encryption method */
return pdf->enc_method_string;
866
static void process(struct text_norm_state *s, enum cstate *st, const char *buf, int length, int fout)
874
const char *nl = memchr(buf, '\n', length);
885
*st = CSTATE_TJ_PAROPEN;
888
case CSTATE_TJ_PAROPEN:
892
if (text_normalize_buffer(s, (const unsigned char *)buf, 1) != 1) {
893
cli_writen(fout, s->out, s->out_pos);
894
text_normalize_reset(s);
903
} while (length > 0);
906
static int pdf_scan_contents(int fd, struct pdf_struct *pdf)
908
struct text_norm_state s;
910
char outbuff[BUFSIZ];
913
enum cstate st = CSTATE_NONE;
915
snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u_c", pdf->dir, (pdf->files-1));
916
fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
920
cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
924
text_normalize_init(&s, (unsigned char *)outbuff, sizeof(outbuff));
926
n = cli_readn(fd, inbuf, sizeof(inbuf));
930
process(&s, &st, inbuf, n, fout);
933
cli_writen(fout, s.out, s.out_pos);
935
lseek(fout, 0, SEEK_SET);
936
rc = cli_magic_scandesc(fout, pdf->ctx);
939
if (!pdf->ctx->engine->keeptmp)
940
if (cli_unlink(fullname) && rc != CL_VIRUS)
946
/*
 * Forward declarations of the dictionary/value helpers used by
 * pdf_extract_obj. Definitions are not visible in this section, so the
 * notes below describe only how the visible call sites use them.
 *
 * pdf_getdict: called as pdf_getdict(start, &len, "/DecodeParams"); the
 * returned pointer and the (in/out) `len` are then handed to pdf_readval.
 */
static const char *pdf_getdict(const char *q0, int* len, const char *key);
947
/*
 * pdf_readval: called as pdf_readval(q, len, "/Name"); the caller checks
 * the result for NULL before comparing it against "/Identity".
 * NOTE(review): ownership of the returned buffer is not visible here.
 */
static char *pdf_readval(const char *q, int len, const char *key);
948
/*
 * parse_enc_method: called with the crypt-filter dictionary (pdf->CF,
 * pdf->CF_n), a filter name, and the current method as `def`; the result
 * replaces the caller's current encryption method.
 */
static enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def);
949
/*
 * pdf_readstring: called for "/JS" with `slen` == NULL and `qend`
 * pointing at a cursor that the caller then uses to reduce its remaining
 * byte count. `noescape` is passed as true when the PDF is not flagged
 * DECRYPTABLE_PDF. The result is later measured with strlen by the
 * caller. NOTE(review): NULL-on-failure and ownership are not visible
 * here -- confirm at the definition.
 */
static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape);
951
int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
953
char fullname[NAME_MAX + 1];
957
char *ascii_decoded = NULL;
958
char *decrypted = NULL;
961
cli_dbgmsg("pdf_extract_obj: obj %u %u\n", obj->id>>8, obj->id&0xff);
963
/* TODO: call bytecode hook here, allow override dumpability */
964
if ((!(obj->flags & (1 << OBJ_STREAM)) || (obj->flags & (1 << OBJ_HASFILTERS))) && !(obj->flags & DUMP_MASK)) {
965
/* don't dump all streams */
969
if ((obj->flags & (1 << OBJ_IMAGE)) && !(obj->flags & (1 << OBJ_FILTER_DCT))) {
970
/* don't dump / scan non-JPG images */
974
if (obj->flags & (1 << OBJ_FORCEDUMP)) {
975
/* bytecode can force dump by setting this flag */
982
cli_dbgmsg("cli_pdf: dumping obj %u %u\n", obj->id>>8, obj->id&0xff);
984
snprintf(fullname, sizeof(fullname), "%s"PATHSEP"pdf%02u", pdf->dir, pdf->files++);
985
fout = open(fullname,O_RDWR|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
988
cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
994
if (!(flags & PDF_EXTRACT_OBJ_SCAN))
995
obj->path = strdup(fullname);
998
if (obj->flags & (1 << OBJ_STREAM)) {
999
const char *start = pdf->map + obj->start;
1000
const char *flate_orig;
1001
off_t p_stream = 0, p_endstream = 0;
1002
off_t length, flate_orig_length;
1004
find_stream_bounds(start, pdf->size - obj->start,
1005
pdf->size - obj->start,
1006
&p_stream, &p_endstream,
1007
pdf->enc_method_stream <= ENC_IDENTITY &&
1008
pdf->enc_method_embeddedfile <= ENC_IDENTITY);
1010
if (p_stream && p_endstream) {
1011
const char *flate_in;
1012
long ascii_decoded_size = 0;
1013
size_t size = p_endstream - p_stream;
1016
length = find_length(pdf, obj, start, p_stream);
1020
orig_length = length;
1021
if (length > pdf->size || obj->start + p_stream + length > pdf->size) {
1022
cli_dbgmsg("cli_pdf: length out of file: %ld + %ld > %ld\n",
1023
p_stream, length, pdf->size);
1024
noisy_warnmsg("length out of file, truncated: %ld + %ld > %ld\n",
1025
p_stream, length, pdf->size);
1026
length = pdf->size - (obj->start + p_stream);
1029
if (!(obj->flags & (1 << OBJ_FILTER_FLATE)) && length <= 0) {
1030
const char *q = start + p_endstream;
1040
} else if (*q == '\r') {
1047
cli_dbgmsg("cli_pdf: calculated length %ld\n", length);
1049
if (size > (size_t)length+2) {
1050
cli_dbgmsg("cli_pdf: calculated length %ld < %ld\n",
1056
if (orig_length && size > (size_t)orig_length + 20) {
1057
cli_dbgmsg("cli_pdf: orig length: %ld, length: %ld, size: %ld\n", orig_length, length, size);
1058
pdfobj_flag(pdf, obj, BAD_STREAMLEN);
1064
cli_dbgmsg("pdf_extract_obj: length and size both 0\n");
1065
break; /* Empty stream, nothing to scan */
1069
flate_orig = flate_in = start + p_stream;
1070
flate_orig_length = length;
1071
if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
1072
enum enc_method enc = get_enc_method(pdf, obj);
1074
if (obj->flags & (1 << OBJ_FILTER_CRYPT)) {
1076
const char *q = pdf_getdict(start, &len, "/DecodeParams");
1080
char *name = pdf_readval(q, len, "/Name");
1081
cli_dbgmsg("cli_pdf: Crypt filter %s\n", name);
1083
if (name && strcmp(name, "/Identity"))
1084
enc = parse_enc_method(pdf->CF, pdf->CF_n, name, enc);
1090
if (cli_memstr(start, p_stream, "/XRef", 5)) {
1091
cli_dbgmsg("cli_pdf: cross reference stream, skipping\n");
1093
decrypted = decrypt_any(pdf, obj->id, flate_in, &length,
1097
flate_in = decrypted;
1101
if (obj->flags & (1 << OBJ_FILTER_AH)) {
1102
ascii_decoded = cli_malloc(length/2 + 1);
1103
if (!ascii_decoded) {
1104
cli_errmsg("Cannot allocate memory for ascii_decoded\n");
1108
ascii_decoded_size = asciihexdecode(flate_in,
1111
} else if (obj->flags & (1 << OBJ_FILTER_A85)) {
1112
ascii_decoded = cli_malloc(length*5);
1113
if (!ascii_decoded) {
1114
cli_errmsg("Cannot allocate memory for ascii_decoded\n");
1119
ascii_decoded_size = ascii85decode(flate_in, length, (unsigned char*)ascii_decoded);
1122
if (ascii_decoded_size < 0) {
1123
/* don't flag for images or truncated objs*/
1124
if (!(obj->flags & ((1 << OBJ_IMAGE) | (1 << OBJ_TRUNCATED))))
1125
pdfobj_flag(pdf, obj, BAD_ASCIIDECODE);
1127
cli_dbgmsg("cli_pdf: failed to asciidecode in %u %u obj\n", obj->id>>8,obj->id&0xff);
1128
free(ascii_decoded);
1129
ascii_decoded = NULL;
1130
/* attempt to directly flatedecode it */
1133
/* either direct or ascii-decoded input */
1135
ascii_decoded_size = length;
1137
flate_in = ascii_decoded;
1139
if (obj->flags & (1 << OBJ_FILTER_FLATE)) {
1140
cli_dbgmsg("cli_pdf: deflate len %ld (orig %ld)\n", ascii_decoded_size, (long)orig_length);
1141
rc = filter_flatedecode(pdf, obj, flate_in, ascii_decoded_size, fout, &sum);
1142
if (rc == CL_EFORMAT) {
1144
flate_in = flate_orig;
1145
ascii_decoded_size = flate_orig_length;
1148
cli_dbgmsg("cli_pdf: dumping raw stream (probably encrypted)\n");
1149
noisy_warnmsg("cli_pdf: dumping raw stream, probably encrypted and we failed to decrypt'n");
1151
if (filter_writen(pdf, obj, fout, flate_in, ascii_decoded_size, &sum) != ascii_decoded_size) {
1152
cli_errmsg("cli_pdf: failed to write output file\n");
1157
if (filter_writen(pdf, obj, fout, flate_in, ascii_decoded_size, &sum) != ascii_decoded_size)
1161
noisy_warnmsg("cannot find stream bounds for obj %u %u\n", obj->id>>8, obj->id&0xff);
1164
} else if (obj->flags & (1 << OBJ_JAVASCRIPT)) {
1166
const char *q = pdf->map+obj->start;
1167
/* TODO: get obj-endobj size */
1168
off_t bytesleft = obj_size(pdf, obj, 0);
1177
q2 = cli_memstr(q, bytesleft, "/JavaScript", 11);
1181
bytesleft -= q2 - q + 11;
1184
js = pdf_readstring(q, bytesleft, "/JS", NULL, &q2, !(pdf->flags & (1<<DECRYPTABLE_PDF)));
1185
bytesleft -= q2 - q;
1189
const char *out = js;
1190
js_len = strlen(js);
1191
if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
1192
cli_dbgmsg("cli_pdf: encrypted string\n");
1193
decrypted = decrypt_any(pdf, obj->id, js, &js_len,
1194
pdf->enc_method_string);
1197
noisy_msg(pdf, "decrypted Javascript string from obj %u %u\n", obj->id>>8,obj->id&0xff);
1202
if (filter_writen(pdf, obj, fout, out, js_len, &sum) != js_len) {
1209
cli_dbgmsg("bytesleft: %d\n", (int)bytesleft);
1211
if (bytesleft > 0) {
1212
q2 = pdf_nextobject(q, bytesleft);
1214
q2 = q + bytesleft - 1;
1216
/* non-conforming PDFs that don't escape ) properly */
1217
q3 = memchr(q, ')', bytesleft);
1221
while (q2 > q && q2[-1] == ' ')
1226
filter_writen(pdf, obj, fout, q, q2 - q, &sum);
1232
} while (bytesleft > 0);
1234
off_t bytesleft = obj_size(pdf, obj, 0);
1238
else if (filter_writen(pdf, obj, fout , pdf->map + obj->start, bytesleft,&sum) != bytesleft)
1243
cli_dbgmsg("cli_pdf: extracted %ld bytes %u %u obj to %s\n", sum, obj->id>>8, obj->id&0xff, fullname);
1245
if (flags & PDF_EXTRACT_OBJ_SCAN && sum) {
1248
cli_updatelimits(pdf->ctx, sum);
1250
/* TODO: invoke bytecode on this pdf obj with metainformation associated */
1251
lseek(fout, 0, SEEK_SET);
1252
rc2 = cli_magic_scandesc(fout, pdf->ctx);
1253
if (rc2 == CL_VIRUS || rc == CL_SUCCESS)
1256
if ((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) {
1257
rc2 = run_pdf_hooks(pdf, PDF_PHASE_POSTDUMP, fout, obj - pdf->objs);
1258
if (rc2 == CL_VIRUS)
1262
if (((rc == CL_CLEAN) || ((rc == CL_VIRUS) && (pdf->ctx->options & CL_SCAN_ALLMATCHES))) && (obj->flags & (1 << OBJ_CONTENTS))) {
1263
lseek(fout, 0, SEEK_SET);
1264
cli_dbgmsg("cli_pdf: dumping contents %u %u\n", obj->id>>8, obj->id&0xff);
1266
rc2 = pdf_scan_contents(fout, pdf);
1267
if (rc2 == CL_VIRUS)
1270
noisy_msg(pdf, "extracted text from obj %u %u\n", obj->id>>8, obj->id&0xff);
1275
free(ascii_decoded);
1278
if (flags & PDF_EXTRACT_OBJ_SCAN && !pdf->ctx->engine->keeptmp)
1279
if (cli_unlink(fullname) && rc != CL_VIRUS)
1294
STATE_ANY /* for actions table below */
1297
#define NAMEFLAG_NONE 0x0
1298
#define NAMEFLAG_HEURISTIC 0x1
1300
struct pdfname_action {
1301
const char *pdfname;
1302
enum pdf_objflags set_objflag;/* OBJ_DICT is noop */
1303
enum objstate from_state;/* STATE_NONE is noop */
1304
enum objstate to_state;
1307
void (*pdf_stats_cb)(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act);
1312
static struct pdfname_action pdfname_actions[] = {
1313
{"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCIIHexDecode_cb},
1314
{"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCII85Decode_cb},
1315
{"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCII85Decode_cb},
1316
{"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, ASCIIHexDecode_cb},
1317
{"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, EmbeddedFile_cb},
1318
{"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, FlateDecode_cb},
1319
{"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, FlateDecode_cb},
1320
{"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, Image_cb},
1321
{"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, LZWDecode_cb},
1322
{"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, LZWDecode_cb},
1323
{"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, RunLengthDecode_cb},
1324
{"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, RunLengthDecode_cb},
1325
{"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, CCITTFaxDecode_cb},
1326
{"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, CCITTFaxDecode_cb},
1327
{"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, JBIG2Decode_cb},
1328
{"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, DCTDecode_cb},
1329
{"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, DCTDecode_cb},
1330
{"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, JPXDecode_cb},
1331
{"Crypt", OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC, Crypt_cb},
1332
{"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC, Standard_cb},
1333
{"Sig", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, Sig_cb},
1334
{"V", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
1335
{"R", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
1336
{"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED, NAMEFLAG_HEURISTIC, NULL},
1337
{"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER, NAMEFLAG_HEURISTIC, NULL},
1338
{"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT, NAMEFLAG_HEURISTIC, JavaScript_cb},
1339
{"Length", OBJ_DICT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
1340
{"S", OBJ_DICT, STATE_NONE, STATE_S, NAMEFLAG_HEURISTIC, NULL},
1341
{"Type", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, NULL},
1342
{"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION, NAMEFLAG_HEURISTIC, OpenAction_cb},
1343
{"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION, NAMEFLAG_HEURISTIC, Launch_cb},
1344
{"Page", OBJ_PAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC, Page_cb},
1345
{"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS, NAMEFLAG_HEURISTIC, NULL},
1346
{"Author", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Author_cb},
1347
{"Producer", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Producer_cb},
1348
{"CreationDate", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, CreationDate_cb},
1349
{"ModDate", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, ModificationDate_cb},
1350
{"Creator", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Creator_cb},
1351
{"Title", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Title_cb},
1352
{"Keywords", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Keywords_cb},
1353
{"Subject", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Subject_cb},
1354
{"Pages", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Pages_cb},
1355
{"Colors", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, Colors_cb},
1356
{"RichMedia", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, RichMedia_cb},
1357
{"AcroForm", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, AcroForm_cb},
1358
{"XFA", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_NONE, XFA_cb}
1361
static struct pdfname_action pdfname_actions[] = {
1362
{"ASCIIHexDecode", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1363
{"ASCII85Decode", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1364
{"A85", OBJ_FILTER_A85, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1365
{"AHx", OBJ_FILTER_AH, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1366
{"EmbeddedFile", OBJ_EMBEDDED_FILE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
1367
{"FlateDecode", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1368
{"Fl", OBJ_FILTER_FLATE, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1369
{"Image", OBJ_IMAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
1370
{"LZWDecode", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1371
{"LZW", OBJ_FILTER_LZW, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1372
{"RunLengthDecode", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1373
{"RL", OBJ_FILTER_RL, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1374
{"CCITTFaxDecode", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1375
{"CCF", OBJ_FILTER_FAX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1376
{"JBIG2Decode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1377
{"DCTDecode", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1378
{"DCT", OBJ_FILTER_DCT, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1379
{"JPXDecode", OBJ_FILTER_JPX, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1380
{"Crypt", OBJ_FILTER_CRYPT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC},
1381
{"Standard", OBJ_FILTER_STANDARD, STATE_FILTER, STATE_FILTER, NAMEFLAG_HEURISTIC},
1382
{"Sig", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
1383
{"V", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
1384
{"R", OBJ_SIGNED, STATE_ANY, STATE_NONE, NAMEFLAG_HEURISTIC},
1385
{"Linearized", OBJ_DICT, STATE_NONE, STATE_LINEARIZED, NAMEFLAG_HEURISTIC},
1386
{"Filter", OBJ_HASFILTERS, STATE_ANY, STATE_FILTER, NAMEFLAG_HEURISTIC},
1387
{"JavaScript", OBJ_JAVASCRIPT, STATE_S, STATE_JAVASCRIPT, NAMEFLAG_HEURISTIC},
1388
{"Length", OBJ_DICT, STATE_FILTER, STATE_NONE, NAMEFLAG_HEURISTIC},
1389
{"S", OBJ_DICT, STATE_NONE, STATE_S, NAMEFLAG_HEURISTIC},
1390
{"Type", OBJ_DICT, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
1391
{"OpenAction", OBJ_OPENACTION, STATE_ANY, STATE_OPENACTION, NAMEFLAG_HEURISTIC},
1392
{"Launch", OBJ_LAUNCHACTION, STATE_ANY, STATE_LAUNCHACTION, NAMEFLAG_HEURISTIC},
1393
{"Page", OBJ_PAGE, STATE_NONE, STATE_NONE, NAMEFLAG_HEURISTIC},
1394
{"Contents", OBJ_CONTENTS, STATE_NONE, STATE_CONTENTS, NAMEFLAG_HEURISTIC}
1398
/* Mask of the obj->flags bits that correspond to stream filters the parser
 * recognises. handle_pdfname() tests obj->flags against this mask before
 * logging "unknown filter", and pdf_parseobj() uses it (together with
 * OBJ_SIGNED) to clear OBJ_FILTER_UNKNOWN again. */
#define KNOWN_FILTERS                   \
    ((1 << OBJ_FILTER_AH) |             \
     (1 << OBJ_FILTER_RL) |             \
     (1 << OBJ_FILTER_A85) |            \
     (1 << OBJ_FILTER_FLATE) |          \
     (1 << OBJ_FILTER_LZW) |            \
     (1 << OBJ_FILTER_FAX) |            \
     (1 << OBJ_FILTER_DCT) |            \
     (1 << OBJ_FILTER_JPX) |            \
     (1 << OBJ_FILTER_CRYPT))
1400
static void handle_pdfname(struct pdf_struct *pdf, struct pdf_obj *obj, const char *pdfname, int escapes, enum objstate *state)
1402
struct pdfname_action *act = NULL;
1405
obj->statsflags |= OBJ_FLAG_PDFNAME_DONE;
1407
for (j=0;j<sizeof(pdfname_actions)/sizeof(pdfname_actions[0]);j++) {
1408
if (!strcmp(pdfname, pdfname_actions[j].pdfname)) {
1409
act = &pdfname_actions[j];
1415
/* these are digital signature objects, filter doesn't matter,
1416
* we don't need them anyway */
1417
if (*state == STATE_FILTER && !(obj->flags & (1 << OBJ_SIGNED)) && !(obj->flags & KNOWN_FILTERS)) {
1418
cli_dbgmsg("cli_pdf: unknown filter %s\n", pdfname);
1419
obj->flags |= 1 << OBJ_FILTER_UNKNOWN;
1425
if ((act->nameflags & NAMEFLAG_HEURISTIC) && escapes) {
1426
/* if a commonly used PDF name is escaped that is certainly
1428
cli_dbgmsg("cli_pdf: pdfname %s is escaped\n", pdfname);
1429
pdfobj_flag(pdf, obj, ESCAPED_COMMON_PDFNAME);
1433
if ((act->pdf_stats_cb))
1434
act->pdf_stats_cb(pdf, obj, act);
1437
if (act->from_state == *state || act->from_state == STATE_ANY) {
1438
*state = act->to_state;
1440
if (*state == STATE_FILTER && act->set_objflag !=OBJ_DICT && (obj->flags & (1 << act->set_objflag))) {
1441
cli_dbgmsg("cli_pdf: duplicate stream filter %s\n", pdfname);
1442
pdfobj_flag(pdf, obj, BAD_STREAM_FILTERS);
1445
obj->flags |= 1 << act->set_objflag;
1447
/* auto-reset states */
1450
*state = STATE_NONE;
1458
static int pdf_readint(const char *q0, int len, const char *key);
1460
static void pdf_parse_encrypt(struct pdf_struct *pdf, const char *enc, int len)
1465
if (len >= 16 && !strncmp(enc, "/EncryptMetadata", 16)) {
1466
q = cli_memstr(enc+16, len-16, "/Encrypt", 8);
1476
q2 = pdf_nextobject(q, len);
1477
if (!q2 || !isdigit(*q2))
1480
objid = atoi(q2) << 8;
1483
q2 = pdf_nextobject(q, len);
1484
if (!q2 || !isdigit(*q2))
1487
objid |= atoi(q2) & 0xff;
1490
q2 = pdf_nextobject(q, len);
1491
if (!q2 || *q2 != 'R')
1494
cli_dbgmsg("cli_pdf: Encrypt dictionary in obj %d %d\n", objid>>8, objid&0xff);
1496
pdf->enc_objid = objid;
1499
static void pdf_parse_trailer(struct pdf_struct *pdf, const char *s, long length)
1503
enc = cli_memstr(s, length, "/Encrypt", 8);
1507
pdf->flags |= 1 << ENCRYPTED_PDF;
1508
pdf_parse_encrypt(pdf, enc, s + length - enc);
1509
newID = pdf_readstring(s, length, "/ID", &pdf->fileIDlen, NULL, 0);
1513
pdf->fileID = newID;
1518
void pdf_parseobj(struct pdf_struct *pdf, struct pdf_obj *obj)
1520
/* enough to hold common pdf names, we don't need all the names */
1522
const char *q2, *q3;
1523
const char *nextobj, *nextopen, *nextclose;
1524
const char *q = obj->start + pdf->map;
1525
const char *dict, *enddict, *start;
1526
off_t dict_length, full_dict_length;
1527
off_t objsize = obj_size(pdf, obj, 1);
1529
unsigned i, filters=0;
1530
unsigned blockopens=0;
1531
enum objstate objstate = STATE_NONE;
1533
json_object *pdfobj=NULL, *jsonobj=NULL;
1540
bytesleft = objsize;
1542
/* find start of dictionary */
1544
nextobj = pdf_nextobject(q, bytesleft);
1545
bytesleft -= nextobj -q;
1547
if (!nextobj || bytesleft < 0) {
1548
cli_dbgmsg("cli_pdf: %u %u obj: no dictionary\n", obj->id>>8, obj->id&0xff);
1550
if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
1551
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
1558
jsonobj = cli_jsonarray(pdfobj, "ObjectsWithoutDictionaries");
1560
cli_jsonint_array(jsonobj, obj->id>>8);
1566
q3 = memchr(q-1, '<', nextobj-q+1);
1570
} while (!q3 || q3[1] != '<');
1574
bytesleft = objsize - (q - start);
1575
enddict = q + bytesleft - 1;
1577
/* find end of dictionary block */
1578
if (bytesleft < 0) {
1579
cli_dbgmsg("cli_pdf: %u %u obj: broken dictionary\n", obj->id>>8, obj->id&0xff);
1581
if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
1582
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
1589
jsonobj = cli_jsonarray(pdfobj, "ObjectsWithBrokenDictionaries");
1591
cli_jsonint_array(jsonobj, obj->id>>8);
1597
/* while still looking ... */
1598
while ((q < enddict-1) && (blockopens > 0)) {
1599
/* find next close */
1600
nextclose = memchr(q, '>', enddict-q);
1601
if (nextclose && (nextclose[1] == '>')) {
1602
/* check for nested open */
1603
while ((nextopen = memchr(q-1, '<', nextclose-q+1)) != NULL) {
1604
if (nextopen[1] == '<') {
1610
/* unmatched < before next close */
1618
else if (nextclose) {
1619
/* found one > but not two */
1623
/* next closing not found */
1628
/* Was end of dictionary found? */
1630
/* probably truncated */
1631
cli_dbgmsg("cli_pdf: %u %u obj broken dictionary\n", obj->id>>8, obj->id&0xff);
1633
if (!(pdfobj) && pdf->ctx->wrkproperty != NULL) {
1634
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
1641
jsonobj = cli_jsonarray(pdfobj, "ObjectsWithBrokenDictionaries");
1643
cli_jsonint_array(jsonobj, obj->id>>8);
1649
enddict = nextclose;
1650
obj->flags |= 1 << OBJ_DICT;
1651
full_dict_length = dict_length = enddict - dict;
1653
/* This code prints the dictionary content.
1655
char * dictionary = malloc(dict_length + 1);
1657
for (i = 0; i < dict_length; i++) {
1658
if (isprint(dict[i]) || isspace(dict[i]))
1659
dictionary[i] = dict[i];
1661
dictionary[i] = '*';
1663
dictionary[dict_length] = '\0';
1664
cli_dbgmsg("cli_pdf: dictionary is <<%s>>\n", dictionary);
1670
/* process pdf names */
1671
for (q = dict;dict_length > 0;) {
1672
int escapes = 0, breakout=0;
1673
q2 = memchr(q, '/', dict_length);
1677
dict_length -= q2 - q;
1679
/* normalize PDF names */
1680
for (i = 0;dict_length > 0 && (i < sizeof(pdfname)-1); i++) {
1685
if (cli_hex2str_to(q+1, pdfname+i, 2) == -1)
1716
handle_pdfname(pdf, obj, pdfname, escapes, &objstate);
1717
if (objstate == STATE_LINEARIZED) {
1718
long trailer_end, trailer;
1720
pdfobj_flag(pdf, obj, LINEARIZED_PDF);
1721
objstate = STATE_NONE;
1722
trailer_end = pdf_readint(dict, full_dict_length, "/H");
1723
if (trailer_end > 0 && trailer_end < pdf->size) {
1724
trailer = trailer_end - 1024;
1728
q2 = pdf->map + trailer;
1729
cli_dbgmsg("cli_pdf: looking for trailer in linearized pdf: %ld - %ld\n", trailer, trailer_end);
1730
pdf_parse_trailer(pdf, q2, trailer_end - trailer);
1732
cli_dbgmsg("cli_pdf: found fileID\n");
1736
if (objstate == STATE_LAUNCHACTION)
1737
pdfobj_flag(pdf, obj, HAS_LAUNCHACTION);
1738
if (dict_length > 0 && (objstate == STATE_JAVASCRIPT || objstate == STATE_OPENACTION || objstate == STATE_CONTENTS)) {
1739
if (objstate == STATE_OPENACTION)
1740
pdfobj_flag(pdf, obj, HAS_OPENACTION);
1742
q2 = pdf_nextobject(q, dict_length);
1743
if (q2 && isdigit(*q2)) {
1744
uint32_t objid = atoi(q2) << 8;
1745
while (isdigit(*q2))
1748
q2 = pdf_nextobject(q2, dict_length);
1749
if (q2 && isdigit(*q2)) {
1750
objid |= atoi(q2) & 0xff;
1751
q2 = pdf_nextobject(q2, dict_length);
1753
if (q2 && *q2 == 'R') {
1754
struct pdf_obj *obj2;
1756
cli_dbgmsg("cli_pdf: found %s stored in indirect object %u %u\n", pdfname, objid >> 8, objid&0xff);
1757
obj2 = find_obj(pdf, obj, objid);
1759
enum pdf_objflags flag =
1760
objstate == STATE_JAVASCRIPT ? OBJ_JAVASCRIPT :
1761
objstate == STATE_OPENACTION ? OBJ_OPENACTION :
1764
obj2->flags |= 1 << flag;
1765
obj->flags &= ~(1 << flag);
1767
pdfobj_flag(pdf, obj, BAD_INDOBJ);
1773
objstate = STATE_NONE;
1777
for (i=0;i<sizeof(pdfname_actions)/sizeof(pdfname_actions[0]);i++) {
1778
const struct pdfname_action *act = &pdfname_actions[i];
1780
if ((obj->flags & (1 << act->set_objflag)) &&
1781
act->from_state == STATE_FILTER &&
1782
act->to_state == STATE_FILTER &&
1783
act->set_objflag != OBJ_FILTER_CRYPT &&
1784
act->set_objflag != OBJ_FILTER_STANDARD) {
1790
/* more than 2 non-crypt filters */
1791
pdfobj_flag(pdf, obj, MANY_FILTERS);
1794
if (obj->flags & ((1 << OBJ_SIGNED) | KNOWN_FILTERS))
1795
obj->flags &= ~(1 << OBJ_FILTER_UNKNOWN);
1797
if (obj->flags & (1 << OBJ_FILTER_UNKNOWN))
1798
pdfobj_flag(pdf, obj, UNKNOWN_FILTER);
1800
cli_dbgmsg("cli_pdf: %u %u obj flags: %02x\n", obj->id>>8, obj->id&0xff, obj->flags);
1803
static const char *pdf_getdict(const char *q0, int* len, const char *key)
1808
cli_dbgmsg("cli_pdf: bad length %d\n", *len);
1815
q = cli_memstr(q0, *len, key, strlen(key));
1817
cli_dbgmsg("cli_pdf: %s not found in dict\n", key);
1823
q = pdf_nextobject(q0 + 1, *len - 1);
1825
cli_dbgmsg("cli_pdf: %s is invalid in dict\n", key);
1836
static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape)
1839
const char *start, *q, *end;
1846
q = pdf_getdict(q0, &len, key);
1853
for (;paren > 0 && len > 0; q++,len--) {
1875
s0 = s = cli_malloc(len + 1);
1877
cli_errmsg("pdf_readstring: Unable to allocate buffer\n");
1883
memcpy(s0, start, len);
1886
for (q = start;q < end;q++) {
1907
case '(':/* fall-through */
1908
case ')':/* fall-through */
1917
if (q+1 < end && q[1] == '\n')
1934
*s++ = 64*(q[0] - '0') + 8*(q[1] - '0') + (q[2] - '0');
1955
q = memchr(q+1, '>', len);
1962
s = cli_malloc((q - start)/2 + 1);
1963
if (s == NULL) { /* oops, couldn't allocate memory */
1964
cli_dbgmsg("cli_pdf: unable to allocate memory...\n");
1968
if (cli_hex2str_to(start, s, q - start)) {
1969
cli_dbgmsg("cli_pdf: %s has bad hex value\n", key);
1974
s[(q-start)/2] = '\0';
1976
*slen = (q - start)/2;
1981
cli_dbgmsg("cli_pdf: %s is invalid string in dict\n", key);
1985
static char *pdf_readval(const char *q, int len, const char *key)
1990
q = pdf_getdict(q, &len, key);
1994
while (len > 0 && *q && *q == ' ') {
2006
while (len > 0 && *end && !(*end == '/' || (len > 1 && end[0] == '>' && end[1] == '>'))) {
2011
s = cli_malloc(end - q + 1);
2015
memcpy(s, q, end-q);
2021
static int pdf_readint(const char *q0, int len, const char *key)
2023
const char *q = pdf_getdict(q0, &len, key);
2025
return (q != NULL) ? atoi(q) : -1;
2028
static int pdf_readbool(const char *q0, int len, const char *key, int Default)
2030
const char *q = pdf_getdict(q0, &len, key);
2035
if (!strncmp(q, "true", 4))
2038
if (!strncmp(q, "false", 5))
2041
cli_dbgmsg("cli_pdf: invalid value for %s bool\n", key);
2046
static const char *key_padding =
2047
"\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4e\x56\xff\xfa\x01\x08"
2048
"\x2e\x2e\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A";
2050
static void dbg_printhex(const char *msg, const char *hex, unsigned len)
2052
if (cli_debug_flag) {
2053
char *kh = cli_str2hex(hex, len);
2055
cli_dbgmsg("cli_pdf: %s: %s\n", msg, kh);
2061
static void check_user_password(struct pdf_struct *pdf, int R, const char *O,
2062
const char *U, int32_t P, int EM,
2064
unsigned length, unsigned oulen)
2069
struct arc4_state arc4;
2070
unsigned password_empty = 0;
2074
dbg_printhex("U: ", U, 32);
2075
dbg_printhex("O: ", O, 32);
2077
uint8_t result2[32];
2079
/* supplement to ISO 32000, 3.5.2 Algorithm 3.11 */
2080
/* user validation salt */
2081
cl_sha256(U+32, 8, result2, NULL);
2082
dbg_printhex("Computed U", (const char *)result2, 32);
2083
if (!memcmp(result2, U, 32)) {
2086
/* Algorithm 3.2a could be used to recover encryption key */
2088
cl_sha256(U+40, 8, result2, NULL);
2089
n = UE ? strlen(UE) : 0;
2091
cli_dbgmsg("cli_pdf: UE length is not 32: %d\n", (int)n);
2092
noisy_warnmsg("cli_pdf: UE length is not 32: %d\n", n);
2095
pdf->key = cli_malloc(32);
2097
cli_errmsg("check_user_password: Cannot allocate memory for pdf->key\n");
2101
aes_decrypt((const unsigned char *)UE, &n, (unsigned char *)(pdf->key), (char *)result2, 32, 0);
2102
dbg_printhex("cli_pdf: Candidate encryption key", pdf->key, pdf->keylen);
2105
} else if ((R >= 2) && (R <= 4)) {
2107
size_t sz = 68 + pdf->fileIDlen + (R >= 4 && !EM ? 4 : 0);
2113
memcpy(d, key_padding, 32);
2114
memcpy(d+32, O, 32);
2115
P = le32_to_host(P);
2116
memcpy(d+64, &P, 4);
2117
memcpy(d+68, pdf->fileID, pdf->fileIDlen);
2119
/* 7.6.3.3 Algorithm 2 */
2120
/* empty password, password == padding */
2121
if (R >= 4 && !EM) {
2122
uint32_t v = 0xFFFFFFFF;
2123
memcpy(d+68+pdf->fileIDlen, &v, 4);
2126
cl_hash_data("md5", d, sz, result, NULL);
2131
/* Yes, this really is on purpose */
2133
cl_hash_data("md5", result, length/8, result, NULL);
2138
pdf->keylen = length / 8;
2139
pdf->key = cli_malloc(pdf->keylen);
2143
memcpy(pdf->key, result, pdf->keylen);
2144
dbg_printhex("md5", (const char *)result, 16);
2145
dbg_printhex("Candidate encryption key", pdf->key, pdf->keylen);
2147
/* 7.6.3.3 Algorithm 6 */
2149
/* 7.6.3.3 Algorithm 4 */
2150
memcpy(data, key_padding, 32);
2151
arc4_init(&arc4, (const uint8_t *)(pdf->key), pdf->keylen);
2152
arc4_apply(&arc4, (uint8_t *)data, 32);
2153
dbg_printhex("computed U (R2)", data, 32);
2154
if (!memcmp(data, U, 32))
2156
} else if (R >= 3) {
2157
unsigned len = pdf->keylen;
2160
d = calloc(1, 32 + pdf->fileIDlen);
2164
/* 7.6.3.3 Algorithm 5 */
2165
memcpy(d, key_padding, 32);
2166
memcpy(d+32, pdf->fileID, pdf->fileIDlen);
2167
cl_hash_data("md5", d, 32 + pdf->fileIDlen, result, NULL);
2168
memcpy(data, pdf->key, len);
2170
arc4_init(&arc4, (const uint8_t *)data, len);
2171
arc4_apply(&arc4, result, 16);
2172
for (i=1;i<=19;i++) {
2176
data[j] = pdf->key[j] ^ i;
2178
arc4_init(&arc4, (const uint8_t *)data, len);
2179
arc4_apply(&arc4, result, 16);
2182
dbg_printhex("fileID", pdf->fileID, pdf->fileIDlen);
2183
dbg_printhex("computed U (R>=3)", (const char *)result, 16);
2184
if (!memcmp(result, U, 16))
2188
cli_dbgmsg("cli_pdf: invalid revision %d\n", R);
2189
noisy_warnmsg("cli_pdf: invalid revision %d\n", R);
2192
/* Supported R is in {2,3,4,5} */
2193
cli_dbgmsg("cli_pdf: R value out of range\n");
2194
noisy_warnmsg("cli_pdf: R value out of range\n");
2199
if (password_empty) {
2200
cli_dbgmsg("cli_pdf: user password is empty\n");
2201
noisy_msg(pdf, "cli_pdf: encrypted PDF found, user password is empty, will attempt to decrypt\n");
2202
/* The key we computed above is the key used to encrypt the streams.
2203
* We could decrypt it now if we wanted to */
2204
pdf->flags |= 1 << DECRYPTABLE_PDF;
2206
/* the key is not valid, we would need the user or the owner password to decrypt */
2207
cli_dbgmsg("cli_pdf: user/owner password would be required for decryption\n");
2208
noisy_warnmsg("cli_pdf: encrypted PDF found, user password is NOT empty, cannot decrypt!\n");
2212
static enum enc_method parse_enc_method(const char *dict, unsigned len, const char *key, enum enc_method def)
2216
enum enc_method ret = ENC_UNKNOWN;
2221
if (!strcmp(key, "Identity"))
2222
return ENC_IDENTITY;
2224
q = pdf_getdict(dict, (int *)(&len), key);
2228
CFM = pdf_readval(q, len, "/CFM");
2230
cli_dbgmsg("cli_pdf: %s CFM: %s\n", key, CFM);
2231
if (!strncmp(CFM,"V2", 2))
2233
else if (!strncmp(CFM,"AESV2",5))
2235
else if (!strncmp(CFM,"AESV3",5))
2237
else if (!strncmp(CFM,"None",4))
2246
static void pdf_handle_enc(struct pdf_struct *pdf)
2248
struct pdf_obj *obj;
2249
uint32_t len, n, R, P, length, EM = 1, i, oulen;
2250
char *O, *U, *UE, *StmF, *StrF, *EFF;
2253
if (pdf->enc_objid == ~0u)
2256
cli_dbgmsg("cli_pdf: pdf_handle_enc no file ID\n");
2257
noisy_warnmsg("cli_pdf: pdf_handle_enc no file ID\n");
2261
obj = find_obj(pdf, pdf->objs, pdf->enc_objid);
2263
cli_dbgmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
2264
noisy_warnmsg("cli_pdf: can't find encrypted object %d %d\n", pdf->enc_objid>>8, pdf->enc_objid&0xff);
2268
len = obj_size(pdf, obj, 1);
2269
q = pdf->map + obj->start;
2271
O = U = UE = StmF = StrF = EFF = NULL;
2274
pdf->enc_method_string = ENC_UNKNOWN;
2275
pdf->enc_method_stream = ENC_UNKNOWN;
2276
pdf->enc_method_embeddedfile = ENC_UNKNOWN;
2277
P = pdf_readint(q, len, "/P");
2279
cli_dbgmsg("cli_pdf: invalid P\n");
2280
noisy_warnmsg("cli_pdf: invalid P\n");
2284
q2 = cli_memstr(q, len, "/Standard", 9);
2286
cli_dbgmsg("cli_pdf: /Standard not found\n");
2287
noisy_warnmsg("cli_pdf: /Standard not found\n");
2291
/* we can have both of these:
2292
* /AESV2/Length /Standard/Length
2294
* make sure we don't mistake AES's length for Standard's */
2295
length = pdf_readint(q2, len - (q2 - q), "/Length");
2297
length = pdf_readint(q, len, "/Length");
2300
cli_dbgmsg("cli_pdf: invalid length: %d\n", length);
2304
R = pdf_readint(q, len, "/R");
2306
cli_dbgmsg("cli_pdf: invalid R\n");
2307
noisy_warnmsg("cli_pdf: invalid R\n");
2311
if ((R > 5) || (R < 2)) {
2312
cli_dbgmsg("cli_pdf: R value outside supported range [2..5]\n");
2313
noisy_warnmsg("cli_pdf: R value outside supported range [2..5]\n");
2322
if (R == 2 || R == 3) {
2323
pdf->enc_method_stream = ENC_V2;
2324
pdf->enc_method_string = ENC_V2;
2325
pdf->enc_method_embeddedfile = ENC_V2;
2326
} else if (R == 4 || R == 5) {
2327
EM = pdf_readbool(q, len, "/EncryptMetadata", 1);
2328
StmF = pdf_readval(q, len, "/StmF");
2329
StrF = pdf_readval(q, len, "/StrF");
2330
EFF = pdf_readval(q, len, "/EFF");
2332
pdf->CF = pdf_getdict(q, (int *)(&n), "/CF");
2336
cli_dbgmsg("cli_pdf: StmF: %s\n", StmF);
2338
cli_dbgmsg("cli_pdf: StrF: %s\n", StrF);
2340
cli_dbgmsg("cli_pdf: EFF: %s\n", EFF);
2342
pdf->enc_method_stream = parse_enc_method(pdf->CF, n, StmF, ENC_IDENTITY);
2343
pdf->enc_method_string = parse_enc_method(pdf->CF, n, StrF, ENC_IDENTITY);
2344
pdf->enc_method_embeddedfile = parse_enc_method(pdf->CF, n, EFF, pdf->enc_method_stream);
2350
cli_dbgmsg("cli_pdf: EncryptMetadata: %s\n", EM ? "true" : "false");
2356
UE = pdf_readstring(q, len, "/UE", &n, NULL, 0);
2365
O = pdf_readstring(q, len, "/O", &n, NULL, 0);
2366
if (!O || n < oulen) {
2367
cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
2368
cli_dbgmsg("cli_pdf: invalid O: %d\n", n);
2370
dbg_printhex("invalid O", O, n);
2375
for (i=oulen;i<n;i++)
2380
dbg_printhex("too long O", O, n);
2381
noisy_warnmsg("too long O", O, n);
2387
U = pdf_readstring(q, len, "/U", &n, NULL, 0);
2388
if (!U || n < oulen) {
2389
cli_dbgmsg("cli_pdf: invalid U: %d\n", n);
2390
noisy_warnmsg("cli_pdf: invalid U: %d\n", n);
2393
dbg_printhex("invalid U", U, n);
2399
for (i=oulen;i<n;i++)
2403
dbg_printhex("too long U", U, n);
2408
cli_dbgmsg("cli_pdf: Encrypt R: %d, P %x, length: %d\n", R, P, length);
2410
cli_dbgmsg("cli_pdf: wrong key length, not multiple of 8\n");
2411
noisy_warnmsg("cli_pdf: wrong key length, not multiple of 8\n");
2414
check_user_password(pdf, R, O, U, P, EM, UE, length, oulen);
2422
int cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
2424
struct pdf_struct pdf;
2425
fmap_t *map = *ctx->fmap;
2426
size_t size = map->len - offset;
2427
off_t versize = size > 1032 ? 1032 : size;
2428
off_t map_off, bytesleft;
2430
const char *pdfver, *start, *eofmap, *q, *eof;
2431
int rc, badobjects = 0;
2432
unsigned i, alerts = 0;
2434
json_object *pdfobj=NULL;
2435
char *begin, *end, *p1;
2438
cli_dbgmsg("in cli_pdf(%s)\n", dir);
2439
memset(&pdf, 0, sizeof(pdf));
2442
pdf.enc_objid = ~0u;
2444
pdfver = start = fmap_need_off_once(map, offset, versize);
2446
/* Check PDF version */
2448
cli_errmsg("cli_pdf: mmap() failed (1)\n");
2453
if (ctx->wrkproperty)
2454
pdfobj = cli_jsonobj(ctx->wrkproperty, "PDFStats");
2457
/* offset is 0 when coming from filetype2 */
2458
pdfver = cli_memstr(pdfver, versize, "%PDF-", 5);
2460
cli_dbgmsg("cli_pdf: no PDF- header found\n");
2461
noisy_warnmsg("cli_pdf: no PDF- header found\n");
2463
pdf_export_json(&pdf);
2468
/* Check for PDF-1.[1-9] (a minor version digit below '1' is flagged as a bad version too). Although 1.7 is highest now, allow for future versions */
2469
if (pdfver[5] != '1' || pdfver[6] != '.' ||
2470
pdfver[7] < '1' || pdfver[7] > '9') {
2471
pdf.flags |= 1 << BAD_PDF_VERSION;
2472
cli_dbgmsg("cli_pdf: bad pdf version: %.8s\n", pdfver);
2475
cli_jsonbool(pdfobj, "BadVersion", 1);
2480
begin = (char *)(pdfver+5);
2482
strtoul(end, &end, 10);
2483
p1 = cli_calloc((end - begin) + 2, 1);
2485
strncpy(p1, begin, end - begin);
2486
p1[end - begin] = '\0';
2487
cli_jsonstr(pdfobj, "PDFVersion", p1);
2494
if (pdfver != start || offset) {
2495
pdf.flags |= 1 << BAD_PDF_HEADERPOS;
2496
cli_dbgmsg("cli_pdf: PDF header is not at position 0: %ld\n",pdfver-start+offset);
2499
cli_jsonbool(pdfobj, "BadVersionLocation", 1);
2503
offset += pdfver - start;
2505
/* find trailer and xref, don't fail if not found */
2506
map_off = (off_t)map->len - 2048;
2510
bytesleft = map->len - map_off;
2512
eofmap = fmap_need_off_once(map, map_off, bytesleft);
2514
cli_errmsg("cli_pdf: mmap() failed (2)\n");
2516
pdf_export_json(&pdf);
2521
eof = eofmap + bytesleft;
2522
for (q=&eofmap[bytesleft-5]; q > eofmap; q--) {
2523
if (memcmp(q, "%%EOF", 5) == 0)
2528
pdf.flags |= 1 << BAD_PDF_TRAILER;
2529
cli_dbgmsg("cli_pdf: %%%%EOF not found\n");
2532
cli_jsonbool(pdfobj, "NoEOF", 1);
2537
/*size = q - eofmap + map_off;*/
2539
for (;q > eofmap;q--) {
2540
if (memcmp(q, "startxref", 9) == 0)
2545
pdf.flags |= 1 << BAD_PDF_TRAILER;
2546
cli_dbgmsg("cli_pdf: startxref not found\n");
2549
cli_jsonbool(pdfobj, "NoXREF", 1);
2552
for (t=q;t > eofmap; t--) {
2553
if (memcmp(t,"trailer",7) == 0)
2557
pdf_parse_trailer(&pdf, eofmap, eof - eofmap);
2560
while (q < eof && (*q == ' ' || *q == '\n' || *q == '\r')) { q++; }
2563
bytesleft = map->len - offset - xref;
2564
if (bytesleft > 4096)
2567
q = fmap_need_off_once(map, offset + xref, bytesleft);
2568
if (!q || xrefCheck(q, q+bytesleft) == -1) {
2569
cli_dbgmsg("cli_pdf: did not find valid xref\n");
2570
pdf.flags |= 1 << BAD_PDF_TRAILER;
2577
pdf.map = fmap_need_off(map, offset, size);
2579
cli_errmsg("cli_pdf: mmap() failed (3)\n");
2581
pdf_export_json(&pdf);
2586
pdf.startoff = offset;
2588
rc = run_pdf_hooks(&pdf, PDF_PHASE_PRE, -1, -1);
2589
if ((rc == CL_VIRUS) && SCAN_ALL) {
2590
cli_dbgmsg("cli_pdf: (pre hooks) returned %d\n", rc);
2594
cli_dbgmsg("cli_pdf: (pre hooks) returning %d\n", rc);
2596
pdf_export_json(&pdf);
2598
return rc == CL_BREAK ? CL_CLEAN : rc;
2601
/* parse PDF and find obj offsets */
2602
while ((rc = pdf_findobj(&pdf)) > 0) {
2603
struct pdf_obj *obj = &pdf.objs[pdf.nobjs-1];
2605
cli_dbgmsg("cli_pdf: found %d %d obj @%ld\n", obj->id >> 8, obj->id&0xff, obj->start + offset);
2612
pdf.flags |= 1 << BAD_PDF_TOOMANYOBJS;
2614
/* must parse after finding all objs, so we can flag indirect objects */
2615
for (i=0;i<pdf.nobjs;i++) {
2616
struct pdf_obj *obj = &pdf.objs[i];
2618
if (cli_checktimelimit(ctx) != CL_SUCCESS) {
2619
cli_errmsg("Timeout reached in the PDF parser\n");
2621
pdf_export_json(&pdf);
2631
pdf_parseobj(&pdf, obj);
2634
pdf_handle_enc(&pdf);
2635
if (pdf.flags & (1 << ENCRYPTED_PDF))
2636
cli_dbgmsg("cli_pdf: encrypted pdf found, %s!\n",
2637
(pdf.flags & (1 << DECRYPTABLE_PDF)) ?
2638
"decryptable" : "not decryptable, stream will probably fail to decompress");
2640
if (DETECT_ENCRYPTED &&
2641
(pdf.flags & (1 << ENCRYPTED_PDF)) &&
2642
!(pdf.flags & (1 << DECRYPTABLE_PDF))) {
2643
/* It is encrypted, and a password/key needs to be supplied to decrypt.
2644
* This doesn't trigger for PDFs that are encrypted but don't need
2645
* a password to decrypt */
2646
cli_append_virus(ctx, "Heuristics.Encrypted.PDF");
2653
rc = run_pdf_hooks(&pdf, PDF_PHASE_PARSED, -1, -1);
2654
cli_dbgmsg("cli_pdf: (parsed hooks) returned %d\n", rc);
2655
if (rc == CL_VIRUS) {
2663
/* extract PDF objs */
2664
for (i=0;!rc && i<pdf.nobjs;i++) {
2665
struct pdf_obj *obj = &pdf.objs[i];
2667
if (cli_checktimelimit(ctx) != CL_SUCCESS) {
2668
cli_errmsg("Timeout reached in the PDF parser\n");
2670
pdf_export_json(&pdf);
2680
rc = pdf_extract_obj(&pdf, obj, PDF_EXTRACT_OBJ_SCAN);
2683
/* Don't halt on one bad object */
2684
cli_dbgmsg("cli_pdf: bad format object, skipping to next\n");
2686
pdf.stats.ninvalidobjs++;
2700
if (pdf.flags & (1 << ENCRYPTED_PDF))
2701
pdf.flags &= ~ ((1 << BAD_FLATESTART) | (1 << BAD_STREAMSTART) | (1 << BAD_ASCIIDECODE));
2703
if (pdf.flags && !rc) {
2704
cli_dbgmsg("cli_pdf: flags 0x%02x\n", pdf.flags);
2705
rc = run_pdf_hooks(&pdf, PDF_PHASE_END, -1, -1);
2706
if (rc == CL_VIRUS) {
2713
if (!rc && SCAN_ALGO && (ctx->dconf->other & OTHER_CONF_PDFNAMEOBJ)) {
2714
if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) {
2715
/* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
2716
cli_append_virus(ctx, "Heuristics.PDF.ObfuscatedNameObject");
2717
rc = cli_found_possibly_unwanted(ctx);
2721
/* TODO: find both trailers, and /Encrypt settings */
2722
if (pdf.flags & (1 << LINEARIZED_PDF))
2723
pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
2724
if (pdf.flags & (1 << MANY_FILTERS))
2725
pdf.flags &= ~ (1 << BAD_ASCIIDECODE);
2726
if (!rc && (pdf.flags &
2727
((1 << BAD_PDF_TOOMANYOBJS) | (1 << BAD_STREAM_FILTERS) |
2728
(1<<BAD_FLATE) | (1<<BAD_ASCIIDECODE)|
2729
(1<<UNTERMINATED_OBJ_DICT) | (1<<UNKNOWN_FILTER)))) {
2739
else if (!rc && badobjects) {
2744
pdf_export_json(&pdf);
2747
cli_dbgmsg("cli_pdf: returning %d\n", rc);
2752
/* PDF hooks may abort, don't return CL_BREAK to caller! */
2753
return rc == CL_BREAK ? CL_CLEAN : rc;
2756
static int asciihexdecode(const char *buf, off_t len, char *output)
2759
for (i=0,j=0;i+1<len;i++) {
2766
if (cli_hex2str_to(buf+i, output+j, 2) == -1) {
2781
* ascii85 inflation, returns number of bytes in output, -1 for error
2783
* See http://www.piclist.com/techref/method/encode.htm (look for base85)
2786
ascii85decode(const char *buf, off_t len, unsigned char *output)
2793
if(cli_memstr(buf, len, "~>", 2) == NULL)
2794
cli_dbgmsg("cli_pdf: ascii85decode: no EOF marker found\n");
2798
cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);
2801
int byte = (len--) ? (int)*ptr++ : EOF;
2803
if((byte == '~') && (len > 0) && (*ptr == '>'))
2806
if(byte >= '!' && byte <= 'u') {
2807
sum = (sum * 85) + ((uint32_t)byte - '!');
2808
if(++quintet == 5) {
2809
*output++ = (unsigned char)(sum >> 24);
2810
*output++ = (unsigned char)((sum >> 16) & 0xFF);
2811
*output++ = (unsigned char)((sum >> 8) & 0xFF);
2812
*output++ = (unsigned char)(sum & 0xFF);
2817
} else if(byte == 'z') {
2819
cli_dbgmsg("cli_pdf: ascii85decode: unexpected 'z'\n");
2828
} else if(byte == EOF) {
2829
cli_dbgmsg("cli_pdf: ascii85decode: quintet %d\n", quintet);
2834
cli_dbgmsg("cli_pdf: ascii85Decode: only 1 byte in last quintet\n");
2838
for(i = quintet; i < 5; i++)
2842
sum += (0xFFFFFF >> ((quintet - 2) * 8));
2845
for(i = 0; i < quintet - 1; i++)
2846
*output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
2850
} else if(!isspace(byte)) {
2851
cli_dbgmsg("cli_pdf: ascii85Decode: invalid character 0x%x, len %lu\n", byte & 0xFF, (unsigned long)len);
2860
* Find the start of the next line
2863
pdf_nextlinestart(const char *ptr, size_t len)
2865
while(strchr("\r\n", *ptr) == NULL) {
2872
while(strchr("\r\n", *ptr) != NULL) {
2883
* Return the start of the next PDF object.
2884
* This assumes that we're not in a stream.
2887
pdf_nextobject(const char *ptr, size_t len)
2896
case '%': /* comment */
2897
p = pdf_nextlinestart(ptr, len);
2901
len -= (size_t)(p - ptr);
2908
case '[': /* Start of an array object */
2911
case '<': /* Start of a dictionary object */
2917
case '/': /* Start of a name object */
2919
case '(': /* start of JS */
2923
/* TODO: parse and return object type */
2935
/* PDF statistics */
2937
static void ASCIIHexDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
2945
pdf->stats.nasciihexdecode++;
2950
static void ASCII85Decode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
2958
pdf->stats.nascii85decode++;
2963
static void EmbeddedFile_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
2971
pdf->stats.nembeddedfile++;
2976
static void FlateDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
2984
pdf->stats.nflate++;
2989
static void Image_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
2997
pdf->stats.nimage++;
3002
static void LZWDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3015
static void RunLengthDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3023
pdf->stats.nrunlengthdecode++;
3028
static void CCITTFaxDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3036
pdf->stats.nfaxdecode++;
3041
static void JBIG2Decode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3043
struct json_object *pdfobj, *jbig2arr;
3051
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3054
if (!(pdf->ctx->wrkproperty))
3057
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
3061
jbig2arr = cli_jsonarray(pdfobj, "JBIG2Objects");
3065
cli_jsonint_array(jbig2arr, obj->id>>8);
3067
pdf->stats.njbig2decode++;
3072
static void DCTDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3080
pdf->stats.ndctdecode++;
3085
static void JPXDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3093
pdf->stats.njpxdecode++;
3098
static void Crypt_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3106
pdf->stats.ncrypt++;
3111
static void Standard_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3119
pdf->stats.nstandard++;
3124
static void Sig_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3132
pdf->stats.nsigned++;
3137
/*
 * PDF statistics callback for JavaScript: appends this object's id to the
 * "JavascriptObjects" array inside the "PDFStats" JSON object held under
 * pdf->ctx->wrkproperty.
 *
 * The statements guarded by the two tests below are not visible in this
 * excerpt -- presumably early returns when file-property scanning is off or
 * no JSON root exists; TODO confirm. act is not used by the visible lines.
 *
 * NOTE(review): the local that holds the "JavascriptObjects" array is named
 * jbig2arr, which looks copied from JBIG2Decode_cb; a name such as jsarr
 * would describe it better.
 */
static void JavaScript_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3139
struct json_object *pdfobj, *jbig2arr;
3146
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3149
if (!(pdf->ctx->wrkproperty))
3152
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
3156
jbig2arr = cli_jsonarray(pdfobj, "JavascriptObjects");
3160
/* The low 8 bits of obj->id are shifted out before recording it
 * (presumably the generation number -- TODO confirm). */
cli_jsonint_array(jbig2arr, obj->id>>8);
3167
/* PDF statistics callback: adds one to pdf->stats.nopenaction, the counter
 * pdf_export_json() reports as "OpenActionCount".
 * obj and act are not used by the visible statement. */
static void OpenAction_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3175
pdf->stats.nopenaction++;
3180
/* PDF statistics callback: adds one to pdf->stats.nlaunch, the counter
 * pdf_export_json() reports as "LaunchCount".
 * obj and act are not used by the visible statement. */
static void Launch_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3188
pdf->stats.nlaunch++;
3193
/* PDF statistics callback for Page. No body statement is visible in this
 * excerpt; presumably it updates pdf->stats.npage, which pdf_export_json()
 * reports as "PageCount" -- TODO confirm. */
static void Page_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3206
/*
 * PDF statistics callback for /Author: while pdf->stats.author is still
 * unset, stores the result of pdf_parse_string() called with the object's
 * bytes (obj->start + pdf->map), obj_size(pdf, obj, 1) and the key
 * "/Author". An already-set value is left untouched. The stored pointer is
 * later passed to free() by pdf_export_json().
 *
 * The statement guarded by the CL_SCAN_FILE_PROPERTIES test is not visible
 * in this excerpt (presumably an early return -- TODO confirm).
 */
static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3213
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3216
if (!(pdf->stats.author))
3217
pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL);
3222
/*
 * PDF statistics callback for /Creator: while pdf->stats.creator is still
 * unset, stores the result of pdf_parse_string() called with the object's
 * bytes (obj->start + pdf->map), obj_size(pdf, obj, 1) and the key
 * "/Creator". An already-set value is left untouched. The stored pointer is
 * later passed to free() by pdf_export_json().
 *
 * The statement guarded by the CL_SCAN_FILE_PROPERTIES test is not visible
 * in this excerpt (presumably an early return -- TODO confirm).
 */
static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3229
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3232
if (!(pdf->stats.creator))
3233
pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL);
3238
/*
 * PDF statistics callback for the modification date: while
 * pdf->stats.modificationdate is still unset, stores the result of
 * pdf_parse_string() called with the object's bytes (obj->start + pdf->map),
 * obj_size(pdf, obj, 1) and the key "/ModDate". An already-set value is
 * left untouched. The stored pointer is later passed to free() by
 * pdf_export_json().
 *
 * The statement guarded by the CL_SCAN_FILE_PROPERTIES test is not visible
 * in this excerpt (presumably an early return -- TODO confirm).
 */
static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3245
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3248
if (!(pdf->stats.modificationdate))
3249
pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL);
3254
/*
 * PDF statistics callback for /CreationDate: while pdf->stats.creationdate
 * is still unset, stores the result of pdf_parse_string() called with the
 * object's bytes (obj->start + pdf->map), obj_size(pdf, obj, 1) and the key
 * "/CreationDate". An already-set value is left untouched. The stored
 * pointer is later passed to free() by pdf_export_json().
 *
 * The statement guarded by the CL_SCAN_FILE_PROPERTIES test is not visible
 * in this excerpt (presumably an early return -- TODO confirm).
 */
static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3261
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3264
if (!(pdf->stats.creationdate))
3265
pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL);
3270
/*
 * PDF statistics callback for /Producer: while pdf->stats.producer is still
 * unset, stores the result of pdf_parse_string() called with the object's
 * bytes (obj->start + pdf->map), obj_size(pdf, obj, 1) and the key
 * "/Producer". An already-set value is left untouched. The stored pointer
 * is later passed to free() by pdf_export_json().
 *
 * The statement guarded by the CL_SCAN_FILE_PROPERTIES test is not visible
 * in this excerpt (presumably an early return -- TODO confirm).
 */
static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3277
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3280
if (!(pdf->stats.producer))
3281
pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL);
3286
/*
 * PDF statistics callback for /Title: while pdf->stats.title is still
 * unset, stores the result of pdf_parse_string() called with the object's
 * bytes (obj->start + pdf->map), obj_size(pdf, obj, 1) and the key
 * "/Title". An already-set value is left untouched. The stored pointer is
 * later passed to free() by pdf_export_json().
 *
 * The statement guarded by the CL_SCAN_FILE_PROPERTIES test is not visible
 * in this excerpt (presumably an early return -- TODO confirm).
 */
static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3293
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3296
if (!(pdf->stats.title))
3297
pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL);
3302
/*
 * PDF statistics callback for /Keywords: while pdf->stats.keywords is still
 * unset, stores the result of pdf_parse_string() called with the object's
 * bytes (obj->start + pdf->map), obj_size(pdf, obj, 1) and the key
 * "/Keywords". An already-set value is left untouched. The stored pointer
 * is later passed to free() by pdf_export_json().
 *
 * The statement guarded by the CL_SCAN_FILE_PROPERTIES test is not visible
 * in this excerpt (presumably an early return -- TODO confirm).
 */
static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3309
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3312
if (!(pdf->stats.keywords))
3313
pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL);
3318
/*
 * PDF statistics callback for /Subject: while pdf->stats.subject is still
 * unset, stores the result of pdf_parse_string() called with the object's
 * bytes (obj->start + pdf->map), obj_size(pdf, obj, 1) and the key
 * "/Subject". An already-set value is left untouched. The stored pointer is
 * later passed to free() by pdf_export_json().
 *
 * The statement guarded by the CL_SCAN_FILE_PROPERTIES test is not visible
 * in this excerpt (presumably an early return -- TODO confirm).
 */
static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3325
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3328
if (!(pdf->stats.subject))
3329
pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL);
3334
/* PDF statistics callback: adds one to pdf->stats.nrichmedia, the counter
 * pdf_export_json() reports as "RichMediaCount".
 * obj and act are not used by the visible statement. */
static void RichMedia_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3342
pdf->stats.nrichmedia++;
3347
/* PDF statistics callback: adds one to pdf->stats.nacroform, the counter
 * pdf_export_json() reports as "AcroFormCount".
 * obj and act are not used by the visible statement. */
static void AcroForm_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3355
pdf->stats.nacroform++;
3360
/* PDF statistics callback for XFA. No body statement is visible in this
 * excerpt; presumably it updates pdf->stats.nxfa, which pdf_export_json()
 * reports as "XFACount" -- TODO confirm. */
static void XFA_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3373
/*
 * PDF statistics callback for a /Pages object: compares what the object's
 * /Kids array contains with the number written after /Count, and sets the
 * "IncorrectPagesCount" boolean on the "PDFStats" JSON object when the two
 * disagree.
 *
 * Visible steps:
 *   - locate "/Kids" in the object's bytes and parse the array with
 *     pdf_parse_array();
 *   - test every array node for an 'R' character (what the test guards is
 *     not visible in this excerpt -- presumably it counts the node into
 *     npages as an indirect reference; TODO confirm);
 *   - locate "/Count", skip whitespace, read a decimal number with
 *     strtoul() and flag a mismatch with npages;
 *   - release the parsed array with pdf_free_array().
 * "IncorrectPagesCount" is also set right after the pdf_parse_array() call
 * and right after the "/Count" lookup; the conditions guarding those two
 * calls are not visible here (presumably the failure checks -- TODO
 * confirm). act is not used by the visible lines.
 *
 * NOTE(review): objstart's initializer dereferences both obj and pdf before
 * the !(pdf) test further down, so that test cannot protect against a NULL
 * pdf. pdf->ctx is also dereferenced without the !(pdf->ctx) check that
 * Colors_cb performs.
 * NOTE(review): isspace() receives begin[0] unconverted; if begin points to
 * plain char (its declaration is not visible here), a negative byte value
 * is undefined behaviour -- it should be cast to unsigned char.
 */
static void Pages_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3375
struct pdf_array *array;
3376
const char *objstart = (const char *)(obj->start + pdf->map);
3379
unsigned long npages=0, count;
3380
struct pdf_array_node *node;
3381
json_object *pdfobj;
3385
if (!(pdf) || !(pdf->ctx->wrkproperty))
3388
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3391
objsz = obj_size(pdf, obj, 1);
3393
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
3397
begin = cli_memstr(objstart, objsz, "/Kids", 5);
3403
array = pdf_parse_array(pdf, obj, objsz, (char *)begin, NULL);
3405
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
3409
for (node = array->nodes; node != NULL; node = node->next)
3411
if (strchr((char *)(node->data), 'R'))
3414
begin = cli_memstr(obj->start + pdf->map, objsz, "/Count", 6);
3416
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
3421
/* Skip whitespace before the number, staying inside the object's bytes. */
while (begin - objstart < objsz && isspace(begin[0]))
3424
/* Ran off the end of the object without finding a number. */
if (begin - objstart >= objsz) {
3428
count = strtoul(begin, NULL, 10);
3429
if (count != npages)
3430
cli_jsonbool(pdfobj, "IncorrectPagesCount", 1);
3433
pdf_free_array(array);
3438
/*
 * PDF statistics callback for /Colors: reads the decimal number that
 * follows the "/Colors" name in the object's bytes and appends this
 * object's id (obj->id>>8) to the "BigColors" array inside the "PDFStats"
 * JSON object held under pdf->ctx->wrkproperty.
 *
 * The statements guarded by the tests below are not visible in this
 * excerpt -- presumably each is an early return, so that only objects with
 * a sufficiently large colour count get recorded; TODO confirm. act is not
 * used by the visible lines.
 *
 * NOTE(review): isspace() receives p1[0] unconverted; p1 is assigned from a
 * (char *) cast, so a byte with the high bit set may be passed as a
 * negative value, which is undefined behaviour -- cast to unsigned char.
 * NOTE(review): the comment below says "> 2**24" but the test is
 * ncolors < 1<<24, so a value of exactly 2**24 is not filtered out.
 */
static void Colors_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
3440
json_object *colorsobj, *pdfobj;
3441
unsigned long ncolors;
3447
if (!(pdf) || !(pdf->ctx) || !(pdf->ctx->wrkproperty))
3450
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
3453
objsz = obj_size(pdf, obj, 1);
3455
start = (char *)(obj->start + pdf->map);
3457
p1 = (char *)cli_memstr(start, objsz, "/Colors", 7);
3463
/* Ensure that we have at least one whitespace character plus at least one number */
3464
if (objsz - (p1 - start) < 2)
3467
/* Skip whitespace before the number, staying inside the object's bytes. */
while (p1 - start < objsz && isspace(p1[0]))
3470
/* Only whitespace remained up to the end of the object. */
if ((size_t)(p1 - start) == objsz)
3473
ncolors = strtoul(p1, NULL, 10);
3475
/* We only care if the number of colors > 2**24 */
3476
if (ncolors < 1<<24)
3479
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
3483
colorsobj = cli_jsonarray(pdfobj, "BigColors");
3487
cli_jsonint_array(colorsobj, obj->id>>8);
3492
/*
 * Writes the statistics collected in pdf->stats and pdf->flags into the
 * "PDFStats" JSON object under pdf->ctx->wrkproperty, then frees the
 * metadata strings kept in pdf->stats and resets each pointer to NULL.
 *
 * Output, as far as visible here: the metadata strings that are set, the
 * per-feature counters, one boolean per structural flag in pdf->flags, and
 * a "TruncatedObjects" array holding the id of every object whose flags
 * include OBJ_TRUNCATED.
 *
 * The body of the first test (file-property scanning off, or no JSON root)
 * is not visible in this excerpt; presumably it skips the export -- TODO
 * confirm whether the strings are still freed on that path.
 *
 * NOTE(review): free(NULL) is a no-op, so the if-guards around the free()
 * calls at the end are not needed for correctness.
 */
static void pdf_export_json(struct pdf_struct *pdf)
3494
json_object *pdfobj;
3504
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES) || !(pdf->ctx->wrkproperty)) {
3508
pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
3513
/* Document metadata strings: each is written only when it was captured. */
if (pdf->stats.author)
3514
cli_jsonstr(pdfobj, "Author", pdf->stats.author);
3515
if (pdf->stats.creator)
3516
cli_jsonstr(pdfobj, "Creator", pdf->stats.creator);
3517
if (pdf->stats.producer)
3518
cli_jsonstr(pdfobj, "Producer", pdf->stats.producer);
3519
if (pdf->stats.modificationdate)
3520
cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate);
3521
if (pdf->stats.creationdate)
3522
cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate);
3523
if (pdf->stats.title)
3524
cli_jsonstr(pdfobj, "Title", pdf->stats.title);
3525
if (pdf->stats.subject)
3526
cli_jsonstr(pdfobj, "Subject", pdf->stats.subject);
3527
if (pdf->stats.keywords)
3528
cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords);
3529
/* Per-feature counters: every visible guard skips a counter that is zero. */
if (pdf->stats.ninvalidobjs)
3530
cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs);
3532
cli_jsonint(pdfobj, "JavaScriptObjectCount", pdf->stats.njs);
3533
if (pdf->stats.nflate)
3534
cli_jsonint(pdfobj, "DeflateObjectCount", pdf->stats.nflate);
3535
if (pdf->stats.nactivex)
3536
cli_jsonint(pdfobj, "ActiveXObjectCount", pdf->stats.nactivex);
3537
if (pdf->stats.nflash)
3538
cli_jsonint(pdfobj, "FlashObjectCount", pdf->stats.nflash);
3539
if (pdf->stats.ncolors)
3540
cli_jsonint(pdfobj, "ColorCount", pdf->stats.ncolors);
3541
if (pdf->stats.nasciihexdecode)
3542
cli_jsonint(pdfobj, "AsciiHexDecodeObjectCount", pdf->stats.nasciihexdecode);
3543
if (pdf->stats.nascii85decode)
3544
cli_jsonint(pdfobj, "Ascii85DecodeObjectCount", pdf->stats.nascii85decode);
3545
if (pdf->stats.nembeddedfile)
3546
cli_jsonint(pdfobj, "EmbeddedFileCount", pdf->stats.nembeddedfile);
3547
if (pdf->stats.nimage)
3548
cli_jsonint(pdfobj, "ImageCount", pdf->stats.nimage);
3549
if (pdf->stats.nlzw)
3550
cli_jsonint(pdfobj, "LZWCount", pdf->stats.nlzw);
3551
if (pdf->stats.nrunlengthdecode)
3552
cli_jsonint(pdfobj, "RunLengthDecodeCount", pdf->stats.nrunlengthdecode);
3553
if (pdf->stats.nfaxdecode)
3554
cli_jsonint(pdfobj, "FaxDecodeCount", pdf->stats.nfaxdecode);
3555
if (pdf->stats.njbig2decode)
3556
cli_jsonint(pdfobj, "JBIG2DecodeCount", pdf->stats.njbig2decode);
3557
if (pdf->stats.ndctdecode)
3558
cli_jsonint(pdfobj, "DCTDecodeCount", pdf->stats.ndctdecode);
3559
if (pdf->stats.njpxdecode)
3560
cli_jsonint(pdfobj, "JPXDecodeCount", pdf->stats.njpxdecode);
3561
if (pdf->stats.ncrypt)
3562
cli_jsonint(pdfobj, "CryptCount", pdf->stats.ncrypt);
3563
if (pdf->stats.nstandard)
3564
cli_jsonint(pdfobj, "StandardCount", pdf->stats.nstandard);
3565
if (pdf->stats.nsigned)
3566
cli_jsonint(pdfobj, "SignedCount", pdf->stats.nsigned);
3567
if (pdf->stats.nopenaction)
3568
cli_jsonint(pdfobj, "OpenActionCount", pdf->stats.nopenaction);
3569
if (pdf->stats.nlaunch)
3570
cli_jsonint(pdfobj, "LaunchCount", pdf->stats.nlaunch);
3571
if (pdf->stats.npage)
3572
cli_jsonint(pdfobj, "PageCount", pdf->stats.npage);
3573
if (pdf->stats.nrichmedia)
3574
cli_jsonint(pdfobj, "RichMediaCount", pdf->stats.nrichmedia);
3575
if (pdf->stats.nacroform)
3576
cli_jsonint(pdfobj, "AcroFormCount", pdf->stats.nacroform);
3577
if (pdf->stats.nxfa)
3578
cli_jsonint(pdfobj, "XFACount", pdf->stats.nxfa);
3579
/* Structural flags: pdf->flags is a bit set indexed by the flag constants. */
if (pdf->flags & (1 << BAD_PDF_VERSION))
3580
cli_jsonbool(pdfobj, "BadVersion", 1);
3581
if (pdf->flags & (1 << BAD_PDF_HEADERPOS))
3582
cli_jsonbool(pdfobj, "BadHeaderPosition", 1);
3583
if (pdf->flags & (1 << BAD_PDF_TRAILER))
3584
cli_jsonbool(pdfobj, "BadTrailer", 1);
3585
if (pdf->flags & (1 << BAD_PDF_TOOMANYOBJS))
3586
cli_jsonbool(pdfobj, "TooManyObjects", 1);
3587
/* "Decryptable" is tested inside the braces opened by the "Encrypted" test. */
if (pdf->flags & (1 << ENCRYPTED_PDF)) {
3588
cli_jsonbool(pdfobj, "Encrypted", 1);
3589
if (pdf->flags & (1 << DECRYPTABLE_PDF))
3590
cli_jsonbool(pdfobj, "Decryptable", 1);
3593
/* Record the id (low 8 bits shifted out) of every truncated object. */
for (i=0; i < pdf->nobjs; i++) {
3594
if (pdf->objs[i].flags & (1<<OBJ_TRUNCATED)) {
3595
json_object *truncobj;
3597
truncobj = cli_jsonarray(pdfobj, "TruncatedObjects");
3601
cli_jsonint_array(truncobj, pdf->objs[i].id>>8);
3606
/* Release the metadata strings and clear the pointers so that they cannot
 * be freed or read again afterwards. */
if ((pdf->stats.author)) {
3607
free(pdf->stats.author);
3608
pdf->stats.author = NULL;
3611
if (pdf->stats.creator) {
3612
free(pdf->stats.creator);
3613
pdf->stats.creator = NULL;
3616
if (pdf->stats.producer) {
3617
free(pdf->stats.producer);
3618
pdf->stats.producer = NULL;
3621
if (pdf->stats.modificationdate) {
3622
free(pdf->stats.modificationdate);
3623
pdf->stats.modificationdate = NULL;
3626
if (pdf->stats.creationdate) {
3627
free(pdf->stats.creationdate);
3628
pdf->stats.creationdate = NULL;
3631
if (pdf->stats.title) {
3632
free(pdf->stats.title);
3633
pdf->stats.title = NULL;
3636
if (pdf->stats.subject) {
3637
free(pdf->stats.subject);
3638
pdf->stats.subject = NULL;
3641
if (pdf->stats.keywords) {
3642
free(pdf->stats.keywords);
3643
pdf->stats.keywords = NULL;