From 94131116a90a076c49e799aa5e4c63ce0ecb0391 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sun, 18 Oct 2009 19:54:24 +0000 Subject: more notes, testing of cleartext metadata, some crypt filter fixes git-svn-id: svn+q:///qpdf/trunk@823 71b93d88-0707-0410-a8cf-f5a4172ac649 --- TODO | 38 ++++++++++++++-------- libqpdf/QPDFWriter.cc | 5 +-- libqpdf/QPDF_encryption.cc | 51 +++++++++++++++++++---------- qpdf/qpdf.testcov | 6 ++++ qpdf/qtest/qpdf.test | 56 ++++++++++++++++++++++++++++++++ qpdf/qtest/qpdf/compressed-metadata.pdf | Bin 0 -> 14007 bytes qpdf/test_driver.cc | 26 +++++++++++++++ 7 files changed, 149 insertions(+), 33 deletions(-) create mode 100644 qpdf/qtest/qpdf/compressed-metadata.pdf diff --git a/TODO b/TODO index 777257f9..952f5c80 100644 --- a/TODO +++ b/TODO @@ -43,15 +43,6 @@ (http://delphi.about.com). .. use at your own risk and for whatever the purpose you want .. no support provided. Sample code provided." - * Test cases for metadata: make sure we get uncompressed metadata for - all --stream-data modes unless encrypted. Have check_metadata - function in the test suite that should report whether the metadata - is compressed (by looking at the /Filter key in the stream - dictionary) and tries to extract it filtered to make sure - encryption/decryption works. We should also grep for some string - for encrypted files where it's not supposed to be encrypted to make - sure it's also not compressed. - * R = 4, V = 4 encryption. - Update C API for R4 encryption @@ -64,7 +55,7 @@ - figure out a way to test crypt filters defined on a stream - - test extraction of metadata with and without encrypted metadata + - test combinations of linearization and v4 encryption - would be nice to test strings and streams with different encryption types, but without sample data, we'd have to write @@ -115,6 +106,29 @@ General ======= + * Handle embedded files. PDF Reference 1.7 section 3.10, "File + Specifications", discusses this. 
Once we can definitely recognize + all embedded files in a document, we can update the encryption + code to handle it properly. In QPDF_encryption.cc, search for + cf_file. Remove exception thrown if cf_file is different from + cf_stream, and write code in the stream decryption section to use + cf_file instead of cf_stream. In general, add interfaces to + get the list of embedded files and to extract them. To handle + general embedded files associated with the whole document, follow + root -> /Names -> /EmbeddedFiles -> /Names to get to the file + specification dictionaries. Then, in each file specification + dictionary, follow /EF -> /F to the actual stream. + + * The description of Crypt filters is unclear with respect to how to + use them to override /StmF for specific streams. I'm not sure + whether qpdf will do the right thing for any specific individual + streams that might have crypt filters. The specification seems to + imply that only embedded file streams and metadata streams can have + crypt filters, and there are already special cases in the code to + handle those. Most likely, it won't be a problem, but someday + someone may find a file that qpdf doesn't work on because of crypt + filters. + * The second xref stream for linearized files has to be padded only because we need file_size as computed in pass 1 to be accurate. If we were not allowing writing to a pipe, we could seek back to the @@ -150,10 +164,6 @@ General of doing this seems very low since no viewer seems to care, so it's probably not worth it. - * Embedded file streams: figure out why running qpdf over the pdf 1.7 - spec results in a file that crashes acrobat reader when you try to - save nested documents. - * QPDFObjectHandle::getPageImages() doesn't notice images in inherited resource dictionaries. See comments in that function. 
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 76567db0..d094aa66 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -791,14 +791,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, } else if (object.isDictionary()) { - // XXX Must not preserve Crypt filters from original stream - // dictionary writeString("<<"); writeStringQDF("\n"); std::set keys = object.getKeys(); for (std::set::iterator iter = keys.begin(); iter != keys.end(); ++iter) { + // I'm not fully clear on /Crypt keys in /DecodeParms. If + // one is found, we refuse to filter, so we should be + // safe. std::string const& key = *iter; if ((flags & f_filtered) && ((key == "/Filter") || diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 287a361a..fa16f55d 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -292,6 +292,10 @@ QPDF::interpretCF(QPDFObjectHandle cf) { return this->crypt_filters[filter]; } + else if (filter == "/Identity") + { + return e_none; + } else { return e_unknown; @@ -299,6 +303,7 @@ QPDF::interpretCF(QPDFObjectHandle cf) } else { + // Default: /Identity return e_none; } } @@ -432,12 +437,12 @@ QPDF::initializeEncryption() std::string method_name = cdict.getKey("/CFM").getName(); if (method_name == "/V2") { - // XXX coverage + QTC::TC("qpdf", "QPDF_encryption CFM V2"); method = e_rc4; } else if (method_name == "/AESV2") { - // XXX coverage + QTC::TC("qpdf", "QPDF_encryption CFM AESV2"); method = e_aes; } else @@ -464,6 +469,15 @@ QPDF::initializeEncryption() { this->cf_file = this->cf_stream; } + if (this->cf_file != this->cf_stream) + { + throw QPDFExc(this->file.getName(), this->file.getLastOffset(), + "This document has embedded files that are" + " encrypted differently from the rest of the file." 
+ " qpdf does not presently support this due to" + " lack of test data; if possible, please submit" + " a bug report that includes this file."); + } } EncryptionData data(V, R, Length / 8, P, O, U, id1, this->encrypt_metadata); if (check_owner_password( @@ -542,7 +556,7 @@ QPDF::decryptString(std::string& str, int objid, int generation) std::string key = getKeyForObject(objid, generation, use_aes); if (use_aes) { - // XXX coverage + QTC::TC("qpdf", "QPDF_encryption aes decode string"); assert(key.length() == Pl_AES_PDF::key_size); Pl_Buffer bufpl("decrypted string"); Pl_AES_PDF pl("aes decrypt string", &bufpl, false, @@ -586,30 +600,33 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, encryption_method_e method = e_unknown; std::string method_source = "/StmF from /Encrypt dictionary"; - if (stream_dict.getKey("/DecodeParms").isDictionary()) - { - QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms"); - if (decode_parms.getKey("/Crypt").isDictionary()) - { - // XXX coverage - QPDFObjectHandle crypt = decode_parms.getKey("/Crypt"); - method = interpretCF(crypt.getKey("/Name")); - method_source = "stream's Crypt decode parameters"; - } - } + // NOTE: the section in the PDF specification on crypt filters + // seems to suggest that there might be a /Crypt key in + // /DecodeParms whose value is a crypt filter (e.g., << /Name + // /StdCF >>), but implementation notes suggest this can only + // happen for metadata streams, and empirical observation + // suggests that they are otherwise ignored. Not having been + // able to find a sample file that uses crypt filters in any + // way other than /StrF and /StmF, I'm not really sure what to + // do about this. If we were to override the encryption on a + // per-stream basis using crypt filters, set method_source to + // something useful in the error message for unknown + // encryption methods (search for method_source). if (method == e_unknown) { if ((! 
this->encrypt_metadata) && (type == "/Metadata")) { - // XXX coverage + QTC::TC("qpdf", "QPDF_encryption cleartext metadata"); method = e_none; } else { + // NOTE: We should use cf_file if this is an + // embedded file, but we can't yet detect embedded + // file streams as such. method = this->cf_stream; } - // XXX What about embedded file streams? } use_aes = false; switch (method) @@ -640,7 +657,7 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, std::string key = getKeyForObject(objid, generation, use_aes); if (use_aes) { - // XXX coverage + QTC::TC("qpdf", "QPDF_encryption aes decode stream"); assert(key.length() == Pl_AES_PDF::key_size); pipeline = new Pl_AES_PDF("AES stream decryption", pipeline, false, (unsigned char*) key.c_str()); diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index ebbd9cb0..fc4cb383 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -161,3 +161,9 @@ qpdf-c called qpdf_init_write multiple times 0 QPDF_encryption rc4 decode string 0 QPDF_encryption rc4 decode stream 0 QPDFWriter not compressing metadata 0 +QPDF_encryption CFM V2 0 +QPDF_encryption CFM AESV2 0 +QPDF_encryption aes decode string 0 +QPDF_encryption cleartext metadata 0 +QPDF_encryption aes decode stream 0 +QPDF_encryption stream crypt filter 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 1aba8e15..b4dc07ad 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -618,6 +618,51 @@ $td->runtest("show-xref-by-id-filtered", {$td->FILE => "show-xref-by-id-filtered.out", $td->EXIT_STATUS => 0}); +show_ntests(); +# ---------- +$td->notify("--- Clear-text Metadata Tests ---"); +$n_tests += 42; + +# args: file, exp_encrypted, exp_cleartext +check_metadata("compressed-metadata.pdf", 0, 0); +check_metadata("enc-base.pdf", 0, 1); + +foreach my $f (qw(compressed-metadata.pdf enc-base.pdf)) +{ + foreach my $w (qw(compress preserve)) + { + $td->runtest("$w streams", + {$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"}, + 
{$td->STRING => "", $td->EXIT_STATUS => 0}); + check_metadata("a.pdf", 0, 1); + $td->runtest("encrypt normally", + {$td->COMMAND => + "qpdf --encrypt '' '' 128 -- a.pdf b.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); + check_metadata("b.pdf", 1, 0); + unlink "b.pdf"; + $td->runtest("encrypt V4", + {$td->COMMAND => + "qpdf --encrypt '' '' 128 --force-V4 -- a.pdf b.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); + check_metadata("b.pdf", 1, 0); + unlink "b.pdf"; + $td->runtest("encrypt with cleartext metadata", + {$td->COMMAND => + "qpdf --encrypt '' '' 128 --cleartext-metadata --" . + " a.pdf b.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); + check_metadata("b.pdf", 1, 1); + unlink "b.pdf"; + $td->runtest("encrypt with aes and cleartext metadata", + {$td->COMMAND => + "qpdf --encrypt '' '' 128" . + " --cleartext-metadata --use-aes=y -- a.pdf b.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); + check_metadata("b.pdf", 1, 1); + } +} + show_ntests(); # ---------- $td->notify("--- Linearization Tests ---"); @@ -1192,6 +1237,17 @@ sub compare_pdfs system("rm -rf tif1 tif2"); } +sub check_metadata +{ + my ($file, $exp_encrypted, $exp_cleartext) = @_; + my $out = "encrypted=$exp_encrypted; cleartext=$exp_cleartext\n" . 
+ "test 6 done\n"; + $td->runtest("check metadata: $file", + {$td->COMMAND => "test_driver 6 $file"}, + {$td->STRING => $out, $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +} + sub get_md5_checksum { my $file = shift; diff --git a/qpdf/qtest/qpdf/compressed-metadata.pdf b/qpdf/qtest/qpdf/compressed-metadata.pdf new file mode 100644 index 00000000..1d93bb38 Binary files /dev/null and b/qpdf/qtest/qpdf/compressed-metadata.pdf differ diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 48c0eb41..16859ad2 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -282,6 +283,31 @@ void runtest(int n, char const* filename) } } } + else if (n == 6) + { + QPDFObjectHandle root = pdf.getRoot(); + QPDFObjectHandle metadata = root.getKey("/Metadata"); + if (! metadata.isStream()) + { + throw std::logic_error("test 6 run on file with no metadata"); + } + Pl_Buffer bufpl("buffer"); + metadata.pipeStreamData(&bufpl, false, false, false); + Buffer* buf = bufpl.getBuffer(); + unsigned char const* data = buf->getBuffer(); + bool cleartext = false; + if ((buf->getSize() > 9) && + (strncmp((char const*)data, "