From 93ac1695a4b79f3d5b71e2d57ed876c28866d2c9 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 29 Dec 2012 19:00:05 -0500 Subject: Support files with only attachments encrypted Test cases added in a future commit since they depend on /R=6 support. --- TODO | 19 +--- include/qpdf/QPDF.hh | 2 + libqpdf/QPDF.cc | 36 ++++++ libqpdf/QPDFWriter.cc | 28 ++--- libqpdf/QPDF_Stream.cc | 250 ++++++++++++++++++++++++----------------- libqpdf/QPDF_encryption.cc | 91 ++++++++------- libqpdf/qpdf/QPDF_Stream.hh | 3 + qpdf/qpdf.testcov | 1 - qpdf/qtest/qpdf/obj0-check.out | 2 +- 9 files changed, 253 insertions(+), 179 deletions(-) diff --git a/TODO b/TODO index b231e994..8a4baaf2 100644 --- a/TODO +++ b/TODO @@ -89,20 +89,11 @@ Index: QPDFWriter.cc } ------------------------------ - * Handle embedded files. PDF Reference 1.7 section 3.10, "File - Specifications", discusses this. Once we can definitely recognize - all embedded files in a document, we can update the encryption - code to handle it properly. In QPDF_encryption.cc, search for - cf_file. Remove exception thrown if cf_file is different from - cf_stream, and write code in the stream decryption section to use - cf_file instead of cf_stream. In general, add interfaces to get - the list of embedded files and to extract them. To handle general - embedded files associated with the whole document, follow root -> - /Names -> /EmbeddedFiles -> /Names to get to the file specification - dictionaries. Then, in each file specification dictionary, follow - /EF -> /F to the actual stream. There may be other places file - specification dictionaries may appear, and there are also /RF keys - with related files, so reread section 3.10 carefully. + * Provide APIs for embedded files. See *attachments*.pdf in test + suite. The private method findAttachmentStreams finds at least + cases for modern versions of Adobe Reader (>= 1.7, maybe earlier). + PDF Reference 1.7 section 3.10, "File Specifications", discusses + this. A sourceforge user asks if qpdf can handle extracting and embedded resources and references these tools, which may be useful as a diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 866b11b3..7028bf1c 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -604,6 +604,7 @@ class QPDF int& act_objid, int& act_generation); PointerHolder resolve(int objid, int generation); void resolveObjectsInStream(int obj_stream_number); + void findAttachmentStreams(); // Calls finish() on the pipeline when done but does not delete it void pipeStreamData(int objid, int generation, @@ -1004,6 +1005,7 @@ class QPDF PointerHolder copied_streams; // copied_stream_data_provider is owned by copied_streams CopiedStreamDataProvider* copied_stream_data_provider; + std::set attachment_streams; // Linearization data qpdf_offset_t first_xref_item_offset; // actual value from file diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 9d5d8240..bee8cde1 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -314,6 +314,7 @@ QPDF::parse(char const* password) } initializeEncryption(); + findAttachmentStreams(); } void @@ -2069,3 +2070,38 @@ QPDF::pipeStreamData(int objid, int generation, } pipeline->finish(); } + +void +QPDF::findAttachmentStreams() +{ + QPDFObjectHandle root = getRoot(); + QPDFObjectHandle names = root.getKey("/Names"); + if (! names.isDictionary()) + { + return; + } + QPDFObjectHandle embeddedFiles = names.getKey("/EmbeddedFiles"); + if (! embeddedFiles.isDictionary()) + { + return; + } + names = embeddedFiles.getKey("/Names"); + if (! names.isArray()) + { + return; + } + for (int i = 0; i < names.getArrayNItems(); ++i) + { + QPDFObjectHandle item = names.getArrayItem(i); + if (item.isDictionary() && + item.getKey("/Type").isName() && + (item.getKey("/Type").getName() == "/Filespec") && + item.getKey("/EF").isDictionary() && + item.getKey("/EF").getKey("/F").isStream()) + { + QPDFObjectHandle stream = item.getKey("/EF").getKey("/F"); + this->attachment_streams.insert( + ObjGen(stream.getObjectID(), stream.getGeneration())); + } + } +} diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index cde76af9..8bfb6ff9 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -470,27 +470,13 @@ QPDFWriter::copyEncryptionParameters(QPDF& qpdf) } if (V >= 4) { - if (encrypt.hasKey("/CF") && - encrypt.getKey("/CF").isDictionary() && - encrypt.hasKey("/StmF") && - encrypt.getKey("/StmF").isName()) - { - // Determine whether to use AES from StmF. QPDFWriter - // can't write files with different StrF and StmF. - QPDFObjectHandle CF = encrypt.getKey("/CF"); - QPDFObjectHandle StmF = encrypt.getKey("/StmF"); - if (CF.hasKey(StmF.getName()) && - CF.getKey(StmF.getName()).isDictionary()) - { - QPDFObjectHandle StmF_data = CF.getKey(StmF.getName()); - if (StmF_data.hasKey("/CFM") && - StmF_data.getKey("/CFM").isName() && - StmF_data.getKey("/CFM").getName() == "/AESV2") - { - this->encrypt_use_aes = true; - } - } - } + // When copying encryption parameters, use AES even if the + // original file did not. Acrobat doesn't create files + // with V >= 4 that don't use AES, and the logic of + // figuring out whether AES is used or not is complicated + // with /StmF, /StrF, and /EFF all potentially having + // different values. + this->encrypt_use_aes = true; } QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", this->encrypt_metadata ? 0 : 1); diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 970ee58b..88b8e8ff 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -90,6 +90,80 @@ QPDF_Stream::getRawStreamData() return buf.getBuffer(); } +bool +QPDF_Stream::understandDecodeParams( + std::string const& filter, QPDFObjectHandle decode_obj, + int& predictor, int& columns, bool& early_code_change) +{ + bool filterable = true; + std::set keys = decode_obj.getKeys(); + for (std::set::iterator iter = keys.begin(); + iter != keys.end(); ++iter) + { + std::string const& key = *iter; + if ((filter == "/FlateDecode") && (key == "/Predictor")) + { + QPDFObjectHandle predictor_obj = decode_obj.getKey(key); + if (predictor_obj.isInteger()) + { + predictor = predictor_obj.getIntValue(); + if (! ((predictor == 1) || (predictor == 12))) + { + filterable = false; + } + } + else + { + filterable = false; + } + } + else if ((filter == "/LZWDecode") && (key == "/EarlyChange")) + { + QPDFObjectHandle earlychange_obj = decode_obj.getKey(key); + if (earlychange_obj.isInteger()) + { + int earlychange = earlychange_obj.getIntValue(); + early_code_change = (earlychange == 1); + if (! ((earlychange == 0) || (earlychange == 1))) + { + filterable = false; + } + } + else + { + filterable = false; + } + } + else if (key == "/Columns") + { + QPDFObjectHandle columns_obj = decode_obj.getKey(key); + if (columns_obj.isInteger()) + { + columns = columns_obj.getIntValue(); + } + else + { + filterable = false; + } + } + else if ((filter == "/Crypt") && + (((key == "/Type") || (key == "/Name")) && + (decode_obj.getKey("/Type").isNull() || + (decode_obj.getKey("/Type").isName() && + (decode_obj.getKey("/Type").getName() == + "/CryptFilterDecodeParms"))))) + { + // we handle this in decryptStream + } + else + { + filterable = false; + } + } + + return filterable; +} + bool QPDF_Stream::filterable(std::vector& filters, int& predictor, int& columns, @@ -110,106 +184,6 @@ QPDF_Stream::filterable(std::vector& filters, filter_abbreviations["/DCT"] = "/DCTDecode"; } - // Initialize values to their defaults as per the PDF spec - predictor = 1; - columns = 0; - early_code_change = true; - - bool filterable = true; - - // See if we can support any decode parameters that are specified. - - QPDFObjectHandle decode_obj = - this->stream_dict.getKey("/DecodeParms"); - if (decode_obj.isNull()) - { - // no problem - } - else if (decode_obj.isDictionary()) - { - std::set keys = decode_obj.getKeys(); - for (std::set::iterator iter = keys.begin(); - iter != keys.end(); ++iter) - { - std::string const& key = *iter; - if (key == "/Predictor") - { - QPDFObjectHandle predictor_obj = decode_obj.getKey(key); - if (predictor_obj.isInteger()) - { - predictor = predictor_obj.getIntValue(); - if (! ((predictor == 1) || (predictor == 12))) - { - filterable = false; - } - } - else - { - filterable = false; - } - } - else if (key == "/EarlyChange") - { - QPDFObjectHandle earlychange_obj = decode_obj.getKey(key); - if (earlychange_obj.isInteger()) - { - int earlychange = earlychange_obj.getIntValue(); - early_code_change = (earlychange == 1); - if (! ((earlychange == 0) || (earlychange == 1))) - { - filterable = false; - } - } - else - { - filterable = false; - } - } - else if (key == "/Columns") - { - QPDFObjectHandle columns_obj = decode_obj.getKey(key); - if (columns_obj.isInteger()) - { - columns = columns_obj.getIntValue(); - } - else - { - filterable = false; - } - } - else if (((key == "/Type") || (key == "/Name")) && - decode_obj.getKey("/Type").isName() && - (decode_obj.getKey("/Type").getName() == - "/CryptFilterDecodeParms")) - { - // we handle this in decryptStream - } - else - { - filterable = false; - } - } - } - else - { - // Ignore for now -- some filter types, like CCITTFaxDecode, - // use types other than dictionary for this. - QTC::TC("qpdf", "QPDF_Stream ignore non-dictionary DecodeParms"); - - filterable = false; - } - - if ((predictor > 1) && (columns == 0)) - { - // invalid - filterable = false; - } - - if (! filterable) - { - return false; - } - // Check filters QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter"); @@ -254,8 +228,7 @@ QPDF_Stream::filterable(std::vector& filters, "stream filter type is not name or array"); } - // `filters' now contains a list of filters to be applied in - // order. See which ones we can support. + bool filterable = true; for (std::vector::iterator iter = filters.begin(); iter != filters.end(); ++iter) @@ -278,6 +251,79 @@ QPDF_Stream::filterable(std::vector& filters, } } + if (! filterable) + { + return false; + } + + // `filters' now contains a list of filters to be applied in + // order. See which ones we can support. + + // Initialize values to their defaults as per the PDF spec + predictor = 1; + columns = 0; + early_code_change = true; + + // See if we can support any decode parameters that are specified. + + QPDFObjectHandle decode_obj = this->stream_dict.getKey("/DecodeParms"); + std::vector decode_parms; + if (decode_obj.isArray()) + { + for (int i = 0; i < decode_obj.getArrayNItems(); ++i) + { + decode_parms.push_back(decode_obj.getArrayItem(i)); + } + } + else + { + for (unsigned int i = 0; i < filters.size(); ++i) + { + decode_parms.push_back(decode_obj); + } + } + + if (decode_parms.size() != filters.size()) + { + throw QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(), + "", this->offset, + "stream /DecodeParms length is" + " inconsistent with filters"); + } + + for (unsigned int i = 0; i < filters.size(); ++i) + { + QPDFObjectHandle decode_item = decode_parms[i]; + if (decode_item.isNull()) + { + // okay + } + else if (decode_item.isDictionary()) + { + if (! understandDecodeParams( + filters[i], decode_item, + predictor, columns, early_code_change)) + { + filterable = false; + } + } + else + { + filterable = false; + } + } + + if ((predictor > 1) && (columns == 0)) + { + // invalid + filterable = false; + } + + if (! filterable) + { + return false; + } + return filterable; } diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 6ab51a03..568b8771 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -573,28 +573,6 @@ QPDF::initializeEncryption() { this->cf_file = this->cf_stream; } - if (this->cf_file != this->cf_stream) - { - // The issue for qpdf is that it can't tell the difference - // between an embedded file stream and a regular stream. - // Search for a comment containing cf_file. To fix this, - // we need files with encrypted embedded files and - // non-encrypted native streams and vice versa. Also if - // it is possible for them to be encrypted in different - // ways, we should have some of those too. In cases where - // we can detect whether a stream is encrypted or not, we - // might want to try to detecet that automatically in - // defense of possible logic errors surrounding detection - // of embedded file streams, unless that's really clear - // from the specification. - throw QPDFExc(qpdf_e_unsupported, this->file->getName(), - "encryption dictionary", this->file->getLastOffset(), - "This document has embedded files that are" - " encrypted differently from the rest of the file." - " qpdf does not presently support this due to" - " lack of test data; if possible, please submit" - " a bug report that includes this file."); - } } EncryptionData data(V, R, Length / 8, P, O, U, "", "", "", id1, this->encrypt_metadata); @@ -737,18 +715,48 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, encryption_method_e method = e_unknown; std::string method_source = "/StmF from /Encrypt dictionary"; - if (stream_dict.getKey("/Filter").isOrHasName("/Crypt") && - stream_dict.getKey("/DecodeParms").isDictionary()) - { - QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms"); - if (decode_parms.getKey("/Type").isName() && - (decode_parms.getKey("/Type").getName() == - "/CryptFilterDecodeParms")) - { - QTC::TC("qpdf", "QPDF_encryption stream crypt filter"); - method = interpretCF(decode_parms.getKey("/Name")); - method_source = "stream's Crypt decode parameters"; - } + if (stream_dict.getKey("/Filter").isOrHasName("/Crypt")) + { + if (stream_dict.getKey("/DecodeParms").isDictionary()) + { + QPDFObjectHandle decode_parms = + stream_dict.getKey("/DecodeParms"); + if (decode_parms.getKey("/Type").isName() && + (decode_parms.getKey("/Type").getName() == + "/CryptFilterDecodeParms")) + { + QTC::TC("qpdf", "QPDF_encryption stream crypt filter"); + method = interpretCF(decode_parms.getKey("/Name")); + method_source = "stream's Crypt decode parameters"; + } + } + else if (stream_dict.getKey("/DecodeParms").isArray() && + stream_dict.getKey("/Filter").isArray()) + { + QPDFObjectHandle filter = stream_dict.getKey("/Filter"); + QPDFObjectHandle decode = stream_dict.getKey("/DecodeParms"); + if (filter.getArrayNItems() == decode.getArrayNItems()) + { + for (int i = 0; i < filter.getArrayNItems(); ++i) + { + if (filter.getArrayItem(i).isName() && + (filter.getArrayItem(i).getName() == "/Crypt")) + { + QPDFObjectHandle crypt_params = + decode.getArrayItem(i); + if (crypt_params.isDictionary() && + crypt_params.getKey("/Name").isName()) + { +// XXX QTC::TC("qpdf", "QPDF_encrypt crypt array"); + method = interpretCF( + crypt_params.getKey("/Name")); + method_source = "stream's Crypt " + "decode parameters (array)"; + } + } + } + } + } } if (method == e_unknown) @@ -760,12 +768,15 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, } else { - // NOTE: We should should use cf_file if this is an - // embedded file, but we can't yet detect embedded - // file streams as such. When fixing, search for all - // occurrences of cf_file to find a reference to this - // comment. - method = this->cf_stream; + if (this->attachment_streams.count( + ObjGen(objid, generation)) > 0) + { + method = this->cf_file; + } + else + { + method = this->cf_stream; + } } } use_aes = false; diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index 34eaceeb..6e5dacf0 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -45,6 +45,9 @@ class QPDF_Stream: public QPDFObject void replaceFilterData(QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length); + bool understandDecodeParams( + std::string const& filter, QPDFObjectHandle decode_params, + int& predictor, int& columns, bool& early_code_change); bool filterable(std::vector& filters, int& predictor, int& columns, bool& early_code_change); diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index cf9aed20..4b748bc8 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -116,7 +116,6 @@ qpdf unable to filter 0 QPDF_String non-trivial UTF-16 0 QPDF xref overwrite object 0 QPDF decoding error warning 0 -QPDF_Stream ignore non-dictionary DecodeParms 0 qpdf-c called qpdf_init 0 qpdf-c called qpdf_cleanup 0 qpdf-c called qpdf_more_warnings 0 diff --git a/qpdf/qtest/qpdf/obj0-check.out b/qpdf/qtest/qpdf/obj0-check.out index 1b4bcf46..d3b5c2b0 100644 --- a/qpdf/qtest/qpdf/obj0-check.out +++ b/qpdf/qtest/qpdf/obj0-check.out @@ -1,7 +1,7 @@ -checking obj0.pdf WARNING: obj0.pdf: file is damaged WARNING: obj0.pdf (object 1 0, file position 77): expected n n obj WARNING: obj0.pdf: Attempting to reconstruct cross-reference table +checking obj0.pdf PDF Version: 1.3 File is not encrypted File is not linearized -- cgit v1.2.3-54-g00ecf