aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2009-10-18 21:54:24 +0200
committerJay Berkenbilt <ejb@ql.org>2009-10-18 21:54:24 +0200
commit94131116a90a076c49e799aa5e4c63ce0ecb0391 (patch)
treee985eaf3f3cc9852ef406723a140bddb619bf704
parent3356b6708d72688831a62d0928345fb07f2d785e (diff)
downloadqpdf-94131116a90a076c49e799aa5e4c63ce0ecb0391.tar.zst
more notes, testing of cleartext metadata, some crypt filter fixes
git-svn-id: svn+q:///qpdf/trunk@823 71b93d88-0707-0410-a8cf-f5a4172ac649
-rw-r--r--TODO38
-rw-r--r--libqpdf/QPDFWriter.cc5
-rw-r--r--libqpdf/QPDF_encryption.cc51
-rw-r--r--qpdf/qpdf.testcov6
-rw-r--r--qpdf/qtest/qpdf.test56
-rw-r--r--qpdf/qtest/qpdf/compressed-metadata.pdfbin0 -> 14007 bytes
-rw-r--r--qpdf/test_driver.cc26
7 files changed, 149 insertions, 33 deletions
diff --git a/TODO b/TODO
index 777257f9..952f5c80 100644
--- a/TODO
+++ b/TODO
@@ -43,15 +43,6 @@
(http://delphi.about.com). .. use at your own risk and for whatever
the purpose you want .. no support provided. Sample code provided."
- * Test cases for metadata: make sure we get uncompressed metadata for
- all --stream-data modes unless encrypted. Have check_metadata
- function in the test suite that should report whether the metadata
- is compressed (by looking at the /Filter key in the stream
- dictionary) and tries to extract it filtered to make sure
- encryption/decryption works. We should also grep for some string
- for encrypted files where it's not supposed to be encrypted to make
- sure it's also not compressed.
-
* R = 4, V = 4 encryption.
- Update C API for R4 encryption
@@ -64,7 +55,7 @@
- figure out a way to test crypt filters defined on a stream
- - test extraction of metadata with and without encrypted metadata
+ - test combinations of linearization and v4 encryption
- would be nice to test strings and streams with different
encryption types, but without sample data, we'd have to write
@@ -115,6 +106,29 @@
General
=======
+ * Handle embedded files. PDF Reference 1.7 section 3.10, "File
+ Specifications", discusses this. Once we can definitely recognize
+ all embedded files in a document, we can update the encryption
+ code to handle it properly. In QPDF_encryption.cc, search for
+ cf_file. Remove exception thrown if cf_file is different from
+ cf_stream, and write code in the stream decryption section to use
+ cf_file instead of cf_stream. In general, add interfaces to
+ get the list of embedded files and to extract them. To handle
+ general embedded files associated with the whole document, follow
+ root -> /Names -> /EmbeddedFiles -> /Names to get to the file
+ specification dictionaries. Then, in each file specification
+ dictionary, follow /EF -> /F to the actual stream.
+
+ * The description of Crypt filters is unclear with respect to how to
+ use them to override /StmF for specific streams. I'm not sure
+ whether qpdf will do the right thing for any specific individual
+ streams that might have crypt filters. The specification seems to
+ imply that only embedded file streams and metadata streams can have
+ crypt filters, and there are already special cases in the code to
+ handle those. Most likely, it won't be a problem, but someday
+ someone may find a file that qpdf doesn't work on because of crypt
+ filters.
+
* The second xref stream for linearized files has to be padded only
because we need file_size as computed in pass 1 to be accurate. If
we were not allowing writing to a pipe, we could seek back to the
@@ -150,10 +164,6 @@ General
of doing this seems very low since no viewer seems to care, so it's
probably not worth it.
- * Embedded file streams: figure out why running qpdf over the pdf 1.7
- spec results in a file that crashes acrobat reader when you try to
- save nested documents.
-
* QPDFObjectHandle::getPageImages() doesn't notice images in
inherited resource dictionaries. See comments in that function.
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index 76567db0..d094aa66 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -791,14 +791,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
}
else if (object.isDictionary())
{
- // XXX Must not preserve Crypt filters from original stream
- // dictionary
writeString("<<");
writeStringQDF("\n");
std::set<std::string> keys = object.getKeys();
for (std::set<std::string>::iterator iter = keys.begin();
iter != keys.end(); ++iter)
{
+ // I'm not fully clear on /Crypt keys in /DecodeParms. If
+ // one is found, we refuse to filter, so we should be
+ // safe.
std::string const& key = *iter;
if ((flags & f_filtered) &&
((key == "/Filter") ||
diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc
index 287a361a..fa16f55d 100644
--- a/libqpdf/QPDF_encryption.cc
+++ b/libqpdf/QPDF_encryption.cc
@@ -292,6 +292,10 @@ QPDF::interpretCF(QPDFObjectHandle cf)
{
return this->crypt_filters[filter];
}
+ else if (filter == "/Identity")
+ {
+ return e_none;
+ }
else
{
return e_unknown;
@@ -299,6 +303,7 @@ QPDF::interpretCF(QPDFObjectHandle cf)
}
else
{
+ // Default: /Identity
return e_none;
}
}
@@ -432,12 +437,12 @@ QPDF::initializeEncryption()
std::string method_name = cdict.getKey("/CFM").getName();
if (method_name == "/V2")
{
- // XXX coverage
+ QTC::TC("qpdf", "QPDF_encryption CFM V2");
method = e_rc4;
}
else if (method_name == "/AESV2")
{
- // XXX coverage
+ QTC::TC("qpdf", "QPDF_encryption CFM AESV2");
method = e_aes;
}
else
@@ -464,6 +469,15 @@ QPDF::initializeEncryption()
{
this->cf_file = this->cf_stream;
}
+ if (this->cf_file != this->cf_stream)
+ {
+ throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+ "This document has embedded files that are"
+ " encrypted differently from the rest of the file."
+ " qpdf does not presently support this due to"
+ " lack of test data; if possible, please submit"
+ " a bug report that includes this file.");
+ }
}
EncryptionData data(V, R, Length / 8, P, O, U, id1, this->encrypt_metadata);
if (check_owner_password(
@@ -542,7 +556,7 @@ QPDF::decryptString(std::string& str, int objid, int generation)
std::string key = getKeyForObject(objid, generation, use_aes);
if (use_aes)
{
- // XXX coverage
+ QTC::TC("qpdf", "QPDF_encryption aes decode string");
assert(key.length() == Pl_AES_PDF::key_size);
Pl_Buffer bufpl("decrypted string");
Pl_AES_PDF pl("aes decrypt string", &bufpl, false,
@@ -586,30 +600,33 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
encryption_method_e method = e_unknown;
std::string method_source = "/StmF from /Encrypt dictionary";
- if (stream_dict.getKey("/DecodeParms").isDictionary())
- {
- QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms");
- if (decode_parms.getKey("/Crypt").isDictionary())
- {
- // XXX coverage
- QPDFObjectHandle crypt = decode_parms.getKey("/Crypt");
- method = interpretCF(crypt.getKey("/Name"));
- method_source = "stream's Crypt decode parameters";
- }
- }
+ // NOTE: the section in the PDF specification on crypt filters
+ // seems to suggest that there might be a /Crypt key in
+ // /DecodeParms whose value is a crypt filter (e.g., << /Name
+ // /StdCF >>), but implementation notes suggest this can only
+ // happen for metadata streams, and empirical observation
+ // suggests that they are otherwise ignored. Not having been
+ // able to find a sample file that uses crypt filters in any
+ // way other than /StrF and /StmF, I'm not really sure what to
+ // do about this. If we were to override the encryption on a
+ // per-stream basis using crypt filters, set method_source to
+ // something useful in the error message for unknown
+ // encryption methods (search for method_source).
if (method == e_unknown)
{
if ((! this->encrypt_metadata) && (type == "/Metadata"))
{
- // XXX coverage
+ QTC::TC("qpdf", "QPDF_encryption cleartext metadata");
method = e_none;
}
else
{
+ // NOTE: We should use cf_file if this is an
+ // embedded file, but we can't yet detect embedded
+ // file streams as such.
method = this->cf_stream;
}
- // XXX What about embedded file streams?
}
use_aes = false;
switch (method)
@@ -640,7 +657,7 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
std::string key = getKeyForObject(objid, generation, use_aes);
if (use_aes)
{
- // XXX coverage
+ QTC::TC("qpdf", "QPDF_encryption aes decode stream");
assert(key.length() == Pl_AES_PDF::key_size);
pipeline = new Pl_AES_PDF("AES stream decryption", pipeline,
false, (unsigned char*) key.c_str());
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index ebbd9cb0..fc4cb383 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -161,3 +161,9 @@ qpdf-c called qpdf_init_write multiple times 0
QPDF_encryption rc4 decode string 0
QPDF_encryption rc4 decode stream 0
QPDFWriter not compressing metadata 0
+QPDF_encryption CFM V2 0
+QPDF_encryption CFM AESV2 0
+QPDF_encryption aes decode string 0
+QPDF_encryption cleartext metadata 0
+QPDF_encryption aes decode stream 0
+QPDF_encryption stream crypt filter 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 1aba8e15..b4dc07ad 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -620,6 +620,51 @@ $td->runtest("show-xref-by-id-filtered",
show_ntests();
# ----------
+$td->notify("--- Clear-text Metadata Tests ---");
+$n_tests += 42;
+
+# args: file, exp_encrypted, exp_cleartext
+check_metadata("compressed-metadata.pdf", 0, 0);
+check_metadata("enc-base.pdf", 0, 1);
+
+foreach my $f (qw(compressed-metadata.pdf enc-base.pdf))
+{
+ foreach my $w (qw(compress preserve))
+ {
+ $td->runtest("$w streams",
+ {$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+ check_metadata("a.pdf", 0, 1);
+ $td->runtest("encrypt normally",
+ {$td->COMMAND =>
+ "qpdf --encrypt '' '' 128 -- a.pdf b.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+ check_metadata("b.pdf", 1, 0);
+ unlink "b.pdf";
+ $td->runtest("encrypt V4",
+ {$td->COMMAND =>
+ "qpdf --encrypt '' '' 128 --force-V4 -- a.pdf b.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+ check_metadata("b.pdf", 1, 0);
+ unlink "b.pdf";
+ $td->runtest("encrypt with cleartext metadata",
+ {$td->COMMAND =>
+ "qpdf --encrypt '' '' 128 --cleartext-metadata --" .
+ " a.pdf b.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+ check_metadata("b.pdf", 1, 1);
+ unlink "b.pdf";
+ $td->runtest("encrypt with aes and cleartext metadata",
+ {$td->COMMAND =>
+ "qpdf --encrypt '' '' 128" .
+ " --cleartext-metadata --use-aes=y -- a.pdf b.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+ check_metadata("b.pdf", 1, 1);
+ }
+}
+
+show_ntests();
+# ----------
$td->notify("--- Linearization Tests ---");
# $n_tests incremented after initialization of @linearized_files and
# @to_linearize.
@@ -1192,6 +1237,17 @@ sub compare_pdfs
system("rm -rf tif1 tif2");
}
+sub check_metadata
+{
+ my ($file, $exp_encrypted, $exp_cleartext) = @_;
+ my $out = "encrypted=$exp_encrypted; cleartext=$exp_cleartext\n" .
+ "test 6 done\n";
+ $td->runtest("check metadata: $file",
+ {$td->COMMAND => "test_driver 6 $file"},
+ {$td->STRING => $out, $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+}
+
sub get_md5_checksum
{
my $file = shift;
diff --git a/qpdf/qtest/qpdf/compressed-metadata.pdf b/qpdf/qtest/qpdf/compressed-metadata.pdf
new file mode 100644
index 00000000..1d93bb38
--- /dev/null
+++ b/qpdf/qtest/qpdf/compressed-metadata.pdf
Binary files differ
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index 48c0eb41..16859ad2 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -6,6 +6,7 @@
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
#include <qpdf/Pl_StdioFile.hh>
+#include <qpdf/Pl_Buffer.hh>
#include <qpdf/QPDFWriter.hh>
#include <iostream>
#include <string.h>
@@ -282,6 +283,31 @@ void runtest(int n, char const* filename)
}
}
}
+ else if (n == 6)
+ {
+ QPDFObjectHandle root = pdf.getRoot();
+ QPDFObjectHandle metadata = root.getKey("/Metadata");
+ if (! metadata.isStream())
+ {
+ throw std::logic_error("test 6 run on file with no metadata");
+ }
+ Pl_Buffer bufpl("buffer");
+ metadata.pipeStreamData(&bufpl, false, false, false);
+ Buffer* buf = bufpl.getBuffer();
+ unsigned char const* data = buf->getBuffer();
+ bool cleartext = false;
+ if ((buf->getSize() > 9) &&
+ (strncmp((char const*)data, "<?xpacket", 9) == 0))
+ {
+ cleartext = true;
+ }
+ delete buf;
+ std::cout << "encrypted="
+ << (pdf.isEncrypted() ? 1 : 0)
+ << "; cleartext="
+ << (cleartext ? 1 : 0)
+ << std::endl;
+ }
else
{
throw std::runtime_error(std::string("invalid test ") +