diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | libqpdf/QPDF.cc | 17 | ||||
-rw-r--r-- | libqpdf/QPDF_encryption.cc | 21 | ||||
-rw-r--r-- | qpdf/qpdf.testcov | 1 | ||||
-rw-r--r-- | qpdf/qtest/qpdf.test | 10 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/invalid-id-xref.out | 16 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/invalid-id-xref.pdf | bin | 0 -> 2499181 bytes |
7 files changed, 59 insertions, 11 deletions
@@ -1,3 +1,8 @@ +2013-06-15 Jay Berkenbilt <ejb@ql.org> + + * Handle some additional broken files with missing /ID in trailer + for encrypted files and with space rather than newline after xref. + 2013-06-14 Jay Berkenbilt <ejb@ql.org> * Detect and correct /Outlines dictionary being a direct object diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 00e13ca0..ad742daa 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -430,11 +430,22 @@ QPDF::read_xref(qpdf_offset_t xref_offset) std::map<int, int> free_table; while (xref_offset) { + char buf[7]; + memset(buf, 0, sizeof(buf)); this->file->seek(xref_offset, SEEK_SET); - std::string line = this->file->readLine(50); - if (line == "xref") + this->file->read(buf, sizeof(buf) - 1); + // The PDF spec says xref must be followed by a line + // terminator, but files exist in the wild where it is + // terminated by arbitrary whitespace. + PCRE xref_re("^xref\\s+"); + PCRE::Match m = xref_re.match(buf); + if (m) { - xref_offset = read_xrefTable(this->file->tell()); + QTC::TC("qpdf", "QPDF xref space", + ((buf[4] == '\n') ? 0 : + (buf[4] == '\r') ? 1 : + (buf[4] == ' ') ? 2 : 9999)); + xref_offset = read_xrefTable(xref_offset + m.getMatch(0).length()); } else { diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 51ed3072..88cd707f 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -791,17 +791,24 @@ QPDF::initializeEncryption() // encryption dictionary. this->encrypted = true; + std::string id1; QPDFObjectHandle id_obj = this->trailer.getKey("/ID"); - if (! 
(id_obj.isArray() && - (id_obj.getArrayNItems() == 2) && - id_obj.getArrayItem(0).isString())) + if ((id_obj.isArray() && + (id_obj.getArrayNItems() == 2) && + id_obj.getArrayItem(0).isString())) { - throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), - "trailer", this->file->getLastOffset(), - "invalid /ID in trailer dictionary"); + id1 = id_obj.getArrayItem(0).getStringValue(); + } + else + { + // Treating a missing ID as the empty string enables qpdf to + // decrypt some invalid encrypted files with no /ID that + // poppler can read but Adobe Reader can't. + warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), + "trailer", this->file->getLastOffset(), + "invalid /ID in trailer dictionary")); } - std::string id1 = id_obj.getArrayItem(0).getStringValue(); QPDFObjectHandle encryption_dict = this->trailer.getKey("/Encrypt"); if (! encryption_dict.isDictionary()) { diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 6cd4e014..aaf2f2ff 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -264,3 +264,4 @@ QPDFObjectHandle inline image token 0 QPDF not caching overridden objstm object 0 QPDFWriter original obj non-zero gen 0 QPDF_optimization indirect outlines 0 +QPDF xref space 2 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 2f2063f8..5321ae85 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", show_ntests(); # ---------- $td->notify("--- Miscellaneous Tests ---"); -$n_tests += 64; +$n_tests += 65; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, @@ -509,6 +509,14 @@ $td->runtest("check file", {$td->FILE => "a.pdf"}, {$td->FILE => "gen1.qdf"}); +# This file, from a user, is missing /ID in its trailer even though it +# is encrypted and also has a space instead of a newline after its +# xref keyword. xpdf can open it, but Adobe reader can't. 
+$td->runtest("check broken file", + {$td->COMMAND => "qpdf --check invalid-id-xref.pdf"}, + {$td->FILE => "invalid-id-xref.out", $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); + show_ntests(); # ---------- $td->notify("--- Numeric range parsing tests ---"); diff --git a/qpdf/qtest/qpdf/invalid-id-xref.out b/qpdf/qtest/qpdf/invalid-id-xref.out new file mode 100644 index 00000000..514d5a00 --- /dev/null +++ b/qpdf/qtest/qpdf/invalid-id-xref.out @@ -0,0 +1,16 @@ +WARNING: invalid-id-xref.pdf (trailer, file position 2493795): invalid /ID in trailer dictionary +checking invalid-id-xref.pdf +PDF Version: 1.1 +R = 3 +P = -1804 +User password = +extract for accessibility: not allowed +extract for any purpose: allowed +print low resolution: allowed +print high resolution: allowed +modify document assembly: not allowed +modify forms: not allowed +modify annotations: allowed +modify other: not allowed +modify anything: not allowed +File is not linearized diff --git a/qpdf/qtest/qpdf/invalid-id-xref.pdf b/qpdf/qtest/qpdf/invalid-id-xref.pdf Binary files differ new file mode 100644 index 00000000..eaae8041 --- /dev/null +++ b/qpdf/qtest/qpdf/invalid-id-xref.pdf |