summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2013-06-15 18:38:25 +0200
committer Jay Berkenbilt <ejb@ql.org> 2013-06-15 18:40:01 +0200
commit a85007cb0d9bb8af8f0a32bda3ace19aaff97816 (patch)
tree 83bd0cbe40e6f793d954c1351e97830ab966203f
parent a1d5a3e916a23ae22ee65b05df1789435a72d67c (diff)
download qpdf-a85007cb0d9bb8af8f0a32bda3ace19aaff97816.tar.zst
Handle more broken files
Tolerate two additional kinds of breakage: a space rather than a newline after the xref keyword, and a missing /ID in the trailer of an encrypted file. This enables qpdf to handle some files that xpdf can handle, even though Adobe Reader can't necessarily handle them.
-rw-r--r-- ChangeLog 5
-rw-r--r-- libqpdf/QPDF.cc 17
-rw-r--r-- libqpdf/QPDF_encryption.cc 21
-rw-r--r-- qpdf/qpdf.testcov 1
-rw-r--r-- qpdf/qtest/qpdf.test 10
-rw-r--r-- qpdf/qtest/qpdf/invalid-id-xref.out 16
-rw-r--r-- qpdf/qtest/qpdf/invalid-id-xref.pdf bin 0 -> 2499181 bytes
7 files changed, 59 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index 98657248..88665a0c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2013-06-15 Jay Berkenbilt <ejb@ql.org>
+
+ * Handle some additional broken files with missing /ID in trailer
+ for encrypted files and with space rather than newline after xref.
+
2013-06-14 Jay Berkenbilt <ejb@ql.org>
* Detect and correct /Outlines dictionary being a direct object
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 00e13ca0..ad742daa 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -430,11 +430,22 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
std::map<int, int> free_table;
while (xref_offset)
{
+ char buf[7];
+ memset(buf, 0, sizeof(buf));
this->file->seek(xref_offset, SEEK_SET);
- std::string line = this->file->readLine(50);
- if (line == "xref")
+ this->file->read(buf, sizeof(buf) - 1);
+ // The PDF spec says xref must be followed by a line
+ // terminator, but files exist in the wild where it is
+ // terminated by arbitrary whitespace.
+ PCRE xref_re("^xref\\s+");
+ PCRE::Match m = xref_re.match(buf);
+ if (m)
{
- xref_offset = read_xrefTable(this->file->tell());
+ QTC::TC("qpdf", "QPDF xref space",
+ ((buf[4] == '\n') ? 0 :
+ (buf[4] == '\r') ? 1 :
+ (buf[4] == ' ') ? 2 : 9999));
+ xref_offset = read_xrefTable(xref_offset + m.getMatch(0).length());
}
else
{
diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc
index 51ed3072..88cd707f 100644
--- a/libqpdf/QPDF_encryption.cc
+++ b/libqpdf/QPDF_encryption.cc
@@ -791,17 +791,24 @@ QPDF::initializeEncryption()
// encryption dictionary.
this->encrypted = true;
+ std::string id1;
QPDFObjectHandle id_obj = this->trailer.getKey("/ID");
- if (! (id_obj.isArray() &&
- (id_obj.getArrayNItems() == 2) &&
- id_obj.getArrayItem(0).isString()))
+ if ((id_obj.isArray() &&
+ (id_obj.getArrayNItems() == 2) &&
+ id_obj.getArrayItem(0).isString()))
{
- throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
- "trailer", this->file->getLastOffset(),
- "invalid /ID in trailer dictionary");
+ id1 = id_obj.getArrayItem(0).getStringValue();
+ }
+ else
+ {
+ // Treating a missing ID as the empty string enables qpdf to
+ // decrypt some invalid encrypted files with no /ID that
+ // poppler can read but Adobe Reader can't.
+ warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
+ "trailer", this->file->getLastOffset(),
+ "invalid /ID in trailer dictionary"));
}
- std::string id1 = id_obj.getArrayItem(0).getStringValue();
QPDFObjectHandle encryption_dict = this->trailer.getKey("/Encrypt");
if (! encryption_dict.isDictionary())
{
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 6cd4e014..aaf2f2ff 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -264,3 +264,4 @@ QPDFObjectHandle inline image token 0
QPDF not caching overridden objstm object 0
QPDFWriter original obj non-zero gen 0
QPDF_optimization indirect outlines 0
+QPDF xref space 2
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 2f2063f8..5321ae85 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
-$n_tests += 64;
+$n_tests += 65;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@@ -509,6 +509,14 @@ $td->runtest("check file",
{$td->FILE => "a.pdf"},
{$td->FILE => "gen1.qdf"});
+# This file, from a user, is missing /ID in its trailer even though it
+# is encrypted and also has a space instead of a newline after its
+# xref keyword. xpdf can open it, but Adobe reader can't.
+$td->runtest("check broken file",
+ {$td->COMMAND => "qpdf --check invalid-id-xref.pdf"},
+ {$td->FILE => "invalid-id-xref.out", $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+
show_ntests();
# ----------
$td->notify("--- Numeric range parsing tests ---");
diff --git a/qpdf/qtest/qpdf/invalid-id-xref.out b/qpdf/qtest/qpdf/invalid-id-xref.out
new file mode 100644
index 00000000..514d5a00
--- /dev/null
+++ b/qpdf/qtest/qpdf/invalid-id-xref.out
@@ -0,0 +1,16 @@
+WARNING: invalid-id-xref.pdf (trailer, file position 2493795): invalid /ID in trailer dictionary
+checking invalid-id-xref.pdf
+PDF Version: 1.1
+R = 3
+P = -1804
+User password =
+extract for accessibility: not allowed
+extract for any purpose: allowed
+print low resolution: allowed
+print high resolution: allowed
+modify document assembly: not allowed
+modify forms: not allowed
+modify annotations: allowed
+modify other: not allowed
+modify anything: not allowed
+File is not linearized
diff --git a/qpdf/qtest/qpdf/invalid-id-xref.pdf b/qpdf/qtest/qpdf/invalid-id-xref.pdf
new file mode 100644
index 00000000..eaae8041
--- /dev/null
+++ b/qpdf/qtest/qpdf/invalid-id-xref.pdf
Binary files differ