aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2015-10-31 22:03:55 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2015-10-31 23:56:43 +0100
commit: b62cbe250885484a42ee161798830b1da9520097 (patch)
tree: 3a8f5316828ce39d193fbea3dea0bbfe7bdf8d89
parent: f0b85a1eb139a08bdf52e1fc9902364bbf8b1068 (diff)
download: qpdf-b62cbe250885484a42ee161798830b1da9520097.tar.zst
Tolerate some mangled xref tables
If xref table entries lack the spec-required trailing whitespace or contain a small amount of extra space, handle them anyway.
-rw-r--r-- ChangeLog 6
-rw-r--r-- libqpdf/QPDF.cc 9
-rw-r--r-- qpdf/qtest/qpdf.test 6
-rw-r--r-- qpdf/qtest/qpdf/no-space-in-xref.out 6
-rw-r--r-- qpdf/qtest/qpdf/no-space-in-xref.pdf 79
5 files changed, 99 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index f42a2f5c..34262ef0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2015-10-31 Jay Berkenbilt <ejb@ql.org>
+
+ * libqpdf/QPDF.cc (read_xrefTable): Be tolerant of some malformed
+ xref tables that don't have the required trailing space after each
+ line.
+
2015-10-29 Jay Berkenbilt <ejb@ql.org>
* Implement QPDFWriter::setDeterministicID and --deterministic-id
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 8bc8afc8..a14a07ea 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -488,7 +488,7 @@ qpdf_offset_t
QPDF::read_xrefTable(qpdf_offset_t xref_offset)
{
PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*");
- PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)");
+ PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])\\s*$)");
std::vector<QPDFObjGen> deleted_items;
@@ -512,8 +512,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
SEEK_SET);
int obj = atoi(m1.getMatch(1).c_str());
int num = atoi(m1.getMatch(2).c_str());
- static int const xref_entry_size = 20;
- char xref_entry[xref_entry_size + 1];
for (int i = obj; i < obj + num; ++i)
{
if (i == 0)
@@ -521,9 +519,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
// This is needed by checkLinearization()
this->first_xref_item_offset = this->file->tell();
}
- memset(xref_entry, 0, sizeof(xref_entry));
- this->file->read(xref_entry, xref_entry_size);
- PCRE::Match m2 = xref_entry_re.match(xref_entry);
+ std::string xref_entry = this->file->readLine(30);
+ PCRE::Match m2 = xref_entry_re.match(xref_entry.c_str());
if (! m2)
{
QTC::TC("qpdf", "QPDF invalid xref entry");
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index e222f756..7c377ea4 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
-$n_tests += 76;
+$n_tests += 77;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@@ -570,6 +570,10 @@ $td->runtest("detect loops in pages structure",
{$td->COMMAND => "qpdf --check pages-loop.pdf"},
{$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
+$td->runtest("no trailing space in xref table",
+ {$td->COMMAND => "qpdf --check no-space-in-xref.pdf"},
+ {$td->FILE => "no-space-in-xref.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
diff --git a/qpdf/qtest/qpdf/no-space-in-xref.out b/qpdf/qtest/qpdf/no-space-in-xref.out
new file mode 100644
index 00000000..57b55a22
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-space-in-xref.out
@@ -0,0 +1,6 @@
+checking no-space-in-xref.pdf
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect
diff --git a/qpdf/qtest/qpdf/no-space-in-xref.pdf b/qpdf/qtest/qpdf/no-space-in-xref.pdf
new file mode 100644
index 00000000..a7dbbd19
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-space-in-xref.pdf
@@ -0,0 +1,79 @@
+%PDF-1.3
+1 0 obj
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+
+2 0 obj
+<<
+ /Type /Pages
+ /Kids [
+ 3 0 R
+ ]
+ /Count 1
+>>
+endobj
+
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [0 0 612 792]
+ /Contents 4 0 R
+ /Resources <<
+ /ProcSet 5 0 R
+ /Font <<
+ /F1 6 0 R
+ >>
+ >>
+>>
+endobj
+
+4 0 obj
+<<
+ /Length 44
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+6 0 obj
+<<
+ /Type /Font
+ /Subtype /Type1
+ /Name /F1
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+>>
+endobj
+
+xref
+0 7
+0000000000 65535 f
+0000000009 00000 n
+0000000063 00000 n
+0000000135 00000 n
+0000000307 00000 n
+0000000403 00000 n
+0000000438 00000 n
+trailer <<
+ /Size 7
+ /Root 1 0 R
+>>
+startxref
+556
+%%EOF