about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 3
-rw-r--r-- libqpdf/QPDF.cc 38
-rw-r--r-- qpdf/qpdf.testcov 2
-rw-r--r-- qpdf/qtest/qpdf.test 4
-rw-r--r-- qpdf/qtest/qpdf/bad37-recover.out 6
-rw-r--r-- qpdf/qtest/qpdf/bad37.out 6
-rw-r--r-- qpdf/qtest/qpdf/bad37.pdf 80
-rw-r--r-- qpdf/qtest/qpdf/bad38-recover.out 8
-rw-r--r-- qpdf/qtest/qpdf/bad38.out 1
-rw-r--r-- qpdf/qtest/qpdf/bad38.pdf 81
10 files changed, 228 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index b0e8dd1f..d52a51b4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
2019-08-19 Jay Berkenbilt <ejb@ql.org>
+ * Accept (and warn for) extraneous whitespace preceding the xref
+ table. Fixes #341.
+
* Accept (and warn for) extraneous whitespace between the stream
keyword and newline. Fixes #329.
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index e83dfa09..068630d1 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -570,6 +570,37 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
char buf[7];
memset(buf, 0, sizeof(buf));
this->m->file->seek(xref_offset, SEEK_SET);
+ // Some files miss the mark a little with startxref. We could
+ // do a better job of searching in the neighborhood for
+ // something that looks like either an xref table or stream,
+ // but the simple heuristic of skipping whitespace can help
+ // with the xref table case and is harmless with the stream
+ // case.
+ bool done = false;
+ bool skipped_space = false;
+ while (! done)
+ {
+ char ch;
+ if (1 == this->m->file->read(&ch, 1))
+ {
+ if (QUtil::is_space(ch))
+ {
+ skipped_space = true;
+ }
+ else
+ {
+ this->m->file->unreadCh(ch);
+ done = true;
+ }
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDF eof skipping spaces before xref",
+ skipped_space ? 0 : 1);
+ done = true;
+ }
+ }
+
this->m->file->read(buf, sizeof(buf) - 1);
// The PDF spec says xref must be followed by a line
// terminator, but files exist in the wild where it is
@@ -577,6 +608,13 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
if ((strncmp(buf, "xref", 4) == 0) &&
QUtil::is_space(buf[4]))
{
+ if (skipped_space)
+ {
+ QTC::TC("qpdf", "QPDF xref skipped space");
+ warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
+ "", 0,
+ "extraneous whitespace seen before xref"));
+ }
QTC::TC("qpdf", "QPDF xref space",
((buf[4] == '\n') ? 0 :
(buf[4] == '\r') ? 1 :
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 34316ecd..f1acee76 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -441,3 +441,5 @@ QPDFObjectHandle int returning INT_MIN 0
QPDFObjectHandle int returning INT_MAX 0
QPDFObjectHandle uint returning UINT_MAX 0
QPDFObjectHandle uint uint returning 0 0
+QPDF xref skipped space 0
+QPDF eof skipping spaces before xref 1
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 546a3966..39d22077 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -2291,6 +2291,8 @@ my @badfiles = ("not a PDF file", # 1
"obj/gen in wrong place", # 34
"object stream of wrong type", # 35
"bad dictionary key", # 36
+ "space before xref", # 37
+ "startxref to space then eof", # 38
);
$n_tests += @badfiles + 6;
@@ -2301,7 +2303,7 @@ $n_tests += @badfiles + 6;
# have error conditions that used to be fatal but are now considered
# non-fatal.
my %badtest_overrides = ();
-for(6, 12..15, 17, 18..32, 34, 36)
+for(6, 12..15, 17, 18..32, 34, 36..37)
{
$badtest_overrides{$_} = 0;
}
diff --git a/qpdf/qtest/qpdf/bad37-recover.out b/qpdf/qtest/qpdf/bad37-recover.out
new file mode 100644
index 00000000..7d0b7514
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad37-recover.out
@@ -0,0 +1,6 @@
+WARNING: bad37.pdf: extraneous whitespace seen before xref
+/QTest is direct and has type string (6)
+/QTest is a string with value potato
+unparse: (potato)
+unparseResolved: (potato)
+test 1 done
diff --git a/qpdf/qtest/qpdf/bad37.out b/qpdf/qtest/qpdf/bad37.out
new file mode 100644
index 00000000..8a2e07a6
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad37.out
@@ -0,0 +1,6 @@
+WARNING: bad37.pdf: extraneous whitespace seen before xref
+/QTest is direct and has type string (6)
+/QTest is a string with value potato
+unparse: (potato)
+unparseResolved: (potato)
+test 0 done
diff --git a/qpdf/qtest/qpdf/bad37.pdf b/qpdf/qtest/qpdf/bad37.pdf
new file mode 100644
index 00000000..2eb0771b
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad37.pdf
@@ -0,0 +1,80 @@
+%PDF-1.3
+1 0 obj
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+
+2 0 obj
+<<
+ /Type /Pages
+ /Kids [
+ 3 0 R
+ ]
+ /Count 1
+>>
+endobj
+
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [0 0 612 792]
+ /Contents 4 0 R
+ /Resources <<
+ /ProcSet 5 0 R
+ /Font <<
+ /F1 6 0 R
+ >>
+ >>
+>>
+endobj
+
+4 0 obj
+<<
+ /Length 44
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+6 0 obj
+<<
+ /Type /Font
+ /Subtype /Type1
+ /Name /F1
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+>>
+endobj
+
+xref
+0 7
+0000000000 65535 f
+0000000009 00000 n
+0000000063 00000 n
+0000000135 00000 n
+0000000307 00000 n
+0000000403 00000 n
+0000000438 00000 n
+trailer <<
+ /Size 7
+ /Root 1 0 R
+ /QTest (potato)
+>>
+startxref
+555
+%%EOF
diff --git a/qpdf/qtest/qpdf/bad38-recover.out b/qpdf/qtest/qpdf/bad38-recover.out
new file mode 100644
index 00000000..1350df2d
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad38-recover.out
@@ -0,0 +1,8 @@
+WARNING: bad38.pdf: file is damaged
+WARNING: bad38.pdf (offset 781): xref not found
+WARNING: bad38.pdf: Attempting to reconstruct cross-reference table
+/QTest is direct and has type string (6)
+/QTest is a string with value potato
+unparse: (potato)
+unparseResolved: (potato)
+test 1 done
diff --git a/qpdf/qtest/qpdf/bad38.out b/qpdf/qtest/qpdf/bad38.out
new file mode 100644
index 00000000..3776b6c7
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad38.out
@@ -0,0 +1 @@
+bad38.pdf (offset 781): xref not found
diff --git a/qpdf/qtest/qpdf/bad38.pdf b/qpdf/qtest/qpdf/bad38.pdf
new file mode 100644
index 00000000..8dabc153
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad38.pdf
@@ -0,0 +1,81 @@
+%PDF-1.3
+1 0 obj
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+
+2 0 obj
+<<
+ /Type /Pages
+ /Kids [
+ 3 0 R
+ ]
+ /Count 1
+>>
+endobj
+
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [0 0 612 792]
+ /Contents 4 0 R
+ /Resources <<
+ /ProcSet 5 0 R
+ /Font <<
+ /F1 6 0 R
+ >>
+ >>
+>>
+endobj
+
+4 0 obj
+<<
+ /Length 44
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+6 0 obj
+<<
+ /Type /Font
+ /Subtype /Type1
+ /Name /F1
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+>>
+endobj
+
+xref
+0 7
+0000000000 65535 f
+0000000009 00000 n
+0000000063 00000 n
+0000000135 00000 n
+0000000307 00000 n
+0000000403 00000 n
+0000000438 00000 n
+trailer <<
+ /Size 7
+ /Root 1 0 R
+ /QTest (potato)
+>>
+startxref
+781
+%%EOF
+