summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2013-12-14 21:08:54 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2013-12-14 21:17:23 +0100
commit: e9a319fb9536347aeab076cdb18e1ff97eb66c07 (patch)
tree: 1479c589e9fb385b3486b51d1e2029a9d1195a5d
parent: 7393a038686ceb96461afe5d8ffa901a286baee6 (diff)
download: qpdf-e9a319fb9536347aeab076cdb18e1ff97eb66c07.tar.zst
Allow arbitrary whitespace, not just newline, after xref
Fixes #27.
-rw-r--r-- ChangeLog 5
-rw-r--r-- libqpdf/QPDF.cc 9
-rw-r--r-- qpdf/qtest/qpdf.test 1
-rw-r--r-- qpdf/qtest/qpdf/leading-junk.pdf bin 13670 -> 13670 bytes
4 files changed, 13 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 35037447..12cb789b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2013-12-14 Jay Berkenbilt <ejb@ql.org>
+
+ * Allow any whitespace rather than just newline to follow xref header.
+ This allows qpdf to read a wider range of damaged files.
+
2013-11-29 Jay Berkenbilt <ejb@ql.org>
* If NO_GET_ENVIRONMENT is #defined, for Windows only,
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index e3c3d83e..d1ebb8c2 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -487,7 +487,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
qpdf_offset_t
QPDF::read_xrefTable(qpdf_offset_t xref_offset)
{
- PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)");
+ PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*");
PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)");
std::vector<QPDFObjGen> deleted_items;
@@ -496,7 +496,10 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
bool done = false;
while (! done)
{
- std::string line = this->file->readLine(50);
+ char linebuf[51];
+ memset(linebuf, 0, sizeof(linebuf));
+ this->file->read(linebuf, sizeof(linebuf) - 1);
+ std::string line = linebuf;
PCRE::Match m1 = xref_first_re.match(line.c_str());
if (! m1)
{
@@ -505,6 +508,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
"xref table", this->file->getLastOffset(),
"xref syntax invalid");
}
+ file->seek(this->file->getLastOffset() + m1.getMatch(0).length(),
+ SEEK_SET);
int obj = atoi(m1.getMatch(1).c_str());
int num = atoi(m1.getMatch(2).c_str());
static int const xref_entry_size = 20;
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index b0390cc1..d54adc7e 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -464,6 +464,7 @@ $td->runtest("object with zero offset",
{$td->COMMAND => "qpdf --check zero-offset.pdf"},
{$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
+# leading-junk also has a space instead of a newline after xref
$td->runtest("check file with leading junk",
{$td->COMMAND => "qpdf --check leading-junk.pdf"},
{$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0},
diff --git a/qpdf/qtest/qpdf/leading-junk.pdf b/qpdf/qtest/qpdf/leading-junk.pdf
index 2b2a0a2c..45536159 100644
--- a/qpdf/qtest/qpdf/leading-junk.pdf
+++ b/qpdf/qtest/qpdf/leading-junk.pdf
Binary files differ