about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2017-08-11 01:37:05 +0200
committer Jay Berkenbilt <ejb@ql.org> 2017-08-11 03:30:32 +0200
commit 30f109e244f365111d5219903f13d64cf1a95054 (patch)
tree 0bb3bca1f13bc8a2e3d0afa57bd96f3f06cc084c
parent 98a843c2a2e09df6457c023a8da52faa0d977a12 (diff)
download qpdf-30f109e244f365111d5219903f13d64cf1a95054.tar.zst
Read xref table without PCRE
Also accept more errors than before.
-rw-r--r-- ChangeLog 3
-rw-r--r-- include/qpdf/QPDF.hh 4
-rw-r--r-- libqpdf/QPDF.cc 180
-rw-r--r-- qpdf/qpdf.testcov 4
-rw-r--r-- qpdf/qtest/qpdf.test 9
-rw-r--r-- qpdf/qtest/qpdf/xref-errors.out 15
-rw-r--r-- qpdf/qtest/qpdf/xref-errors.pdf 79
7 files changed, 276 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index 6b7454eb..267ab05f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
2017-08-10 Jay Berkenbilt <ejb@ql.org>
+ * Be more forgiving of certain types of errors in the xref table
+ that don't interfere with interpreting the table.
+
* Remove unused "tracing" parameter from PointerHolder's
(T*, bool) constructor. This change breaks source code
compatibility, but since this argument to PointerHolder has not
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 92a66a34..004e4b9c 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -652,6 +652,10 @@ class QPDF
void setTrailer(QPDFObjectHandle obj);
void read_xref(qpdf_offset_t offset);
void reconstruct_xref(QPDFExc& e);
+ bool parse_xrefFirst(std::string const& line,
+ int& obj, int& num, int& bytes);
+ bool parse_xrefEntry(std::string const& line,
+ qpdf_offset_t& f1, int& f2, char& type);
qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
qpdf_offset_t processXRefStream(
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 25ef0dfd..fc0c103f 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -9,7 +9,6 @@
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
-#include <qpdf/PCRE.hh>
#include <qpdf/Pipeline.hh>
#include <qpdf/Pl_Discard.hh>
#include <qpdf/FileInputSource.hh>
@@ -537,12 +536,162 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
this->deleted_objects.clear();
}
+bool
+QPDF::parse_xrefFirst(std::string const& line,
+ int& obj, int& num, int& bytes)
+{
+ // Parse an xref subsection header of the form "[spaces]<digits><spaces><digits>[spaces]" from line. On success, obj and num receive the two numbers, bytes receives the number of characters consumed, and true is returned; on a syntax mismatch false is returned and the outputs are left untouched.
+ // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated buffer.
+ char const* p = line.c_str();
+ char const* start = line.c_str(); // kept so the consumed length can be computed at the end
+
+ // Skip zero or more leading spaces
+ while (QUtil::is_space(*p))
+ {
+ ++p;
+ }
+ // Require digit: the first field must start here
+ if (! QUtil::is_digit(*p))
+ {
+ return false;
+ }
+ // Gather digits of the first field (converted into obj below)
+ std::string obj_str;
+ while (QUtil::is_digit(*p))
+ {
+ obj_str.append(1, *p++);
+ }
+ // Require at least one space between the two fields
+ if (! QUtil::is_space(*p))
+ {
+ return false;
+ }
+ // Skip all spaces separating the two fields
+ while (QUtil::is_space(*p))
+ {
+ ++p;
+ }
+ // Require digit: the second field must start here
+ if (! QUtil::is_digit(*p))
+ {
+ return false;
+ }
+ // Gather digits of the second field (converted into num below)
+ std::string num_str;
+ while (QUtil::is_digit(*p))
+ {
+ num_str.append(1, *p++);
+ }
+ // Skip any space including line terminators, so that bytes counts the trailing whitespace as consumed too
+ while (QUtil::is_space(*p))
+ {
+ ++p;
+ }
+ bytes = p - start; // characters consumed from the start of line, including leading and trailing whitespace
+ obj = atoi(obj_str.c_str()); // NOTE(review): atoi does no range checking and the digit runs are not length-limited here
+ num = atoi(num_str.c_str());
+ return true;
+}
+
+bool
+QPDF::parse_xrefEntry(std::string const& line,
+ qpdf_offset_t& f1, int& f2, char& type)
+{
+ // Parse one xref table entry of the form "<digits> <digits> <f|n>" from line. Leading spaces, extra spaces between fields, and digit fields whose lengths are not 10 and 5 are accepted with a single warning; any other mismatch returns false. On success f1 and f2 receive the two numbers and type receives 'f' or 'n'.
+ // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated buffer.
+ char const* p = line.c_str();
+
+ // Skip zero or more spaces. There aren't supposed to be any, so each one marks the entry as invalid-but-accepted.
+ bool invalid = false;
+ while (QUtil::is_space(*p))
+ {
+ ++p;
+ QTC::TC("qpdf", "QPDF ignore first space in xref entry");
+ invalid = true;
+ }
+ // Require digit: the first field must start here
+ if (! QUtil::is_digit(*p))
+ {
+ return false;
+ }
+ // Gather digits of the first field (converted into f1 below)
+ std::string f1_str;
+ while (QUtil::is_digit(*p))
+ {
+ f1_str.append(1, *p++);
+ }
+ // Require at least one space after the first field
+ if (! QUtil::is_space(*p))
+ {
+ return false;
+ }
+ if (QUtil::is_space(*(p+1))) // *p is a space here, so p+1 is at worst the terminating '\0'
+ {
+ QTC::TC("qpdf", "QPDF ignore first extra space in xref entry");
+ invalid = true;
+ }
+ // Skip all spaces between the first and second fields
+ while (QUtil::is_space(*p))
+ {
+ ++p;
+ }
+ // Require digit: the second field must start here
+ if (! QUtil::is_digit(*p))
+ {
+ return false;
+ }
+ // Gather digits of the second field (converted into f2 below)
+ std::string f2_str;
+ while (QUtil::is_digit(*p))
+ {
+ f2_str.append(1, *p++);
+ }
+ // Require at least one space after the second field
+ if (! QUtil::is_space(*p))
+ {
+ return false;
+ }
+ if (QUtil::is_space(*(p+1))) // same reasoning as above: *p is a space, so reading p+1 is in bounds
+ {
+ QTC::TC("qpdf", "QPDF ignore second extra space in xref entry");
+ invalid = true;
+ }
+ // Skip all spaces between the second field and the type character
+ while (QUtil::is_space(*p))
+ {
+ ++p;
+ }
+ if ((*p == 'f') || (*p == 'n')) // only these two type characters are accepted; nothing after the type character is examined
+ {
+ type = *p;
+ }
+ else
+ {
+ return false;
+ }
+ if ((f1_str.length() != 10) || (f2_str.length() != 5)) // field widths other than 10 and 5 digits are tolerated but flagged
+ {
+ QTC::TC("qpdf", "QPDF ignore length error xref entry");
+ invalid = true;
+ }
+
+ if (invalid) // one warning per entry, no matter how many separate problems were tolerated
+ {
+ warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
+ "xref table",
+ this->file->getLastOffset(),
+ "accepting invalid xref table entry"));
+ }
+
+ f1 = QUtil::string_to_ll(f1_str.c_str());
+ f2 = atoi(f2_str.c_str()); // NOTE(review): atoi does no range checking and the digit run is not length-limited here
+
+ return true;
+}
+
qpdf_offset_t
QPDF::read_xrefTable(qpdf_offset_t xref_offset)
{
- PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*");
- PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])\\s*$)");
-
std::vector<QPDFObjGen> deleted_items;
this->file->seek(xref_offset, SEEK_SET);
@@ -553,18 +702,17 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
memset(linebuf, 0, sizeof(linebuf));
this->file->read(linebuf, sizeof(linebuf) - 1);
std::string line = linebuf;
- PCRE::Match m1 = xref_first_re.match(line.c_str());
- if (! m1)
+ int obj = 0;
+ int num = 0;
+ int bytes = 0;
+ if (! parse_xrefFirst(line, obj, num, bytes))
{
QTC::TC("qpdf", "QPDF invalid xref");
throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
"xref table", this->file->getLastOffset(),
"xref syntax invalid");
}
- file->seek(this->file->getLastOffset() + m1.getMatch(0).length(),
- SEEK_SET);
- int obj = atoi(m1.getMatch(1).c_str());
- int num = atoi(m1.getMatch(2).c_str());
+ this->file->seek(this->file->getLastOffset() + bytes, SEEK_SET);
for (int i = obj; i < obj + num; ++i)
{
if (i == 0)
@@ -573,8 +721,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
this->first_xref_item_offset = this->file->tell();
}
std::string xref_entry = this->file->readLine(30);
- PCRE::Match m2 = xref_entry_re.match(xref_entry.c_str());
- if (! m2)
+ // For xref_table, these will always be small enough to be ints
+ qpdf_offset_t f1 = 0;
+ int f2 = 0;
+ char type = '\0';
+ if (! parse_xrefEntry(xref_entry, f1, f2, type))
{
QTC::TC("qpdf", "QPDF invalid xref entry");
throw QPDFExc(
@@ -583,11 +734,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
"invalid xref entry (obj=" +
QUtil::int_to_string(i) + ")");
}
-
- // For xref_table, these will always be small enough to be ints
- qpdf_offset_t f1 = QUtil::string_to_ll(m2.getMatch(1).c_str());
- int f2 = atoi(m2.getMatch(2).c_str());
- char type = m2.getMatch(3).at(0);
if (type == 'f')
{
// Save deleted items until after we've checked the
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 2860f55e..c08ed721 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -289,3 +289,7 @@ qpdf single-pages %d 0
qpdf single-pages .pdf 0
qpdf single-pages other 0
QPDFTokenizer allowing bad token 0
+QPDF ignore first space in xref entry 0
+QPDF ignore first extra space in xref entry 0
+QPDF ignore second extra space in xref entry 0
+QPDF ignore length error xref entry 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index b3d13bde..d0cba589 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -232,7 +232,7 @@ foreach my $d (@bug_tests)
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
-$n_tests += 86;
+$n_tests += 87;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@@ -669,6 +669,13 @@ $td->runtest("ignore bad token",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
+$td->runtest("recoverable xref errors",
+ {$td->COMMAND =>
+ "qpdf --check --show-xref xref-errors.pdf"},
+ {$td->FILE => "xref-errors.out",
+ $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+
show_ntests();
# ----------
$td->notify("--- Single Page ---");
diff --git a/qpdf/qtest/qpdf/xref-errors.out b/qpdf/qtest/qpdf/xref-errors.out
new file mode 100644
index 00000000..7a2cf384
--- /dev/null
+++ b/qpdf/qtest/qpdf/xref-errors.out
@@ -0,0 +1,15 @@
+WARNING: xref-errors.pdf (xref table, file position 585): accepting invalid xref table entry
+WARNING: xref-errors.pdf (xref table, file position 606): accepting invalid xref table entry
+WARNING: xref-errors.pdf (xref table, file position 627): accepting invalid xref table entry
+WARNING: xref-errors.pdf (xref table, file position 648): accepting invalid xref table entry
+WARNING: xref-errors.pdf (xref table, file position 667): accepting invalid xref table entry
+checking xref-errors.pdf
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+1/0: uncompressed; offset = 9
+2/0: uncompressed; offset = 63
+3/0: uncompressed; offset = 135
+4/0: uncompressed; offset = 307
+5/0: uncompressed; offset = 403
+6/0: uncompressed; offset = 438
diff --git a/qpdf/qtest/qpdf/xref-errors.pdf b/qpdf/qtest/qpdf/xref-errors.pdf
new file mode 100644
index 00000000..1778ab92
--- /dev/null
+++ b/qpdf/qtest/qpdf/xref-errors.pdf
@@ -0,0 +1,79 @@
+%PDF-1.3
+1 0 obj
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+
+2 0 obj
+<<
+ /Type /Pages
+ /Kids [
+ 3 0 R
+ ]
+ /Count 1
+>>
+endobj
+
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [0 0 612 792]
+ /Contents 4 0 R
+ /Resources <<
+ /ProcSet 5 0 R
+ /Font <<
+ /F1 6 0 R
+ >>
+ >>
+>>
+endobj
+
+4 0 obj
+<<
+ /Length 44
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+6 0 obj
+<<
+ /Type /Font
+ /Subtype /Type1
+ /Name /F1
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+>>
+endobj
+
+xref
+0 7
+0000000000 65535 f
+ 0000000009 00000 n
+0000000063 00000 n
+0000000135 00000 n
+000000307 00000 n
+0000000403 0000 n
+0000000438 00000 n
+trailer <<
+ /Size 7
+ /Root 1 0 R
+>>
+startxref
+556
+%%EOF