about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2017-08-09 03:44:37 +0200
committer Jay Berkenbilt <ejb@ql.org> 2017-08-11 01:01:41 +0200
commit ef8ae5449dc30782451beba64fdd0af86e1cb931 (patch)
tree 5ed173f9f86c578ba1ee08b06f63f23ecad2369a
parent 8320d16cd20240cc35ec84de6bb89f2d9431ea2d (diff)
download qpdf-ef8ae5449dc30782451beba64fdd0af86e1cb931.tar.zst
Allow QPDFTokenizer::readToken to return bad tokens
Sometimes we want to ignore bad tokens rather than having them throw an exception. A coverage case is commented out here and added in a later commit.
-rw-r--r-- include/qpdf/QPDF.hh 3
-rw-r--r-- include/qpdf/QPDFTokenizer.hh 3
-rw-r--r-- libqpdf/QPDF.cc 5
-rw-r--r-- libqpdf/QPDFTokenizer.cc 18
-rw-r--r-- qpdf/qtest/qpdf.test 9
-rw-r--r-- qpdf/qtest/qpdf/bad-token-startxref.out 6
-rw-r--r-- qpdf/qtest/qpdf/bad-token-startxref.pdf 80
7 files changed, 114 insertions, 10 deletions
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 58f6af6e..f57789a0 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -666,7 +666,8 @@ class QPDF
size_t recoverStreamLength(
PointerHolder<InputSource> input, int objid, int generation,
qpdf_offset_t stream_offset);
- QPDFTokenizer::Token readToken(PointerHolder<InputSource>);
+ QPDFTokenizer::Token readToken(PointerHolder<InputSource>,
+ bool allow_bad = false);
QPDFObjectHandle readObjectAtOffset(
bool attempt_recovery,
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index 2f3dfec2..39cb9225 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -138,7 +138,8 @@ class QPDFTokenizer
// exception thrown if there is an error.
QPDF_DLL
Token readToken(PointerHolder<InputSource> input,
- std::string const& context);
+ std::string const& context,
+ bool allow_bad = false);
private:
void reset();
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index cfadaa3a..3a8dc875 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -1329,9 +1329,10 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
}
QPDFTokenizer::Token
-QPDF::readToken(PointerHolder<InputSource> input)
+QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad)
{
- return this->tokenizer.readToken(input, this->last_object_description);
+ return this->tokenizer.readToken(
+ input, this->last_object_description, allow_bad);
}
QPDFObjectHandle
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index f7124e49..c0e09279 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -475,7 +475,8 @@ QPDFTokenizer::betweenTokens()
QPDFTokenizer::Token
QPDFTokenizer::readToken(PointerHolder<InputSource> input,
- std::string const& context)
+ std::string const& context,
+ bool allow_bad)
{
qpdf_offset_t offset = input->tell();
Token token;
@@ -514,13 +515,20 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
input->unreadCh(char_to_unread);
}
+ input->setLastOffset(offset);
+
if (token.getType() == tt_bad)
{
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- context, offset, token.getErrorMessage());
+ if (allow_bad)
+ {
+// QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
+ }
+ else
+ {
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ context, offset, token.getErrorMessage());
+ }
}
- input->setLastOffset(offset);
-
return token;
}
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 25b5b160..b3d13bde 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -232,7 +232,7 @@ foreach my $d (@bug_tests)
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
-$n_tests += 85;
+$n_tests += 86;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@@ -662,6 +662,13 @@ $td->runtest("combine show and --pages",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
+$td->runtest("ignore bad token",
+ {$td->COMMAND =>
+ "qpdf --show-xref bad-token-startxref.pdf"},
+ {$td->FILE => "bad-token-startxref.out",
+ $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+
show_ntests();
# ----------
$td->notify("--- Single Page ---");
diff --git a/qpdf/qtest/qpdf/bad-token-startxref.out b/qpdf/qtest/qpdf/bad-token-startxref.out
new file mode 100644
index 00000000..0dff1d34
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad-token-startxref.out
@@ -0,0 +1,6 @@
+1/0: uncompressed; offset = 9
+2/0: uncompressed; offset = 63
+3/0: uncompressed; offset = 135
+4/0: uncompressed; offset = 307
+5/0: uncompressed; offset = 403
+6/0: uncompressed; offset = 438
diff --git a/qpdf/qtest/qpdf/bad-token-startxref.pdf b/qpdf/qtest/qpdf/bad-token-startxref.pdf
new file mode 100644
index 00000000..1c02b94e
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad-token-startxref.pdf
@@ -0,0 +1,80 @@
+%PDF-1.3
+1 0 obj
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+
+2 0 obj
+<<
+ /Type /Pages
+ /Kids [
+ 3 0 R
+ ]
+ /Count 1
+>>
+endobj
+
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [0 0 612 792]
+ /Contents 4 0 R
+ /Resources <<
+ /ProcSet 5 0 R
+ /Font <<
+ /F1 6 0 R
+ >>
+ >>
+>>
+endobj
+
+4 0 obj
+<<
+ /Length 44
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+6 0 obj
+<<
+ /Type /Font
+ /Subtype /Type1
+ /Name /F1
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+>>
+endobj
+
+xref
+0 7
+0000000000 65535 f
+0000000009 00000 n
+0000000063 00000 n
+0000000135 00000 n
+0000000307 00000 n
+0000000403 00000 n
+0000000438 00000 n
+trailer <<
+ /Size 7
+ /Root 1 0 R
+>>
+startxref(
+startxref
+556
+%%EOF