diff options
author | Jay Berkenbilt <ejb@ql.org> | 2017-08-09 03:44:37 +0200 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2017-08-11 01:01:41 +0200 |
commit | ef8ae5449dc30782451beba64fdd0af86e1cb931 (patch) | |
tree | 5ed173f9f86c578ba1ee08b06f63f23ecad2369a | |
parent | 8320d16cd20240cc35ec84de6bb89f2d9431ea2d (diff) | |
download | qpdf-ef8ae5449dc30782451beba64fdd0af86e1cb931.tar.zst |
Allow QPDFTokenizer::readToken to return bad tokens
Sometimes we want to ignore bad tokens rather than having readToken
throw an exception for them. The coverage case (QTC::TC) for the new
code path is commented out here and is added in a later commit.
-rw-r--r-- | include/qpdf/QPDF.hh | 3 | ||||
-rw-r--r-- | include/qpdf/QPDFTokenizer.hh | 3 | ||||
-rw-r--r-- | libqpdf/QPDF.cc | 5 | ||||
-rw-r--r-- | libqpdf/QPDFTokenizer.cc | 18 | ||||
-rw-r--r-- | qpdf/qtest/qpdf.test | 9 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/bad-token-startxref.out | 6 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/bad-token-startxref.pdf | 80 |
7 files changed, 114 insertions, 10 deletions
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 58f6af6e..f57789a0 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -666,7 +666,8 @@ class QPDF size_t recoverStreamLength( PointerHolder<InputSource> input, int objid, int generation, qpdf_offset_t stream_offset); - QPDFTokenizer::Token readToken(PointerHolder<InputSource>); + QPDFTokenizer::Token readToken(PointerHolder<InputSource>, + bool allow_bad = false); QPDFObjectHandle readObjectAtOffset( bool attempt_recovery, diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index 2f3dfec2..39cb9225 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -138,7 +138,8 @@ class QPDFTokenizer // exception thrown if there is an error. QPDF_DLL Token readToken(PointerHolder<InputSource> input, - std::string const& context); + std::string const& context, + bool allow_bad = false); private: void reset(); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index cfadaa3a..3a8dc875 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1329,9 +1329,10 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, } QPDFTokenizer::Token -QPDF::readToken(PointerHolder<InputSource> input) +QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad) { - return this->tokenizer.readToken(input, this->last_object_description); + return this->tokenizer.readToken( + input, this->last_object_description, allow_bad); } QPDFObjectHandle diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index f7124e49..c0e09279 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -475,7 +475,8 @@ QPDFTokenizer::betweenTokens() QPDFTokenizer::Token QPDFTokenizer::readToken(PointerHolder<InputSource> input, - std::string const& context) + std::string const& context, + bool allow_bad) { qpdf_offset_t offset = input->tell(); Token token; @@ -514,13 +515,20 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, input->unreadCh(char_to_unread); 
} + input->setLastOffset(offset); + if (token.getType() == tt_bad) { - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), - context, offset, token.getErrorMessage()); + if (allow_bad) + { +// QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); + } + else + { + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), + context, offset, token.getErrorMessage()); + } } - input->setLastOffset(offset); - return token; } diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 25b5b160..b3d13bde 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -232,7 +232,7 @@ foreach my $d (@bug_tests) show_ntests(); # ---------- $td->notify("--- Miscellaneous Tests ---"); -$n_tests += 85; +$n_tests += 86; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, @@ -662,6 +662,13 @@ $td->runtest("combine show and --pages", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("ignore bad token", + {$td->COMMAND => + "qpdf --show-xref bad-token-startxref.pdf"}, + {$td->FILE => "bad-token-startxref.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + show_ntests(); # ---------- $td->notify("--- Single Page ---"); diff --git a/qpdf/qtest/qpdf/bad-token-startxref.out b/qpdf/qtest/qpdf/bad-token-startxref.out new file mode 100644 index 00000000..0dff1d34 --- /dev/null +++ b/qpdf/qtest/qpdf/bad-token-startxref.out @@ -0,0 +1,6 @@ +1/0: uncompressed; offset = 9 +2/0: uncompressed; offset = 63 +3/0: uncompressed; offset = 135 +4/0: uncompressed; offset = 307 +5/0: uncompressed; offset = 403 +6/0: uncompressed; offset = 438 diff --git a/qpdf/qtest/qpdf/bad-token-startxref.pdf b/qpdf/qtest/qpdf/bad-token-startxref.pdf new file mode 100644 index 00000000..1c02b94e --- /dev/null +++ b/qpdf/qtest/qpdf/bad-token-startxref.pdf @@ -0,0 +1,80 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + 
/Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding +>> +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R +>> +startxref( +startxref +556 +%%EOF |