From ef8ae5449dc30782451beba64fdd0af86e1cb931 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 8 Aug 2017 21:44:37 -0400 Subject: Allow QPDFTokenizer::readToken to return bad tokens Sometimes we want to ignore bad tokens rather than having them throw an exception. A coverage case is commented out here and added in a later commit. --- include/qpdf/QPDF.hh | 3 +- include/qpdf/QPDFTokenizer.hh | 3 +- libqpdf/QPDF.cc | 5 ++- libqpdf/QPDFTokenizer.cc | 18 +++++--- qpdf/qtest/qpdf.test | 9 +++- qpdf/qtest/qpdf/bad-token-startxref.out | 6 +++ qpdf/qtest/qpdf/bad-token-startxref.pdf | 80 +++++++++++++++++++++++++++++++++ 7 files changed, 114 insertions(+), 10 deletions(-) create mode 100644 qpdf/qtest/qpdf/bad-token-startxref.out create mode 100644 qpdf/qtest/qpdf/bad-token-startxref.pdf diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 58f6af6e..f57789a0 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -666,7 +666,8 @@ class QPDF size_t recoverStreamLength( PointerHolder input, int objid, int generation, qpdf_offset_t stream_offset); - QPDFTokenizer::Token readToken(PointerHolder); + QPDFTokenizer::Token readToken(PointerHolder, + bool allow_bad = false); QPDFObjectHandle readObjectAtOffset( bool attempt_recovery, diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index 2f3dfec2..39cb9225 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -138,7 +138,8 @@ class QPDFTokenizer // exception thrown if there is an error. QPDF_DLL Token readToken(PointerHolder input, - std::string const& context); + std::string const& context, + bool allow_bad = false); private: void reset(); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index cfadaa3a..3a8dc875 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1329,9 +1329,10 @@ QPDF::recoverStreamLength(PointerHolder input, } QPDFTokenizer::Token -QPDF::readToken(PointerHolder input) +QPDF::readToken(PointerHolder input, bool allow_bad) { - return this->tokenizer.readToken(input, this->last_object_description); + return this->tokenizer.readToken( + input, this->last_object_description, allow_bad); } QPDFObjectHandle diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index f7124e49..c0e09279 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -475,7 +475,8 @@ QPDFTokenizer::betweenTokens() QPDFTokenizer::Token QPDFTokenizer::readToken(PointerHolder input, - std::string const& context) + std::string const& context, + bool allow_bad) { qpdf_offset_t offset = input->tell(); Token token; @@ -514,13 +515,20 @@ QPDFTokenizer::readToken(PointerHolder input, input->unreadCh(char_to_unread); } + input->setLastOffset(offset); + if (token.getType() == tt_bad) { - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), - context, offset, token.getErrorMessage()); + if (allow_bad) + { +// QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); + } + else + { + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), + context, offset, token.getErrorMessage()); + } } - input->setLastOffset(offset); - return token; } diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 25b5b160..b3d13bde 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -232,7 +232,7 @@ foreach my $d (@bug_tests) show_ntests(); # ---------- $td->notify("--- Miscellaneous Tests ---"); -$n_tests += 85; +$n_tests += 86; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, @@ -662,6 +662,13 @@ $td->runtest("combine show and --pages", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("ignore bad token", + {$td->COMMAND => + "qpdf --show-xref bad-token-startxref.pdf"}, + {$td->FILE => "bad-token-startxref.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + show_ntests(); # ---------- $td->notify("--- Single Page ---"); diff --git a/qpdf/qtest/qpdf/bad-token-startxref.out b/qpdf/qtest/qpdf/bad-token-startxref.out new file mode 100644 index 00000000..0dff1d34 --- /dev/null +++ b/qpdf/qtest/qpdf/bad-token-startxref.out @@ -0,0 +1,6 @@ +1/0: uncompressed; offset = 9 +2/0: uncompressed; offset = 63 +3/0: uncompressed; offset = 135 +4/0: uncompressed; offset = 307 +5/0: uncompressed; offset = 403 +6/0: uncompressed; offset = 438 diff --git a/qpdf/qtest/qpdf/bad-token-startxref.pdf b/qpdf/qtest/qpdf/bad-token-startxref.pdf new file mode 100644 index 00000000..1c02b94e --- /dev/null +++ b/qpdf/qtest/qpdf/bad-token-startxref.pdf @@ -0,0 +1,80 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding +>> +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R +>> +startxref( +startxref +556 +%%EOF -- cgit v1.2.3-70-g09d2