diff options
author | Jay Berkenbilt <ejb@ql.org> | 2018-02-03 03:16:40 +0100 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2018-02-19 03:05:47 +0100 |
commit | 5136238f2a973f693cea53c340dcff23a655531f (patch) | |
tree | 8cc1d2a1fdf1833fa67454b2707994b3328c879c /libqpdf | |
parent | 30709935af023dd66a17f2d494aa7dc84b7177e1 (diff) | |
download | qpdf-5136238f2a973f693cea53c340dcff23a655531f.tar.zst |
Detect and report bad tokens in content normalization
Diffstat (limited to 'libqpdf')
-rw-r--r-- | libqpdf/ContentNormalizer.cc | 26 | ||||
-rw-r--r-- | libqpdf/QPDF_Stream.cc | 27 | ||||
-rw-r--r-- | libqpdf/qpdf/ContentNormalizer.hh | 7 |
3 files changed, 59 insertions, 1 deletion
```diff
diff --git a/libqpdf/ContentNormalizer.cc b/libqpdf/ContentNormalizer.cc
index 35a8ad74..f85ab829 100644
--- a/libqpdf/ContentNormalizer.cc
+++ b/libqpdf/ContentNormalizer.cc
@@ -1,7 +1,9 @@
 #include <qpdf/ContentNormalizer.hh>
 #include <qpdf/QUtil.hh>
 
-ContentNormalizer::ContentNormalizer()
+ContentNormalizer::ContentNormalizer() :
+    any_bad_tokens(false),
+    last_token_was_bad(false)
 {
 }
 
@@ -15,6 +17,16 @@ ContentNormalizer::handleToken(QPDFTokenizer::Token const& token)
     std::string value = token.getRawValue();
     QPDFTokenizer::token_type_e token_type = token.getType();
 
+    if (token_type == QPDFTokenizer::tt_bad)
+    {
+        this->any_bad_tokens = true;
+        this->last_token_was_bad = true;
+    }
+    else if (token_type != QPDFTokenizer::tt_eof)
+    {
+        this->last_token_was_bad = false;
+    }
+
     switch (token_type)
     {
       case QPDFTokenizer::tt_space:
@@ -75,3 +87,15 @@ ContentNormalizer::handleEOF()
 {
     finish();
 }
+
+bool
+ContentNormalizer::anyBadTokens() const
+{
+    return this->any_bad_tokens;
+}
+
+bool
+ContentNormalizer::lastTokenWasBad()const
+{
+    return this->last_token_was_bad;
+}
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index a026f9a4..bb1e24e6 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -609,6 +609,33 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
         }
     }
 
+    if (filter &&
+        (! suppress_warnings) &&
+        normalizer.getPointer() &&
+        normalizer->anyBadTokens())
+    {
+        warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
+                     "", this->offset,
+                     "content normalization encountered bad tokens"));
+        if (normalizer->lastTokenWasBad())
+        {
+            QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
+            warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
+                         "", this->offset,
+                         "normalized content ended with a bad token;"
+                         " you may be able to resolve this by"
+                         " coalescing content streams in combination"
+                         " with normalizing content. From the command"
+                         " line, specify --coalesce-contents"));
+        }
+        warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
+                     "", this->offset,
+                     "Resulting stream data may be corrupted but is"
+                     " may still useful for manual inspection."
+                     " For more information on this warning, search"
+                     " for content normalization in the manual."));
+    }
+
     return filter;
 }
 
diff --git a/libqpdf/qpdf/ContentNormalizer.hh b/libqpdf/qpdf/ContentNormalizer.hh
index 504f15e8..89b28f3a 100644
--- a/libqpdf/qpdf/ContentNormalizer.hh
+++ b/libqpdf/qpdf/ContentNormalizer.hh
@@ -10,6 +10,13 @@ class ContentNormalizer: public QPDFObjectHandle::TokenFilter
     virtual ~ContentNormalizer();
     virtual void handleToken(QPDFTokenizer::Token const&);
     virtual void handleEOF();
+
+    bool anyBadTokens() const;
+    bool lastTokenWasBad() const;
+
+  private:
+    bool any_bad_tokens;
+    bool last_token_was_bad;
 };
 
 #endif // __CONTENTNORMALIZER_HH__
```