about | summary | refs | log | tree | commit | diff | stats
path: root/libqpdf
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2018-02-03 03:16:40 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2018-02-19 03:05:47 +0100
commit: 5136238f2a973f693cea53c340dcff23a655531f (patch)
tree: 8cc1d2a1fdf1833fa67454b2707994b3328c879c /libqpdf
parent: 30709935af023dd66a17f2d494aa7dc84b7177e1 (diff)
download: qpdf-5136238f2a973f693cea53c340dcff23a655531f.tar.zst
Detect and report bad tokens in content normalization
Diffstat (limited to 'libqpdf')
-rw-r--r-- libqpdf/ContentNormalizer.cc 26
-rw-r--r-- libqpdf/QPDF_Stream.cc 27
-rw-r--r-- libqpdf/qpdf/ContentNormalizer.hh 7
3 files changed, 59 insertions, 1 deletions
diff --git a/libqpdf/ContentNormalizer.cc b/libqpdf/ContentNormalizer.cc
index 35a8ad74..f85ab829 100644
--- a/libqpdf/ContentNormalizer.cc
+++ b/libqpdf/ContentNormalizer.cc
@@ -1,7 +1,9 @@
#include <qpdf/ContentNormalizer.hh>
#include <qpdf/QUtil.hh>
-ContentNormalizer::ContentNormalizer()
+ContentNormalizer::ContentNormalizer() :
+ any_bad_tokens(false),
+ last_token_was_bad(false)
{
}
@@ -15,6 +17,16 @@ ContentNormalizer::handleToken(QPDFTokenizer::Token const& token)
std::string value = token.getRawValue();
QPDFTokenizer::token_type_e token_type = token.getType();
+ if (token_type == QPDFTokenizer::tt_bad)
+ {
+ this->any_bad_tokens = true;
+ this->last_token_was_bad = true;
+ }
+ else if (token_type != QPDFTokenizer::tt_eof)
+ {
+ this->last_token_was_bad = false;
+ }
+
switch (token_type)
{
case QPDFTokenizer::tt_space:
@@ -75,3 +87,15 @@ ContentNormalizer::handleEOF()
{
finish();
}
+
+bool
+ContentNormalizer::anyBadTokens() const
+{
+ return this->any_bad_tokens;
+}
+
+bool
+ContentNormalizer::lastTokenWasBad()const
+{
+ return this->last_token_was_bad;
+}
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index a026f9a4..bb1e24e6 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -609,6 +609,33 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
}
}
+ if (filter &&
+ (! suppress_warnings) &&
+ normalizer.getPointer() &&
+ normalizer->anyBadTokens())
+ {
+ warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
+ "", this->offset,
+ "content normalization encountered bad tokens"));
+ if (normalizer->lastTokenWasBad())
+ {
+ QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
+ warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
+ "", this->offset,
+ "normalized content ended with a bad token;"
+ " you may be able to resolve this by"
+ " coalescing content streams in combination"
+ " with normalizing content. From the command"
+ " line, specify --coalesce-contents"));
+ }
+ warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
+ "", this->offset,
+ "Resulting stream data may be corrupted but is"
+ " may still useful for manual inspection."
+ " For more information on this warning, search"
+ " for content normalization in the manual."));
+ }
+
return filter;
}
diff --git a/libqpdf/qpdf/ContentNormalizer.hh b/libqpdf/qpdf/ContentNormalizer.hh
index 504f15e8..89b28f3a 100644
--- a/libqpdf/qpdf/ContentNormalizer.hh
+++ b/libqpdf/qpdf/ContentNormalizer.hh
@@ -10,6 +10,13 @@ class ContentNormalizer: public QPDFObjectHandle::TokenFilter
virtual ~ContentNormalizer();
virtual void handleToken(QPDFTokenizer::Token const&);
virtual void handleEOF();
+
+ bool anyBadTokens() const;
+ bool lastTokenWasBad() const;
+
+ private:
+ bool any_bad_tokens;
+ bool last_token_was_bad;
};
#endif // __CONTENTNORMALIZER_HH__