aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf/ContentNormalizer.cc
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2018-02-03 00:21:34 +0100
committer Jay Berkenbilt <ejb@ql.org> 2018-02-19 03:05:46 +0100
commit 99101044429c3c91bd11bdd1b26e5b6c2ceb140b (patch)
tree 5ab366eab31ddf76e80f99bd1d34c421291f1c4e /libqpdf/ContentNormalizer.cc
parent b8723e97f4b94fe03e631aab0309382ead3137ed (diff)
download qpdf-99101044429c3c91bd11bdd1b26e5b6c2ceb140b.tar.zst
Implement TokenFilter and refactor Pl_QPDFTokenizer
Implement a TokenFilter class and refactor Pl_QPDFTokenizer to use a TokenFilter class called ContentNormalizer. Pl_QPDFTokenizer is now a general filter that passes data through a TokenFilter.
Diffstat (limited to 'libqpdf/ContentNormalizer.cc')
-rw-r--r-- libqpdf/ContentNormalizer.cc 77
1 files changed, 77 insertions, 0 deletions
diff --git a/libqpdf/ContentNormalizer.cc b/libqpdf/ContentNormalizer.cc
new file mode 100644
index 00000000..35a8ad74
--- /dev/null
+++ b/libqpdf/ContentNormalizer.cc
@@ -0,0 +1,77 @@
+#include <qpdf/ContentNormalizer.hh>
+#include <qpdf/QUtil.hh>
+
+// Default constructor: the body is empty, so no setup is done here.
+ContentNormalizer::ContentNormalizer() {}
+
+// Destructor: the body is empty, so nothing is released here.
+ContentNormalizer::~ContentNormalizer() {}
+
+// Filter a single token, writing a normalized form of it.
+//
+// - Whitespace tokens are copied through character by character with
+//   line endings normalized: within the token, a CR that is
+//   immediately followed by LF is dropped and a bare CR is written as
+//   LF.
+// - String and name tokens are rebuilt from their parsed value before
+//   being handed to writeToken(); replacing them this way normalizes
+//   their representation (quoting of unprintable characters, etc.).
+//   If the raw text of such a token contained a CR or LF, a newline
+//   is written after the token.
+// - Every other token is passed to writeToken() unchanged.
+void
+ContentNormalizer::handleToken(QPDFTokenizer::Token const& token)
+{
+    std::string const& raw = token.getRawValue();
+    QPDFTokenizer::token_type_e const kind = token.getType();
+
+    if (kind == QPDFTokenizer::tt_space)
+    {
+        size_t const n = raw.length();
+        for (size_t pos = 0; pos < n; ++pos)
+        {
+            char c = raw[pos];
+            if (c != '\r')
+            {
+                write(&c, 1);
+            }
+            else if ((pos + 1 >= n) || (raw[pos + 1] != '\n'))
+            {
+                // Bare CR: emit LF in its place. A CR that begins a
+                // CR LF pair is skipped; the LF itself is written on
+                // the next iteration.
+                write("\n");
+            }
+        }
+        return;
+    }
+
+    if ((kind != QPDFTokenizer::tt_string) &&
+        (kind != QPDFTokenizer::tt_name))
+    {
+        writeToken(token);
+        return;
+    }
+
+    // Here kind is tt_string or tt_name, so it can be reused as the
+    // type of the rebuilt token.
+    writeToken(QPDFTokenizer::Token(kind, token.getValue()));
+    if (raw.find_first_of("\r\n") != std::string::npos)
+    {
+        write("\n");
+    }
+}
+
+// End-of-input handler: the only action taken is a call to finish().
+void ContentNormalizer::handleEOF()
+{
+    finish();
+}