diff options
author | Jay Berkenbilt <ejb@ql.org> | 2018-02-03 00:21:34 +0100 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2018-02-19 03:05:46 +0100 |
commit | 99101044429c3c91bd11bdd1b26e5b6c2ceb140b (patch) | |
tree | 5ab366eab31ddf76e80f99bd1d34c421291f1c4e /libqpdf/ContentNormalizer.cc | |
parent | b8723e97f4b94fe03e631aab0309382ead3137ed (diff) | |
download | qpdf-99101044429c3c91bd11bdd1b26e5b6c2ceb140b.tar.zst |
Implement TokenFilter and refactor Pl_QPDFTokenizer
Implement a TokenFilter class and refactor Pl_QPDFTokenizer to use a
TokenFilter class called ContentNormalizer. Pl_QPDFTokenizer is now a
general filter that passes data through a TokenFilter.
Diffstat (limited to 'libqpdf/ContentNormalizer.cc')
-rw-r--r-- | libqpdf/ContentNormalizer.cc | 77 |
1 file changed, 77 insertions, 0 deletions
diff --git a/libqpdf/ContentNormalizer.cc b/libqpdf/ContentNormalizer.cc new file mode 100644 index 00000000..35a8ad74 --- /dev/null +++ b/libqpdf/ContentNormalizer.cc @@ -0,0 +1,77 @@ +#include <qpdf/ContentNormalizer.hh> +#include <qpdf/QUtil.hh> + +ContentNormalizer::ContentNormalizer() +{ +} + +ContentNormalizer::~ContentNormalizer() +{ +} + +void +ContentNormalizer::handleToken(QPDFTokenizer::Token const& token) +{ + std::string value = token.getRawValue(); + QPDFTokenizer::token_type_e token_type = token.getType(); + + switch (token_type) + { + case QPDFTokenizer::tt_space: + { + size_t len = value.length(); + for (size_t i = 0; i < len; ++i) + { + char ch = value.at(i); + if (ch == '\r') + { + if ((i + 1 < len) && (value.at(i + 1) == '\n')) + { + // ignore + } + else + { + write("\n"); + } + } + else + { + write(&ch, 1); + } + } + } + break; + + case QPDFTokenizer::tt_string: + // Replacing string and name tokens in this way normalizes + // their representation as this will automatically handle + // quoting of unprintable characters, etc. + writeToken(QPDFTokenizer::Token( + QPDFTokenizer::tt_string, token.getValue())); + break; + + case QPDFTokenizer::tt_name: + writeToken(QPDFTokenizer::Token( + QPDFTokenizer::tt_name, token.getValue())); + break; + + default: + writeToken(token); + break; + } + + value = token.getRawValue(); + if (((token_type == QPDFTokenizer::tt_string) || + (token_type == QPDFTokenizer::tt_name)) && + ((value.find('\r') != std::string::npos) || + (value.find('\n') != std::string::npos))) + { + write("\n"); + } +} + +void +ContentNormalizer::handleEOF() +{ + finish(); +} |