about | summary | refs | log | tree | commit | diff | stats
path: root/libqpdf/Pl_QPDFTokenizer.cc
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2018-02-03 00:21:34 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2018-02-19 03:05:46 +0100
commit: 99101044429c3c91bd11bdd1b26e5b6c2ceb140b (patch)
tree: 5ab366eab31ddf76e80f99bd1d34c421291f1c4e /libqpdf/Pl_QPDFTokenizer.cc
parent: b8723e97f4b94fe03e631aab0309382ead3137ed (diff)
download: qpdf-99101044429c3c91bd11bdd1b26e5b6c2ceb140b.tar.zst
Implement TokenFilter and refactor Pl_QPDFTokenizer
Implement a TokenFilter class and refactor Pl_QPDFTokenizer to use a TokenFilter class called ContentNormalizer. Pl_QPDFTokenizer is now a general filter that passes data through a TokenFilter.
Diffstat (limited to 'libqpdf/Pl_QPDFTokenizer.cc')
-rw-r--r-- libqpdf/Pl_QPDFTokenizer.cc 121
1 files changed, 29 insertions, 92 deletions
diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc
index 9595cd75..4fc37767 100644
--- a/libqpdf/Pl_QPDFTokenizer.cc
+++ b/libqpdf/Pl_QPDFTokenizer.cc
@@ -1,107 +1,51 @@
#include <qpdf/Pl_QPDFTokenizer.hh>
-#include <qpdf/QPDF_String.hh>
-#include <qpdf/QPDF_Name.hh>
#include <qpdf/QTC.hh>
-#include <qpdf/QUtil.hh>
#include <stdexcept>
#include <string.h>
-Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
- Pipeline(identifier, next),
- just_wrote_nl(false),
+Pl_QPDFTokenizer::Members::Members() :
+ filter(0),
last_char_was_cr(false),
unread_char(false),
char_to_unread('\0')
{
- tokenizer.allowEOF();
- tokenizer.includeIgnorable();
}
-Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
+Pl_QPDFTokenizer::Members::~Members()
{
}
-void
-Pl_QPDFTokenizer::writeNext(char const* buf, size_t len)
+Pl_QPDFTokenizer::Pl_QPDFTokenizer(
+ char const* identifier,
+ QPDFObjectHandle::TokenFilter* filter)
+ :
+ Pipeline(identifier, 0),
+ m(new Members)
{
- if (len)
- {
- getNext()->write(QUtil::unsigned_char_pointer(buf), len);
- this->just_wrote_nl = (buf[len-1] == '\n');
- }
+ m->filter = filter;
+ m->tokenizer.allowEOF();
+ m->tokenizer.includeIgnorable();
}
-void
-Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
+Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
{
- std::string value = token.getRawValue();
-
- switch (token.getType())
- {
- case QPDFTokenizer::tt_space:
- {
- size_t len = value.length();
- for (size_t i = 0; i < len; ++i)
- {
- char ch = value.at(i);
- if (ch == '\r')
- {
- if ((i + 1 < len) && (value.at(i + 1) == '\n'))
- {
- // ignore
- }
- else
- {
- writeNext("\n", 1);
- }
- }
- else
- {
- writeNext(&ch, 1);
- }
- }
- }
- value.clear();
- break;
-
- case QPDFTokenizer::tt_string:
- value = QPDF_String(token.getValue()).unparse();
-
- break;
-
- case QPDFTokenizer::tt_name:
- value = QPDF_Name(token.getValue()).unparse();
- break;
-
- default:
- break;
- }
- writeNext(value.c_str(), value.length());
}
void
Pl_QPDFTokenizer::processChar(char ch)
{
- tokenizer.presentCharacter(ch);
+ this->m->tokenizer.presentCharacter(ch);
QPDFTokenizer::Token token;
- if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
+ if (this->m->tokenizer.getToken(
+ token, this->m->unread_char, this->m->char_to_unread))
{
- writeToken(token);
- std::string value = token.getRawValue();
- QPDFTokenizer::token_type_e token_type = token.getType();
- if (((token_type == QPDFTokenizer::tt_string) ||
- (token_type == QPDFTokenizer::tt_name)) &&
- ((value.find('\r') != std::string::npos) ||
- (value.find('\n') != std::string::npos)))
+ this->m->filter->handleToken(token);
+ if ((token.getType() == QPDFTokenizer::tt_word) &&
+ (token.getValue() == "ID"))
{
- writeNext("\n", 1);
- }
- if ((token.getType() == QPDFTokenizer::tt_word) &&
- (token.getValue() == "ID"))
- {
QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
- tokenizer.expectInlineImage();
- }
+ this->m->tokenizer.expectInlineImage();
+ }
}
}
@@ -109,10 +53,10 @@ Pl_QPDFTokenizer::processChar(char ch)
void
Pl_QPDFTokenizer::checkUnread()
{
- if (this->unread_char)
+ if (this->m->unread_char)
{
- processChar(this->char_to_unread);
- if (this->unread_char)
+ processChar(this->m->char_to_unread);
+ if (this->m->unread_char)
{
throw std::logic_error(
"INTERNAL ERROR: unread_char still true after processing "
@@ -135,20 +79,13 @@ Pl_QPDFTokenizer::write(unsigned char* buf, size_t len)
void
Pl_QPDFTokenizer::finish()
{
- this->tokenizer.presentEOF();
+ this->m->tokenizer.presentEOF();
QPDFTokenizer::Token token;
- if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
+ if (this->m->tokenizer.getToken(
+ token, this->m->unread_char, this->m->char_to_unread))
{
- writeToken(token);
- if (unread_char)
- {
- if (this->char_to_unread == '\r')
- {
- this->char_to_unread = '\n';
- }
- writeNext(&this->char_to_unread, 1);
- }
+ this->m->filter->handleToken(token);
}
- getNext()->finish();
+ this->m->filter->handleEOF();
}