From ec538792fac039daa9636f9c94000b7bc1f3a669 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Mon, 29 Jan 2018 21:05:15 -0500 Subject: Use inline image token type in tokenizer filter --- libqpdf/Pl_QPDFTokenizer.cc | 69 ++++++++-------------------------------- libqpdf/qpdf/Pl_QPDFTokenizer.hh | 3 -- 2 files changed, 14 insertions(+), 58 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc index 8a54a640..3c737eec 100644 --- a/libqpdf/Pl_QPDFTokenizer.cc +++ b/libqpdf/Pl_QPDFTokenizer.cc @@ -12,10 +12,8 @@ Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) : just_wrote_nl(false), last_char_was_cr(false), unread_char(false), - char_to_unread('\0'), - in_inline_image(false) + char_to_unread('\0') { - memset(this->image_buf, 0, IMAGE_BUF_SIZE); } Pl_QPDFTokenizer::~Pl_QPDFTokenizer() @@ -56,37 +54,6 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token) void Pl_QPDFTokenizer::processChar(char ch) { - if (this->in_inline_image) - { - // Scan through the input looking for EI surrounded by - // whitespace. If that pattern appears in the inline image's - // representation, we're hosed, but this situation seems - // excessively unlikely, and this code path is only followed - // during content stream normalization, which is pretty much - // used for debugging and human inspection of PDF files. - memmove(this->image_buf, - this->image_buf + 1, - IMAGE_BUF_SIZE - 1); - this->image_buf[IMAGE_BUF_SIZE - 1] = ch; - if (strchr(" \t\n\v\f\r", this->image_buf[0]) && - (this->image_buf[1] == 'E') && - (this->image_buf[2] == 'I') && - strchr(" \t\n\v\f\r", this->image_buf[3])) - { - // We've found an EI operator. We've already written the - // EI operator to output; terminate with a newline - // character and resume normal processing. - writeNext("\n", 1); - this->in_inline_image = false; - QTC::TC("qpdf", "Pl_QPDFTokenizer found EI"); - } - else - { - writeNext(&ch, 1); - } - return; - } - tokenizer.presentCharacter(ch); QPDFTokenizer::Token token; if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) @@ -100,13 +67,8 @@ Pl_QPDFTokenizer::processChar(char ch) if ((token.getType() == QPDFTokenizer::tt_word) && (token.getValue() == "ID")) { - // Suspend normal scanning until we find an EI token. - this->in_inline_image = true; - if (this->unread_char) - { - writeNext(&this->char_to_unread, 1); - this->unread_char = false; - } + QTC::TC("qpdf", "Pl_QPDFTokenizer found ID"); + tokenizer.expectInlineImage(); } } else @@ -171,21 +133,18 @@ void Pl_QPDFTokenizer::finish() { this->tokenizer.presentEOF(); - if (! this->in_inline_image) + QPDFTokenizer::Token token; + if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) { - QPDFTokenizer::Token token; - if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) - { - writeToken(token); - if (unread_char) - { - if (this->char_to_unread == '\r') - { - this->char_to_unread = '\n'; - } - writeNext(&this->char_to_unread, 1); - } - } + writeToken(token); + if (unread_char) + { + if (this->char_to_unread == '\r') + { + this->char_to_unread = '\n'; + } + writeNext(&this->char_to_unread, 1); + } } if (! this->just_wrote_nl) { diff --git a/libqpdf/qpdf/Pl_QPDFTokenizer.hh b/libqpdf/qpdf/Pl_QPDFTokenizer.hh index 0d041577..e47064eb 100644 --- a/libqpdf/qpdf/Pl_QPDFTokenizer.hh +++ b/libqpdf/qpdf/Pl_QPDFTokenizer.hh @@ -33,9 +33,6 @@ class Pl_QPDFTokenizer: public Pipeline bool last_char_was_cr; bool unread_char; char char_to_unread; - bool in_inline_image; - static int const IMAGE_BUF_SIZE = 4; // must be >= 4 - char image_buf[IMAGE_BUF_SIZE]; }; #endif // __PL_QPDFTOKENIZER_HH__ -- cgit v1.2.3-54-g00ecf