about summary refs log tree commit diff stats
path: root/libqpdf/Pl_QPDFTokenizer.cc
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2019-01-30 20:20:56 +0100
committer Jay Berkenbilt <ejb@ql.org> 2019-01-31 15:26:37 +0100
commit ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b (patch)
tree 970f4526f39909838f837eb1de5ac672881e9d58 /libqpdf/Pl_QPDFTokenizer.cc
parent 31372edce0b60211c7af98340b3afa054f414ca4 (diff)
download qpdf-ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b.tar.zst
Refactor QPDFTokenizer's inline image handling
Add a version of expectInlineImage that takes an input source and searches for EI. This is in preparation for improving the way EI is found. This commit just refactors the code without changing the functionality and adds tests to make sure the old and new code behave identically.
Diffstat (limited to 'libqpdf/Pl_QPDFTokenizer.cc')
-rw-r--r-- libqpdf/Pl_QPDFTokenizer.cc 73
1 files changed, 24 insertions, 49 deletions
diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc
index 577c5cc7..bd5d88ab 100644
--- a/libqpdf/Pl_QPDFTokenizer.cc
+++ b/libqpdf/Pl_QPDFTokenizer.cc
@@ -1,13 +1,13 @@
#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QTC.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/BufferInputSource.hh>
#include <stdexcept>
#include <string.h>
Pl_QPDFTokenizer::Members::Members() :
filter(0),
- last_char_was_cr(false),
- unread_char(false),
- char_to_unread('\0')
+ buf("tokenizer buffer")
{
}
@@ -33,61 +33,36 @@ Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
}
void
-Pl_QPDFTokenizer::processChar(char ch)
+Pl_QPDFTokenizer::write(unsigned char* data, size_t len)
{
- this->m->tokenizer.presentCharacter(ch);
- QPDFTokenizer::Token token;
- if (this->m->tokenizer.getToken(
- token, this->m->unread_char, this->m->char_to_unread))
- {
- this->m->filter->handleToken(token);
- if ((token.getType() == QPDFTokenizer::tt_word) &&
- (token.getValue() == "ID"))
- {
- QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
- this->m->tokenizer.expectInlineImage();
- }
- }
-}
-
-
-void
-Pl_QPDFTokenizer::checkUnread()
-{
- if (this->m->unread_char)
- {
- processChar(this->m->char_to_unread);
- if (this->m->unread_char)
- {
- throw std::logic_error(
- "INTERNAL ERROR: unread_char still true after processing "
- "unread character");
- }
- }
-}
-
-void
-Pl_QPDFTokenizer::write(unsigned char* buf, size_t len)
-{
- checkUnread();
- for (size_t i = 0; i < len; ++i)
- {
- processChar(buf[i]);
- checkUnread();
- }
+ this->m->buf.write(data, len);
}
void
Pl_QPDFTokenizer::finish()
{
- this->m->tokenizer.presentEOF();
- QPDFTokenizer::Token token;
- if (this->m->tokenizer.getToken(
- token, this->m->unread_char, this->m->char_to_unread))
+ this->m->buf.finish();
+ PointerHolder<InputSource> input =
+ new BufferInputSource("tokenizer data",
+ this->m->buf.getBuffer(), true);
+
+ while (true)
{
+ QPDFTokenizer::Token token = this->m->tokenizer.readToken(
+ input, "offset " + QUtil::int_to_string(input->tell()),
+ true);
this->m->filter->handleToken(token);
+ if (token.getType() == QPDFTokenizer::tt_eof)
+ {
+ break;
+ }
+ else if ((token.getType() == QPDFTokenizer::tt_word) &&
+ (token.getValue() == "ID"))
+ {
+ QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
+ this->m->tokenizer.expectInlineImage(input);
+ }
}
-
this->m->filter->handleEOF();
QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline(
m->filter, 0);