aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2019-01-30 20:20:56 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2019-01-31 15:26:37 +0100
commit: ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b (patch)
tree: 970f4526f39909838f837eb1de5ac672881e9d58 /include
parent: 31372edce0b60211c7af98340b3afa054f414ca4 (diff)
download: qpdf-ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b.tar.zst
Refactor QPDFTokenizer's inline image handling
Add a version of expectInlineImage that takes an input source and searches for EI. This is in preparation for improving the way EI is found. This commit just refactors the code without changing the functionality and adds tests to make sure the old and new code behave identically.
Diffstat (limited to 'include')
-rw-r--r-- include/qpdf/Pl_QPDFTokenizer.hh | 8
-rw-r--r-- include/qpdf/QPDFTokenizer.hh | 11
2 files changed, 12 insertions, 7 deletions
diff --git a/include/qpdf/Pl_QPDFTokenizer.hh b/include/qpdf/Pl_QPDFTokenizer.hh
index 52630d2a..a571b079 100644
--- a/include/qpdf/Pl_QPDFTokenizer.hh
+++ b/include/qpdf/Pl_QPDFTokenizer.hh
@@ -27,6 +27,7 @@
#include <qpdf/QPDFTokenizer.hh>
#include <qpdf/PointerHolder.hh>
#include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/Pl_Buffer.hh>
// Tokenize the incoming text using QPDFTokenizer and pass the tokens
// in turn to a QPDFObjectHandle::TokenFilter object. All bytes of
@@ -56,9 +57,6 @@ class Pl_QPDFTokenizer: public Pipeline
virtual void finish();
private:
- void processChar(char ch);
- void checkUnread();
-
class Members
{
friend class Pl_QPDFTokenizer;
@@ -73,9 +71,7 @@ class Pl_QPDFTokenizer: public Pipeline
QPDFObjectHandle::TokenFilter* filter;
QPDFTokenizer tokenizer;
- bool last_char_was_cr;
- bool unread_char;
- char char_to_unread;
+ Pl_Buffer buf;
};
PointerHolder<Members> m;
};
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index 370296b2..31f2f398 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -178,7 +178,15 @@ class QPDFTokenizer
// including the next EI token. After you call this method, the
// next call to readToken (or the token created next time getToken
// returns true) will either be tt_inline_image or tt_bad. This is
- // the only way readToken returns a tt_inline_image token.
+ // the only way readToken returns a tt_inline_image token. The
+ // version of this method that takes a PointerHolder<InputSource>
+ // does a better job of locating the end of the inline image and
+ // should be used whenever the input source is available. It
+ // preserves both tell() and getLastOffset(). The version without
+ // the input source will always end the inline image the first
+ // time it sees something that looks like an EI operator.
+ QPDF_DLL
+ void expectInlineImage(PointerHolder<InputSource> input);
QPDF_DLL
void expectInlineImage();
@@ -223,6 +231,7 @@ class QPDFTokenizer
std::string error_message;
bool unread_char;
char char_to_unread;
+ size_t inline_image_bytes;
// State for strings
int string_depth;