diff options
author | Jay Berkenbilt <ejb@ql.org> | 2019-01-30 20:20:56 +0100 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2019-01-31 15:26:37 +0100 |
commit | ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b (patch) | |
tree | 970f4526f39909838f837eb1de5ac672881e9d58 /include | |
parent | 31372edce0b60211c7af98340b3afa054f414ca4 (diff) | |
download | qpdf-ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b.tar.zst |
Refactor QPDFTokenizer's inline image handling

Add a version of expectInlineImage that takes an input source and
searches for EI. This is in preparation for improving the way EI is
found. This commit just refactors the code without changing the
functionality and adds tests to make sure the old and new code behave
identically.
Diffstat (limited to 'include')
-rw-r--r-- | include/qpdf/Pl_QPDFTokenizer.hh | 8 | ++------ |
-rw-r--r-- | include/qpdf/QPDFTokenizer.hh | 11 | ++++++++++- |
2 files changed, 12 insertions, 7 deletions
```diff
diff --git a/include/qpdf/Pl_QPDFTokenizer.hh b/include/qpdf/Pl_QPDFTokenizer.hh
index 52630d2a..a571b079 100644
--- a/include/qpdf/Pl_QPDFTokenizer.hh
+++ b/include/qpdf/Pl_QPDFTokenizer.hh
@@ -27,6 +27,7 @@
 #include <qpdf/QPDFTokenizer.hh>
 #include <qpdf/PointerHolder.hh>
 #include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/Pl_Buffer.hh>
 
 // Tokenize the incoming text using QPDFTokenizer and pass the tokens
 // in turn to a QPDFObjectHandle::TokenFilter object. All bytes of
@@ -56,9 +57,6 @@ class Pl_QPDFTokenizer: public Pipeline
     virtual void finish();
 
   private:
-    void processChar(char ch);
-    void checkUnread();
-
     class Members
     {
         friend class Pl_QPDFTokenizer;
@@ -73,9 +71,7 @@ class Pl_QPDFTokenizer: public Pipeline
 
         QPDFObjectHandle::TokenFilter* filter;
         QPDFTokenizer tokenizer;
-        bool last_char_was_cr;
-        bool unread_char;
-        char char_to_unread;
+        Pl_Buffer buf;
     };
     PointerHolder<Members> m;
 };
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index 370296b2..31f2f398 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -178,7 +178,15 @@ class QPDFTokenizer
     // including the next EI token. After you call this method, the
     // next call to readToken (or the token created next time getToken
     // returns true) will either be tt_inline_image or tt_bad. This is
-    // the only way readToken returns a tt_inline_image token.
+    // the only way readToken returns a tt_inline_image token. The
+    // version of this method that takes a PointerHolder<InputSource>
+    // does a better job of locating the end of the inline image and
+    // should be used whenever the input source is available. It
+    // preserves both tell() and getLastOffset(). The version without
+    // the input source will always end the inline image the first
+    // time it sees something that looks like an EI operator.
+    QPDF_DLL
+    void expectInlineImage(PointerHolder<InputSource> input);
     QPDF_DLL
     void expectInlineImage();
 
@@ -223,6 +231,7 @@ class QPDFTokenizer
     std::string error_message;
     bool unread_char;
     char char_to_unread;
+    size_t inline_image_bytes;
 
     // State for strings
     int string_depth;
```