From fefe25030eaffdaf06a9e957b3255304682c71cf Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Mon, 29 Jan 2018 20:57:04 -0500 Subject: Inline image token type --- include/qpdf/QPDFTokenizer.hh | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index 7d7f6132..fe2e95f7 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -34,7 +34,8 @@ class QPDFTokenizer public: // Token type tt_eof is only returned of allowEOF() is called on // the tokenizer. tt_eof was introduced in QPDF version 4.1. - // tt_space and tt_comment were added in QPDF version 8. + // tt_space, tt_comment, and tt_inline_image were added in QPDF + // version 8. enum token_type_e { tt_bad, @@ -54,6 +55,7 @@ class QPDFTokenizer tt_eof, tt_space, tt_comment, + tt_inline_image, }; class Token @@ -128,11 +130,17 @@ class QPDFTokenizer QPDF_DLL void includeIgnorable(); - // Mode of operation: + // There are two modes of operation: push and pull. The pull + // method is easier but requires an input source. The push method + // is more complicated but can be used to tokenize a stream of + // incoming characters in a pipeline. - // Keep presenting characters and calling getToken() until - // getToken() returns true. When it does, be sure to check - // unread_ch and to unread ch if it is true. + // Push mode: + + // Keep presenting characters with presentCharacter() and + // presentEOF() and calling getToken() until getToken() returns + // true. When it does, be sure to check unread_ch and to unread ch + // if it is true. // It these are called when a token is available, an exception // will be thrown. @@ -155,15 +163,30 @@ class QPDFTokenizer QPDF_DLL bool betweenTokens(); - // Read a token from an input source. Context describes the + // Pull mode: + + // Read a token from an input source. Context describes the // context in which the token is being read and is used in the - // exception thrown if there is an error. + // exception thrown if there is an error. After a token is read, + // the position of the input source returned by input->tell() + // points to just after the token, and the input source's "last + // offset" as returned by input->getLastOffset() points to the + // beginning of the token. QPDF_DLL Token readToken(PointerHolder input, std::string const& context, bool allow_bad = false, size_t max_len = 0); + // Calling this method puts the tokenizer in a state for reading + // inline images. In that state, it will return all data up to and + // including the next EI token. After you call this method, the + // next call to readToken (or the token created next time getToken + // returns true) will either be tt_inline_image or tt_bad. This is + // the only way readToken returns a tt_inline_image token. + QPDF_DLL + void expectInlineImage(); + private: // Do not implement copy or assignment QPDFTokenizer(QPDFTokenizer const&); @@ -171,10 +194,11 @@ class QPDFTokenizer void resolveLiteral(); bool isSpace(char); + bool isDelimiter(char); enum state_e { st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt, - st_literal, st_in_hexstring, st_token_ready + st_literal, st_in_hexstring, st_inline_image, st_token_ready }; class Members -- cgit v1.2.3-54-g00ecf