aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2018-01-30 02:57:04 +0100
committerJay Berkenbilt <ejb@ql.org>2018-02-19 03:05:46 +0100
commitfefe25030eaffdaf06a9e957b3255304682c71cf (patch)
treefa404200db521e085a711fa13f6952469665ae8e /include
parent2699ecf13e8559b136ded1986bf18e1a0a51011f (diff)
downloadqpdf-fefe25030eaffdaf06a9e957b3255304682c71cf.tar.zst
Inline image token type
Diffstat (limited to 'include')
-rw-r--r--include/qpdf/QPDFTokenizer.hh40
1 files changed, 32 insertions, 8 deletions
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index 7d7f6132..fe2e95f7 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -34,7 +34,8 @@ class QPDFTokenizer
public:
// Token type tt_eof is only returned of allowEOF() is called on
// the tokenizer. tt_eof was introduced in QPDF version 4.1.
- // tt_space and tt_comment were added in QPDF version 8.
+ // tt_space, tt_comment, and tt_inline_image were added in QPDF
+ // version 8.
enum token_type_e
{
tt_bad,
@@ -54,6 +55,7 @@ class QPDFTokenizer
tt_eof,
tt_space,
tt_comment,
+ tt_inline_image,
};
class Token
@@ -128,11 +130,17 @@ class QPDFTokenizer
QPDF_DLL
void includeIgnorable();
- // Mode of operation:
+ // There are two modes of operation: push and pull. The pull
+ // method is easier but requires an input source. The push method
+ // is more complicated but can be used to tokenize a stream of
+ // incoming characters in a pipeline.
- // Keep presenting characters and calling getToken() until
- // getToken() returns true. When it does, be sure to check
- // unread_ch and to unread ch if it is true.
+ // Push mode:
+
+ // Keep presenting characters with presentCharacter() and
+ // presentEOF() and calling getToken() until getToken() returns
+ // true. When it does, be sure to check unread_ch and to unread ch
+ // if it is true.
// It these are called when a token is available, an exception
// will be thrown.
@@ -155,15 +163,30 @@ class QPDFTokenizer
QPDF_DLL
bool betweenTokens();
- // Read a token from an input source. Context describes the
+ // Pull mode:
+
+ // Read a token from an input source. Context describes the
// context in which the token is being read and is used in the
- // exception thrown if there is an error.
+ // exception thrown if there is an error. After a token is read,
+ // the position of the input source returned by input->tell()
+ // points to just after the token, and the input source's "last
+ // offset" as returned by input->getLastOffset() points to the
+ // beginning of the token.
QPDF_DLL
Token readToken(PointerHolder<InputSource> input,
std::string const& context,
bool allow_bad = false,
size_t max_len = 0);
+ // Calling this method puts the tokenizer in a state for reading
+ // inline images. In that state, it will return all data up to and
+ // including the next EI token. After you call this method, the
+ // next call to readToken (or the token created next time getToken
+ // returns true) will either be tt_inline_image or tt_bad. This is
+ // the only way readToken returns a tt_inline_image token.
+ QPDF_DLL
+ void expectInlineImage();
+
private:
// Do not implement copy or assignment
QPDFTokenizer(QPDFTokenizer const&);
@@ -171,10 +194,11 @@ class QPDFTokenizer
void resolveLiteral();
bool isSpace(char);
+ bool isDelimiter(char);
enum state_e {
st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt,
- st_literal, st_in_hexstring, st_token_ready
+ st_literal, st_in_hexstring, st_inline_image, st_token_ready
};
class Members