diff options
author | Jay Berkenbilt <ejb@ql.org> | 2018-01-29 00:28:45 +0100 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2018-02-19 02:18:40 +0100 |
commit | d97474868d7fa6a94bab49d89af5dd82fd5e3a41 (patch) | |
tree | 754e4741adf505081e81a30bcd3c4395acb066f9 /include | |
parent | bb9e91adbd75d05d0d60227b2d419d7ee12e1b42 (diff) | |
download | qpdf-d97474868d7fa6a94bab49d89af5dd82fd5e3a41.tar.zst |
Lexer enhancements: EOF, comment, space
Significant enhancements to the lexer to improve EOF handling and to
support comments and spaces as tokens. Various other minor issues were
fixed as well.
Diffstat (limited to 'include')
-rw-r--r-- | include/qpdf/QPDFTokenizer.hh | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index 8c9fe984..cd727613 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -33,7 +33,8 @@ class QPDFTokenizer
 {
   public:
     // Token type tt_eof is only returned of allowEOF() is called on
-    // the tokenizer. tt_eof was introduced in QPDF version 4.1.
+    // the tokenizer. tt_eof was introduced in QPDF version 4.1.
+    // tt_space and tt_comment were added in QPDF version 8.
     enum token_type_e
     {
         tt_bad,
@@ -51,6 +52,8 @@ class QPDFTokenizer
         tt_bool,
         tt_word,
         tt_eof,
+        tt_space,
+        tt_comment,
     };
 
     class Token
@@ -120,6 +123,11 @@ class QPDFTokenizer
     QPDF_DLL
     void allowEOF();
 
+    // If called, readToken will return "ignorable" tokens for space
+    // and comments. This was added in QPDF 8.
+    QPDF_DLL
+    void includeIgnorable();
+
     // Mode of operation:
 
     // Keep presenting characters and calling getToken() until
@@ -159,13 +167,15 @@ class QPDFTokenizer
   private:
     void reset();
     void resolveLiteral();
+    bool isSpace(char);
 
     // Lexer state
-    enum { st_top, st_in_comment, st_in_string, st_lt, st_gt,
+    enum { st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt,
            st_literal, st_in_hexstring, st_token_ready } state;
 
     bool pound_special_in_name;
     bool allow_eof;
+    bool include_ignorable;
 
     // Current token accumulation
     token_type_e type;