From f29d0a63121f1243b80c74d90c59a8a88f0a9223 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 19 Aug 2022 13:04:11 +0100 Subject: Add state st_char_code in QPDFTokenizer --- include/qpdf/QPDFTokenizer.hh | 5 +++- libqpdf/QPDFTokenizer.cc | 53 +++++++++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index 8d0ceae1..c84b8154 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -203,6 +203,7 @@ class QPDFTokenizer st_in_space, st_in_comment, st_in_string, + st_char_code, st_lt, st_gt, st_literal, @@ -212,8 +213,10 @@ class QPDFTokenizer }; void handleCharacter(char); + void inCharCode(char); void inHexstring(char); - void inString(char, size_t); + void inString(char); + void reset(); // Lexer state diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 5127a90d..b070294e 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -330,6 +330,7 @@ QPDFTokenizer::handleCharacter(char ch) this->state = st_token_ready; return; } + this->state = st_in_hexstring; inHexstring(ch); return; @@ -355,20 +356,7 @@ QPDFTokenizer::handleCharacter(char ch) if (this->string_ignoring_newline && (ch != '\n')) { this->string_ignoring_newline = false; } - - size_t bs_num_count = strlen(this->bs_num_register); - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); - if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { - // We've accumulated \ddd. PDF Spec says to ignore - // high-order overflow. 
- this->val += static_cast<char>( - strtol(this->bs_num_register, nullptr, 8)); - memset( - this->bs_num_register, '\0', sizeof(this->bs_num_register)); - bs_num_count = 0; - } - - inString(ch, bs_num_count); + inString(ch); this->last_char_was_cr = ((!this->string_ignoring_newline) && (ch == '\r')); @@ -377,6 +365,10 @@ QPDFTokenizer::handleCharacter(char ch) } return; + case (st_char_code): + inCharCode(ch); + return; + case st_literal: if (isDelimiter(ch)) { // A C-locale whitespace character or delimiter terminates @@ -452,15 +444,16 @@ QPDFTokenizer::inHexstring(char ch) } void -QPDFTokenizer::inString(char ch, size_t bs_num_count) +QPDFTokenizer::inString(char ch) { bool ch_is_octal = ((ch >= '0') && (ch <= '7')); if (this->string_ignoring_newline && (ch == '\n')) { // ignore this->string_ignoring_newline = false; return; - } else if (ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) { - this->bs_num_register[bs_num_count++] = ch; + } else if (ch_is_octal && this->last_char_was_bs) { + this->state = st_char_code; + inCharCode(ch); return; } else if (this->last_char_was_bs) { switch (ch) { @@ -498,11 +491,6 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count) } } else if (ch == '\\') { // last_char_was_bs is set/cleared below as appropriate - if (bs_num_count) { - throw std::logic_error( - "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 " - "when ch == '\\'"); - } } else if (ch == '(') { this->val += ch; ++this->string_depth; @@ -527,6 +515,27 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count) } } +void +QPDFTokenizer::inCharCode(char ch) +{ + size_t bs_num_count = strlen(this->bs_num_register); + bool ch_is_octal = ((ch >= '0') && (ch <= '7')); + if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { + // We've accumulated \ddd. PDF Spec says to ignore + // high-order overflow. 
+ this->val += + static_cast<char>(strtol(this->bs_num_register, nullptr, 8)); + memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); + bs_num_count = 0; + this->state = st_in_string; + handleCharacter(ch); + return; + } else if (ch_is_octal) { + this->bs_num_register[bs_num_count++] = ch; + return; + } +} + void QPDFTokenizer::presentEOF() { -- cgit v1.2.3-54-g00ecf