diff options
author | m-holger <m-holger@kubitscheck.org> | 2022-08-19 20:10:38 +0200 |
---|---|---|
committer | m-holger <m-holger@kubitscheck.org> | 2022-08-21 12:13:48 +0200 |
commit | 7c5778f999e15cc1af6360710f8055c2fa234d03 (patch) | |
tree | eef4e32ef4dce05f775997f877c6ad339032c073 | |
parent | f29d0a63121f1243b80c74d90c59a8a88f0a9223 (diff) | |
download | qpdf-7c5778f999e15cc1af6360710f8055c2fa234d03.tar.zst |
Add state st_string_after_cr in QPDFTokenizer
-rw-r--r-- | include/qpdf/QPDFTokenizer.hh | 3 | ||||
-rw-r--r-- | libqpdf/QPDFTokenizer.cc | 32 |
2 files changed, 13 insertions, 22 deletions
```diff
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index c84b8154..732c3cd8 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -204,6 +204,7 @@ class QPDFTokenizer
         st_in_comment,
         st_in_string,
         st_char_code,
+        st_string_after_cr,
         st_lt,
         st_gt,
         st_literal,
@@ -236,10 +237,8 @@ class QPDFTokenizer
 
     // State for strings
     int string_depth;
-    bool string_ignoring_newline;
     char bs_num_register[4];
     bool last_char_was_bs;
-    bool last_char_was_cr;
 };
 
 #endif // QPDFTOKENIZER_HH
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index b070294e..d8b0379b 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -85,9 +85,7 @@ QPDFTokenizer::reset()
     char_to_unread = '\0';
     inline_image_bytes = 0;
     string_depth = 0;
-    string_ignoring_newline = false;
     last_char_was_bs = false;
-    last_char_was_cr = false;
 }
 
 QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
@@ -245,10 +243,8 @@ QPDFTokenizer::handleCharacter(char ch)
 
         case '(':
             this->string_depth = 1;
-            this->string_ignoring_newline = false;
             memset(this->bs_num_register, '\0', sizeof(this->bs_num_register));
             this->last_char_was_bs = false;
-            this->last_char_was_cr = false;
             this->state = st_in_string;
             return;
 
@@ -353,18 +349,20 @@ QPDFTokenizer::handleCharacter(char ch)
 
     case st_in_string:
         {
-            if (this->string_ignoring_newline && (ch != '\n')) {
-                this->string_ignoring_newline = false;
-            }
             inString(ch);
-
-            this->last_char_was_cr =
-                ((!this->string_ignoring_newline) && (ch == '\r'));
             this->last_char_was_bs =
                 ((!this->last_char_was_bs) && (ch == '\\'));
         }
         return;
 
+    case (st_string_after_cr):
+        // CR LF in strings are either ignored or normalized to CR
+        this->state = st_in_string;
+        if (ch != '\n') {
+            handleCharacter(ch);
+        }
+        return;
+
     case (st_char_code):
         inCharCode(ch);
         return;
@@ -447,11 +445,7 @@ void
 QPDFTokenizer::inString(char ch)
 {
     bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
-    if (this->string_ignoring_newline && (ch == '\n')) {
-        // ignore
-        this->string_ignoring_newline = false;
-        return;
-    } else if (ch_is_octal && this->last_char_was_bs) {
+    if (ch_is_octal && this->last_char_was_bs) {
         this->state = st_char_code;
         inCharCode(ch);
         return;
@@ -481,7 +475,7 @@ QPDFTokenizer::inString(char ch)
             return;
 
         case '\r':
-            this->string_ignoring_newline = true;
+            this->state = st_string_after_cr;
             return;
 
         default:
@@ -502,12 +496,10 @@ QPDFTokenizer::inString(char ch)
     } else if (ch == '\r') {
         // CR by itself is converted to LF
         this->val += '\n';
+        this->state = st_string_after_cr;
         return;
     } else if (ch == '\n') {
-        // CR LF is converted to LF
-        if (!this->last_char_was_cr) {
-            this->val += ch;
-        }
+        this->val += ch;
         return;
     } else {
         this->val += ch;
```