From 7c32f6cc2e90058b8a1fbaec48e07bf21bd66afa Mon Sep 17 00:00:00 2001 From: m-holger Date: Sat, 20 Aug 2022 12:01:49 +0100 Subject: Add state st_string_escape in QPDFTokenizer --- libqpdf/QPDFTokenizer.cc | 104 ++++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 50 deletions(-) (limited to 'libqpdf/QPDFTokenizer.cc') diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index d8b0379b..bf066e2a 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -85,7 +85,6 @@ QPDFTokenizer::reset() char_to_unread = '\0'; inline_image_bytes = 0; string_depth = 0; - last_char_was_bs = false; } QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : @@ -244,7 +243,6 @@ QPDFTokenizer::handleCharacter(char ch) case '(': this->string_depth = 1; memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); - this->last_char_was_bs = false; this->state = st_in_string; return; @@ -348,22 +346,66 @@ QPDFTokenizer::handleCharacter(char ch) return; case st_in_string: - { - inString(ch); - this->last_char_was_bs = - ((!this->last_char_was_bs) && (ch == '\\')); - } + inString(ch); return; - case (st_string_after_cr): + case st_string_after_cr: // CR LF in strings are either ignored or normalized to CR this->state = st_in_string; if (ch != '\n') { - handleCharacter(ch); + inString(ch); } return; - case (st_char_code): + case st_string_escape: + this->state = st_in_string; + switch (ch) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + this->state = st_char_code; + inCharCode(ch); + return; + + case 'n': + this->val += '\n'; + return; + + case 'r': + this->val += '\r'; + return; + + case 't': + this->val += '\t'; + return; + + case 'b': + this->val += '\b'; + return; + + case 'f': + this->val += '\f'; + return; + + case '\n': + return; + + case '\r': + this->state = st_string_after_cr; + return; + + default: + // PDF spec says backslash is ignored before anything else + this->val += ch; + return; + } + + case st_char_code: inCharCode(ch); return; @@ -444,47 +486,9 @@ QPDFTokenizer::inHexstring(char ch) void QPDFTokenizer::inString(char ch) { - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); - if (ch_is_octal && this->last_char_was_bs) { - this->state = st_char_code; - inCharCode(ch); + if (ch == '\\') { + this->state = st_string_escape; return; - } else if (this->last_char_was_bs) { - switch (ch) { - case 'n': - this->val += '\n'; - return; - - case 'r': - this->val += '\r'; - return; - - case 't': - this->val += '\t'; - return; - - case 'b': - this->val += '\b'; - return; - - case 'f': - this->val += '\f'; - return; - - case '\n': - return; - - case '\r': - this->state = st_string_after_cr; - return; - - default: - // PDF spec says backslash is ignored before anything else - this->val += ch; - return; - } - } else if (ch == '\\') { - // last_char_was_bs is set/cleared below as appropriate } else if (ch == '(') { this->val += ch; ++this->string_depth; -- cgit v1.2.3-54-g00ecf