From e4fe0d5cf57d1d5b7d34492ffb616746ecd6ae35 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sat, 20 Aug 2022 13:44:27 +0100 Subject: Refactor QPDFTokenizer::inHexstring --- include/qpdf/QPDFTokenizer.hh | 3 ++ libqpdf/QPDFTokenizer.cc | 73 +++++++++++++++++++++++++++++++------------ qpdf/qpdf.testcov | 1 + 3 files changed, 57 insertions(+), 20 deletions(-) diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index 0ced01c4..2873b3a2 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -210,6 +210,7 @@ class QPDFTokenizer st_gt, st_literal, st_in_hexstring, + st_in_hexstring_2nd, st_inline_image, st_token_ready }; @@ -217,6 +218,7 @@ class QPDFTokenizer void handleCharacter(char); void inCharCode(char); void inHexstring(char); + void inHexstring2nd(char); void inString(char); void reset(); @@ -238,6 +240,7 @@ class QPDFTokenizer // State for strings int string_depth; + int char_code; char bs_num_register[4]; }; diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 0c6ba155..6c4b1124 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -442,6 +442,10 @@ QPDFTokenizer::handleCharacter(char ch) inHexstring(ch); return; + case (st_in_hexstring_2nd): + inHexstring2nd(ch); + return; + default: throw std::logic_error( "INTERNAL ERROR: invalid state while reading token"); @@ -451,29 +455,25 @@ QPDFTokenizer::handleCharacter(char ch) void QPDFTokenizer::inHexstring(char ch) { - if (ch == '>') { + if ('0' <= ch && ch <= '9') { + this->char_code = 16 * (int(ch) - int('0')); + this->state = st_in_hexstring_2nd; + + } else if ('A' <= ch && ch <= 'F') { + this->char_code = 16 * (10 + int(ch) - int('A')); + this->state = st_in_hexstring_2nd; + + } else if ('a' <= ch && ch <= 'f') { + this->char_code = 16 * (10 + int(ch) - int('a')); + this->state = st_in_hexstring_2nd; + + } else if (ch == '>') { this->type = tt_string; this->state = st_token_ready; - if (this->val.length() % 2) { - // PDF spec says odd hexstrings have implicit - // trailing 0. - this->val += '0'; - } - char num[3]; - num[2] = '\0'; - std::string nval; - for (unsigned int i = 0; i < this->val.length(); i += 2) { - num[0] = this->val.at(i); - num[1] = this->val.at(i + 1); - char nch = static_cast(strtol(num, nullptr, 16)); - nval += nch; - } - this->val.clear(); - this->val += nval; - } else if (QUtil::is_hex_digit(ch)) { - this->val += ch; + } else if (isSpace(ch)) { // ignore + } else { this->type = tt_bad; QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); @@ -483,6 +483,39 @@ QPDFTokenizer::inHexstring(char ch) } } +void +QPDFTokenizer::inHexstring2nd(char ch) +{ + if ('0' <= ch && ch <= '9') { + this->val += char(this->char_code + int(ch) - int('0')); + this->state = st_in_hexstring; + + } else if ('A' <= ch && ch <= 'F') { + this->val += char(this->char_code + 10 + int(ch) - int('A')); + this->state = st_in_hexstring; + + } else if ('a' <= ch && ch <= 'f') { + this->val += char(this->char_code + 10 + int(ch) - int('a')); + this->state = st_in_hexstring; + + } else if (ch == '>') { + // PDF spec says odd hexstrings have implicit trailing 0. + this->val += char(this->char_code); + this->type = tt_string; + this->state = st_token_ready; + + } else if (isSpace(ch)) { + // ignore + + } else { + this->type = tt_bad; + QTC::TC("qpdf", "QPDFTokenizer bad hexstring 2nd character"); + this->error_message = + std::string("invalid character (") + ch + ") in hexstring"; + this->state = st_token_ready; + } +} + void QPDFTokenizer::inString(char ch) { @@ -526,7 +559,7 @@ void QPDFTokenizer::inCharCode(char ch) { size_t bs_num_count = strlen(this->bs_num_register); - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); + bool ch_is_octal = ('0' <= ch && ch <= '7'); if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { // We've accumulated \ddd. PDF Spec says to ignore // high-order overflow. diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index f535b9ee..81edf947 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -66,6 +66,7 @@ QPDF can't find xref 0 QPDFTokenizer bad ) 0 QPDFTokenizer bad > 0 QPDFTokenizer bad hexstring character 0 +QPDFTokenizer bad hexstring 2nd character 0 QPDFTokenizer null in name 0 QPDFTokenizer bad name 0 QPDF_Stream invalid filter 0 -- cgit v1.2.3-54-g00ecf