From 0795b6952bdaf5b7f8ce7842ae26676d80286dab Mon Sep 17 00:00:00 2001 From: m-holger Date: Wed, 22 Mar 2023 12:10:24 +0000 Subject: Add new function QUtil::hex_decode_char --- include/qpdf/QUtil.hh | 17 +++++++++++++++-- libqpdf/JSON.cc | 8 ++------ libqpdf/QPDFTokenizer.cc | 48 ++++++++---------------------------------------- 3 files changed, 25 insertions(+), 48 deletions(-) diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh index b42fe195..4d46f630 100644 --- a/include/qpdf/QUtil.hh +++ b/include/qpdf/QUtil.hh @@ -223,6 +223,11 @@ namespace QUtil QPDF_DLL std::string hex_decode(std::string const&); + // Decode a single hex digit into a char in the range 0 <= char < 16. Return + // a char >= 16 if digit is not a valid hex digit. + QPDF_DLL + inline constexpr char hex_decode_char(char digit) noexcept; + // Set stdin, stdout to binary mode QPDF_DLL void binary_stdout(); @@ -550,8 +555,7 @@ namespace QUtil inline bool QUtil::is_hex_digit(char ch) { - return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || - ('A' <= ch && ch <= 'F'); + return hex_decode_char(ch) < '\20'; } inline bool @@ -603,4 +607,13 @@ QUtil::hex_encode_char(char c) '#', hexchars[static_cast<unsigned char>(c) >> 4], hexchars[c & 0x0f]}; } +inline constexpr char +QUtil::hex_decode_char(char digit) noexcept +{ + return digit <= '9' && digit >= '0' + ? char(digit - '0') + : (digit >= 'a' ? char(digit - 'a' + 10) + : (digit >= 'A' ? 
char(digit - 'A' + 10) : '\20')); +} + #endif // QUTIL_HH diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index cb60eabc..fbf06f88 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1121,12 +1121,8 @@ JSONParser::getToken() case ls_u4: using ui = unsigned int; - if ('0' <= *p && *p <= '9') { - u_value = 16 * u_value + (ui(*p) - ui('0')); - } else if ('a' <= *p && *p <= 'f') { - u_value = 16 * u_value + (10 + ui(*p) - ui('a')); - } else if ('A' <= *p && *p <= 'F') { - u_value = 16 * u_value + (10 + ui(*p) - ui('A')); + if (ui val = ui(QUtil::hex_decode_char(*p)); val < 16) { + u_value = 16 * u_value + val; } else { tokenError(); } diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 990d5b65..d8d457ab 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -449,18 +449,9 @@ QPDFTokenizer::inNameHex1(char ch) { this->hex_char = ch; - if ('0' <= ch && ch <= '9') { - this->char_code = 16 * (int(ch) - int('0')); + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->char_code = int(hval) << 4; this->state = st_name_hex2; - - } else if ('A' <= ch && ch <= 'F') { - this->char_code = 16 * (10 + int(ch) - int('A')); - this->state = st_name_hex2; - - } else if ('a' <= ch && ch <= 'f') { - this->char_code = 16 * (10 + int(ch) - int('a')); - this->state = st_name_hex2; - } else { QTC::TC("qpdf", "QPDFTokenizer bad name 1"); this->error_message = "name with stray # will not work with PDF >= 1.2"; @@ -475,15 +466,8 @@ QPDFTokenizer::inNameHex1(char ch) void QPDFTokenizer::inNameHex2(char ch) { - if ('0' <= ch && ch <= '9') { - this->char_code += int(ch) - int('0'); - - } else if ('A' <= ch && ch <= 'F') { - this->char_code += 10 + int(ch) - int('A'); - - } else if ('a' <= ch && ch <= 'f') { - this->char_code += 10 + int(ch) - int('a'); - + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->char_code |= int(hval); } else { QTC::TC("qpdf", "QPDFTokenizer bad name 2"); this->error_message = "name with stray # will 
not work with PDF >= 1.2"; @@ -675,16 +659,8 @@ QPDFTokenizer::inLiteral(char ch) void QPDFTokenizer::inHexstring(char ch) { - if ('0' <= ch && ch <= '9') { - this->char_code = 16 * (int(ch) - int('0')); - this->state = st_in_hexstring_2nd; - - } else if ('A' <= ch && ch <= 'F') { - this->char_code = 16 * (10 + int(ch) - int('A')); - this->state = st_in_hexstring_2nd; - - } else if ('a' <= ch && ch <= 'f') { - this->char_code = 16 * (10 + int(ch) - int('a')); + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->char_code = int(hval) << 4; this->state = st_in_hexstring_2nd; } else if (ch == '>') { @@ -706,16 +682,8 @@ QPDFTokenizer::inHexstring(char ch) void QPDFTokenizer::inHexstring2nd(char ch) { - if ('0' <= ch && ch <= '9') { - this->val += char(this->char_code + int(ch) - int('0')); - this->state = st_in_hexstring; - - } else if ('A' <= ch && ch <= 'F') { - this->val += char(this->char_code + 10 + int(ch) - int('A')); - this->state = st_in_hexstring; - - } else if ('a' <= ch && ch <= 'f') { - this->val += char(this->char_code + 10 + int(ch) - int('a')); + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->val += char(this->char_code) | hval; this->state = st_in_hexstring; } else if (ch == '>') { -- cgit v1.2.3-54-g00ecf From 5f4f553c3588f3ef3cb704c9d3c6db6bb78ccfa9 Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 23 Mar 2023 18:51:28 +0000 Subject: Refactor QUtil::hex_decode --- libqpdf/QUtil.cc | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc index bae067b6..03301d9d 100644 --- a/libqpdf/QUtil.cc +++ b/libqpdf/QUtil.cc @@ -783,28 +783,25 @@ std::string QUtil::hex_decode(std::string const& input) { std::string result; - size_t pos = 0; + // We know result.size() <= 0.5 * input.size() + 1. However, reserving + // string space for this upper bound has a negative impact. 
+ bool first = true; + char decoded; for (auto ch: input) { - bool skip = false; - if ((ch >= 'A') && (ch <= 'F')) { - ch = QIntC::to_char(ch - 'A' + 10); - } else if ((ch >= 'a') && (ch <= 'f')) { - ch = QIntC::to_char(ch - 'a' + 10); - } else if ((ch >= '0') && (ch <= '9')) { - ch = QIntC::to_char(ch - '0'); - } else { - skip = true; - } - if (!skip) { - if (pos == 0) { - result.push_back(static_cast<char>(ch << 4)); - pos = 1; + ch = hex_decode_char(ch); + if (ch < '\20') { + if (first) { + decoded = static_cast<char>(ch << 4); + first = false; } else { - result[result.length() - 1] |= ch; - pos = 0; + result.push_back(decoded | ch); + first = true; } } } + if (!first) { + result.push_back(decoded); + } return result; } -- cgit v1.2.3-54-g00ecf