aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf/QPDFTokenizer.cc
diff options
context:
space:
mode:
authorm-holger <m-holger@kubitscheck.org>2022-08-18 18:54:51 +0200
committerm-holger <m-holger@kubitscheck.org>2022-08-21 03:26:27 +0200
commit86ade3f9cd367296e64ae86870ec12ebf34883f6 (patch)
tree912573ddf5bb5dabbecb079635e42fb09448efb0 /libqpdf/QPDFTokenizer.cc
parent91fb61eda5ae66736ae9e5975ae6f2e0867366e6 (diff)
downloadqpdf-86ade3f9cd367296e64ae86870ec12ebf34883f6.tar.zst
Add private method QPDFTokenizer::handleCharacter
Diffstat (limited to 'libqpdf/QPDFTokenizer.cc')
-rw-r--r--libqpdf/QPDFTokenizer.cc110
1 files changed, 55 insertions, 55 deletions
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 60156547..f5a50695 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -200,12 +200,25 @@ QPDFTokenizer::presentCharacter(char ch)
{
char orig_ch = ch;
+ handleCharacter(ch);
+
+ if ((this->state == st_token_ready) && (this->type == tt_word)) {
+ resolveLiteral();
+ }
+
+ if (!(betweenTokens() ||
+ ((this->state == st_token_ready) && this->unread_char))) {
+ this->raw_val += orig_ch;
+ }
+}
+
+void
+QPDFTokenizer::handleCharacter(char ch)
+{
// State machine is implemented such that some characters may be
// handled more than once. This happens whenever you have to use
// the character that caused a state change in the new state.
- bool handled = true;
-
switch (this->state) {
case (st_token_ready):
throw std::logic_error(
@@ -259,7 +272,7 @@ QPDFTokenizer::presentCharacter(char ch)
this->state = st_literal;
}
}
- break;
+ return;
case st_in_space:
// We only enter this state if include_ignorable is true.
@@ -271,7 +284,7 @@ QPDFTokenizer::presentCharacter(char ch)
} else {
this->val += ch;
}
- break;
+ return;
case st_in_comment:
if ((ch == '\r') || (ch == '\n')) {
@@ -286,17 +299,16 @@ QPDFTokenizer::presentCharacter(char ch)
} else if (this->include_ignorable) {
this->val += ch;
}
- break;
+ return;
case st_lt:
if (ch == '<') {
this->val += "<<";
this->type = tt_dict_open;
this->state = st_token_ready;
- } else {
- handled = false;
- this->state = st_in_hexstring;
+ return;
}
+ this->state = st_in_hexstring;
break;
case st_gt:
@@ -313,7 +325,7 @@ QPDFTokenizer::presentCharacter(char ch)
this->char_to_unread = ch;
this->state = st_token_ready;
}
- break;
+ return;
case st_in_string:
{
@@ -403,7 +415,7 @@ QPDFTokenizer::presentCharacter(char ch)
this->last_char_was_bs =
((!this->last_char_was_bs) && (ch == '\\'));
}
- break;
+ return;
case st_literal:
if (isDelimiter(ch)) {
@@ -422,7 +434,7 @@ QPDFTokenizer::presentCharacter(char ch)
} else {
this->val += ch;
}
- break;
+ return;
case st_inline_image:
this->val += ch;
@@ -432,57 +444,45 @@ QPDFTokenizer::presentCharacter(char ch)
this->inline_image_bytes = 0;
this->state = st_token_ready;
}
+ return;
+
+ case (st_in_hexstring):
break;
default:
- handled = false;
- }
-
- if (handled) {
- // okay
- } else if (this->state == st_in_hexstring) {
- if (ch == '>') {
- this->type = tt_string;
- this->state = st_token_ready;
- if (this->val.length() % 2) {
- // PDF spec says odd hexstrings have implicit
- // trailing 0.
- this->val += '0';
- }
- char num[3];
- num[2] = '\0';
- std::string nval;
- for (unsigned int i = 0; i < this->val.length(); i += 2) {
- num[0] = this->val.at(i);
- num[1] = this->val.at(i + 1);
- char nch = static_cast<char>(strtol(num, nullptr, 16));
- nval += nch;
- }
- this->val.clear();
- this->val += nval;
- } else if (QUtil::is_hex_digit(ch)) {
- this->val += ch;
- } else if (isSpace(ch)) {
- // ignore
- } else {
- this->type = tt_bad;
- QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
- this->error_message =
- std::string("invalid character (") + ch + ") in hexstring";
- this->state = st_token_ready;
- }
- } else {
throw std::logic_error(
"INTERNAL ERROR: invalid state while reading token");
}
- if ((this->state == st_token_ready) && (this->type == tt_word)) {
- resolveLiteral();
- }
-
- if (!(betweenTokens() ||
- ((this->state == st_token_ready) && this->unread_char))) {
- this->raw_val += orig_ch;
+ if (ch == '>') {
+ this->type = tt_string;
+ this->state = st_token_ready;
+ if (this->val.length() % 2) {
+ // PDF spec says odd hexstrings have implicit
+ // trailing 0.
+ this->val += '0';
+ }
+ char num[3];
+ num[2] = '\0';
+ std::string nval;
+ for (unsigned int i = 0; i < this->val.length(); i += 2) {
+ num[0] = this->val.at(i);
+ num[1] = this->val.at(i + 1);
+ char nch = static_cast<char>(strtol(num, nullptr, 16));
+ nval += nch;
+ }
+ this->val.clear();
+ this->val += nval;
+ } else if (QUtil::is_hex_digit(ch)) {
+ this->val += ch;
+ } else if (isSpace(ch)) {
+ // ignore
+ } else {
+ this->type = tt_bad;
+ QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
+ this->error_message =
+ std::string("invalid character (") + ch + ") in hexstring";
+ this->state = st_token_ready;
}
}