about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/qpdf/QPDFTokenizer.hh | 1
-rw-r--r-- libqpdf/QPDFTokenizer.cc | 141
2 files changed, 78 insertions, 64 deletions
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index a0e18248..8d0ceae1 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -213,6 +213,7 @@ class QPDFTokenizer
void handleCharacter(char);
void inHexstring(char);
+ void inString(char, size_t);
void reset();
// Lexer state
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 1220e016..5127a90d 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -368,70 +368,7 @@ QPDFTokenizer::handleCharacter(char ch)
bs_num_count = 0;
}
- if (this->string_ignoring_newline && (ch == '\n')) {
- // ignore
- this->string_ignoring_newline = false;
- } else if (
- ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) {
- this->bs_num_register[bs_num_count++] = ch;
- } else if (this->last_char_was_bs) {
- switch (ch) {
- case 'n':
- this->val += '\n';
- break;
-
- case 'r':
- this->val += '\r';
- break;
-
- case 't':
- this->val += '\t';
- break;
-
- case 'b':
- this->val += '\b';
- break;
-
- case 'f':
- this->val += '\f';
- break;
-
- case '\n':
- break;
-
- case '\r':
- this->string_ignoring_newline = true;
- break;
-
- default:
- // PDF spec says backslash is ignored before anything else
- this->val += ch;
- break;
- }
- } else if (ch == '\\') {
- // last_char_was_bs is set/cleared below as appropriate
- if (bs_num_count) {
- throw std::logic_error(
- "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 "
- "when ch == '\\'");
- }
- } else if (ch == '(') {
- this->val += ch;
- ++this->string_depth;
- } else if ((ch == ')') && (--this->string_depth == 0)) {
- this->type = tt_string;
- this->state = st_token_ready;
- } else if (ch == '\r') {
- // CR by itself is converted to LF
- this->val += '\n';
- } else if (ch == '\n') {
- // CR LF is converted to LF
- if (!this->last_char_was_cr) {
- this->val += ch;
- }
- } else {
- this->val += ch;
- }
+ inString(ch, bs_num_count);
this->last_char_was_cr =
((!this->string_ignoring_newline) && (ch == '\r'));
@@ -515,6 +452,82 @@ QPDFTokenizer::inHexstring(char ch)
}
void
+QPDFTokenizer::inString(char ch, size_t bs_num_count)
+{ // Consume one character of a parenthesized string: backslash escapes, paren nesting, CR/LF folding.
+ bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
+ if (this->string_ignoring_newline && (ch == '\n')) {
+ // LF directly after a backslash-CR pair: drop it and stop watching for it
+ this->string_ignoring_newline = false;
+ return;
+ } else if (ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) { // octal digit of a backslash-number escape
+ this->bs_num_register[bs_num_count++] = ch; // bs_num_count is a by-value parameter: the increment is local to this call
+ return;
+ } else if (this->last_char_was_bs) {
+ switch (ch) {
+ case 'n':
+ this->val += '\n';
+ return;
+
+ case 'r':
+ this->val += '\r';
+ return;
+
+ case 't':
+ this->val += '\t';
+ return;
+
+ case 'b':
+ this->val += '\b';
+ return;
+
+ case 'f':
+ this->val += '\f';
+ return;
+
+ case '\n':
+ return; // backslash-LF: nothing is appended to val
+
+ case '\r':
+ this->string_ignoring_newline = true; // backslash-CR: nothing appended; an LF arriving next is skipped as well
+ return;
+
+ default:
+ // PDF spec says a backslash before any other character is ignored; the character itself is kept
+ this->val += ch;
+ return;
+ }
+ } else if (ch == '\\') {
+ // Nothing is appended for the backslash. last_char_was_bs is not updated here; NOTE(review): presumably handleCharacter sets/clears it after this call returns - confirm
+ if (bs_num_count) {
+ throw std::logic_error(
+ "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 "
+ "when ch == '\\'");
+ }
+ } else if (ch == '(') {
+ this->val += ch;
+ ++this->string_depth;
+ return;
+ } else if ((ch == ')') && (--this->string_depth == 0)) { // depth is decremented for every unescaped ')'; only the outermost one ends the token
+ this->type = tt_string;
+ this->state = st_token_ready;
+ return;
+ } else if (ch == '\r') {
+ // CR by itself is converted to LF
+ this->val += '\n';
+ return;
+ } else if (ch == '\n') {
+ // CR LF is converted to LF: the preceding CR already appended the LF, so skip this one
+ if (!this->last_char_was_cr) {
+ this->val += ch;
+ }
+ return;
+ } else {
+ this->val += ch; // ordinary character, including a ')' that closed a nested (non-outermost) level
+ return;
+ }
+}
+
+void
QPDFTokenizer::presentEOF()
{
if (this->state == st_literal) {