about summary refs log tree commit diff stats
path: root/libqpdf/QPDFTokenizer.cc
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2018-08-06 00:59:41 +0200
committer: Jay Berkenbilt <ejb@ql.org> 2018-08-06 02:48:35 +0200
commit: 4a4736c6954ab17d923a6d2968f34a33e09d714f (patch)
tree: 16add8bf68ddba38a83cf0933238a95fcc77b7cd /libqpdf/QPDFTokenizer.cc
parent: 1619cad1e8ac6ba9cc87666caae9e71f20fd6b32 (diff)
download: qpdf-4a4736c6954ab17d923a6d2968f34a33e09d714f.tar.zst
Fix EOL handling inside strings (fixes #226)
CR, CRLF, and LF are all supposed to be treated as LF; only one EOL is to be ignored after backslash.
Diffstat (limited to 'libqpdf/QPDFTokenizer.cc')
-rw-r--r-- libqpdf/QPDFTokenizer.cc 27
1 files changed, 23 insertions, 4 deletions
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 9c2a1e05..e1399d82 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -34,6 +34,7 @@ QPDFTokenizer::Members::reset()
string_depth = 0;
string_ignoring_newline = false;
last_char_was_bs = false;
+ last_char_was_cr = false;
}
QPDFTokenizer::Members::~Members()
@@ -217,6 +218,7 @@ QPDFTokenizer::presentCharacter(char ch)
memset(this->m->bs_num_register, '\0',
sizeof(this->m->bs_num_register));
this->m->last_char_was_bs = false;
+ this->m->last_char_was_cr = false;
this->m->state = st_in_string;
}
else if (ch == '<')
@@ -334,8 +336,7 @@ QPDFTokenizer::presentCharacter(char ch)
}
else if (this->m->state == st_in_string)
{
- if (this->m->string_ignoring_newline &&
- (! ((ch == '\r') || (ch == '\n'))))
+ if (this->m->string_ignoring_newline && (ch != '\n'))
{
this->m->string_ignoring_newline = false;
}
@@ -353,9 +354,10 @@ QPDFTokenizer::presentCharacter(char ch)
bs_num_count = 0;
}
- if (this->m->string_ignoring_newline && ((ch == '\r') || (ch == '\n')))
+ if (this->m->string_ignoring_newline && (ch == '\n'))
{
// ignore
+ this->m->string_ignoring_newline = false;
}
else if (ch_is_octal &&
(this->m->last_char_was_bs || (bs_num_count > 0)))
@@ -386,8 +388,10 @@ QPDFTokenizer::presentCharacter(char ch)
this->m->val += '\f';
break;
- case '\r':
case '\n':
+ break;
+
+ case '\r':
this->m->string_ignoring_newline = true;
break;
@@ -417,11 +421,26 @@ QPDFTokenizer::presentCharacter(char ch)
this->m->type = tt_string;
this->m->state = st_token_ready;
}
+ else if (ch == '\r')
+ {
+ // CR by itself is converted to LF
+ this->m->val += '\n';
+ }
+ else if (ch == '\n')
+ {
+ // CR LF is converted to LF
+ if (! this->m->last_char_was_cr)
+ {
+ this->m->val += ch;
+ }
+ }
else
{
this->m->val += ch;
}
+ this->m->last_char_was_cr =
+ ((! this->m->string_ignoring_newline) && (ch == '\r'));
this->m->last_char_was_bs =
((! this->m->last_char_was_bs) && (ch == '\\'));
}