aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author m-holger <m-holger@kubitscheck.org> 2023-01-29 16:21:29 +0100
committer m-holger <m-holger@kubitscheck.org> 2023-02-04 16:00:21 +0100
commit ee32235f54884247f6117fc0fbdd462a4e38ac1f (patch)
tree bf145b436e88923572a0ae8e7bdf4b42ea9a0ed9
parent f5b7448a2732d0e6f39855b98ebca63df2824916 (diff)
download qpdf-ee32235f54884247f6117fc0fbdd462a4e38ac1f.tar.zst
In JSONParser::getToken handle legal control chars early
Also, reject them in strings.
-rw-r--r-- libqpdf/JSON.cc 564
-rw-r--r-- libtests/libtests.testcov 1
-rw-r--r-- libtests/qtest/json_parse.test 8
-rw-r--r-- libtests/qtest/json_parse/bad-01.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-02.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-03.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-27.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-31.json 2
-rw-r--r-- libtests/qtest/json_parse/bad-45.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-46.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-47.out 2
11 files changed, 305 insertions, 284 deletions
diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc
index afeda315..e9637e86 100644
--- a/libqpdf/JSON.cc
+++ b/libqpdf/JSON.cc
@@ -723,10 +723,11 @@ JSONParser::handle_u_code(
void
JSONParser::tokenError()
{
- if (bytes == 0) {
+ if (done) {
QTC::TC("libtests", "JSON parse ls premature end of input");
throw std::runtime_error("JSON: premature end of input");
}
+
if (lex_state == ls_u4) {
QTC::TC("libtests", "JSON parse bad hex after u");
throw std::runtime_error(
@@ -737,6 +738,11 @@ JSONParser::tokenError()
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
": keyword: unexpected character " + std::string(p, 1));
+ } else if (lex_state == ls_string) {
+ QTC::TC("libtests", "JSON parse control char in string");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": control character in string (missing \"?)");
} else if (lex_state == ls_backslash) {
QTC::TC("libtests", "JSON parse backslash bad character");
throw std::runtime_error(
@@ -779,6 +785,7 @@ JSONParser::tokenError()
"JSON: offset " + std::to_string(offset) +
": numeric literal: unexpected character " + std::string(p, 1));
}
+ throw std::logic_error("JSON::tokenError : unhandled error");
}
void
@@ -792,7 +799,7 @@ JSONParser::getToken()
unsigned long high_surrogate = 0;
qpdf_offset_t high_offset = 0;
- while (!done) {
+ while (true) {
if (p == (buf + bytes)) {
p = buf;
bytes = is.read(buf, sizeof(buf));
@@ -808,307 +815,320 @@ JSONParser::getToken()
// end the current token (unless we are still before the start
// of the token).
if (lex_state == ls_top) {
- // Continue with token
+ ++p;
+ ++offset;
} else {
- // done
+ break;
}
+
} else {
QTC::TC("libtests", "JSON parse null character");
throw std::runtime_error(
"JSON: control or null character at offset " +
std::to_string(offset));
}
- }
- action = append;
- switch (lex_state) {
- case ls_top:
- token_start = offset;
- if (*p == '"') {
- lex_state = ls_string;
- action = ignore;
- } else if (QUtil::is_space(*p)) {
- action = ignore;
- } else if (*p == ',') {
- lex_state = ls_comma;
- action = ignore;
- ready = true;
- } else if (*p == ':') {
- lex_state = ls_colon;
- action = ignore;
- ready = true;
- } else if (*p == '{') {
- lex_state = ls_begin_dict;
- action = ignore;
- ready = true;
- } else if (*p == '}') {
- lex_state = ls_end_dict;
- action = ignore;
- ready = true;
- } else if (*p == '[') {
- lex_state = ls_begin_array;
- action = ignore;
- ready = true;
- } else if (*p == ']') {
- lex_state = ls_end_array;
- action = ignore;
- ready = true;
- } else if ((*p >= 'a') && (*p <= 'z')) {
- lex_state = ls_alpha;
- } else if (*p == '-') {
- lex_state = ls_number_minus;
- } else if ((*p >= '1') && (*p <= '9')) {
- lex_state = ls_number_before_point;
- } else if (*p == '0') {
- lex_state = ls_number_leading_zero;
- } else {
- QTC::TC("libtests", "JSON parse bad character");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected character " + std::string(p, 1));
- }
- break;
-
- case ls_number_minus:
- if ((*p >= '1') && (*p <= '9')) {
- lex_state = ls_number_before_point;
- } else if (*p == '0') {
- lex_state = ls_number_leading_zero;
- } else {
- QTC::TC("libtests", "JSON parse number minus no digits");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": numeric literal: no digit after minus sign");
- }
- break;
-
- case ls_number_leading_zero:
- if (*p == '.') {
- lex_state = ls_number_point;
- } else if (QUtil::is_space(*p)) {
- lex_state = ls_number;
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- lex_state = ls_number;
- action = reread;
- ready = true;
- } else if (*p == 'e' || *p == 'E') {
- lex_state = ls_number_e;
- } else {
- QTC::TC("libtests", "JSON parse leading zero");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": number with leading zero");
- }
- break;
-
- case ls_number_before_point:
- if ((*p >= '0') && (*p <= '9')) {
- // continue
- } else if (*p == '.') {
- lex_state = ls_number_point;
- } else if (QUtil::is_space(*p)) {
- lex_state = ls_number;
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- lex_state = ls_number;
- action = reread;
- ready = true;
- } else if (*p == 'e' || *p == 'E') {
- lex_state = ls_number_e;
- } else {
- tokenError();
- }
- break;
-
- case ls_number_point:
- if ((*p >= '0') && (*p <= '9')) {
- lex_state = ls_number_after_point;
- } else {
- tokenError();
- }
- break;
-
- case ls_number_after_point:
- if ((*p >= '0') && (*p <= '9')) {
- // continue
- } else if (QUtil::is_space(*p)) {
- lex_state = ls_number;
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- lex_state = ls_number;
- action = reread;
- ready = true;
- } else if (*p == 'e' || *p == 'E') {
- lex_state = ls_number_e;
- } else {
- tokenError();
- }
- break;
+ } else {
+ action = append;
+ switch (lex_state) {
+ case ls_top:
+ token_start = offset;
+ if (*p == '"') {
+ lex_state = ls_string;
+ action = ignore;
+ } else if (*p == ' ') {
+ action = ignore;
+ } else if (*p == ',') {
+ lex_state = ls_comma;
+ action = ignore;
+ ready = true;
+ } else if (*p == ',') {
+ lex_state = ls_comma;
+ action = ignore;
+ ready = true;
+ } else if (*p == ':') {
+ lex_state = ls_colon;
+ action = ignore;
+ ready = true;
+ } else if (*p == '{') {
+ lex_state = ls_begin_dict;
+ action = ignore;
+ ready = true;
+ } else if (*p == '}') {
+ lex_state = ls_end_dict;
+ action = ignore;
+ ready = true;
+ } else if (*p == '[') {
+ lex_state = ls_begin_array;
+ action = ignore;
+ ready = true;
+ } else if (*p == ']') {
+ lex_state = ls_end_array;
+ action = ignore;
+ ready = true;
+ } else if ((*p >= 'a') && (*p <= 'z')) {
+ lex_state = ls_alpha;
+ } else if (*p == '-') {
+ lex_state = ls_number_minus;
+ } else if ((*p >= '1') && (*p <= '9')) {
+ lex_state = ls_number_before_point;
+ } else if (*p == '0') {
+ lex_state = ls_number_leading_zero;
+ } else {
+ QTC::TC("libtests", "JSON parse bad character");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": unexpected character " + std::string(p, 1));
+ }
+ break;
- case ls_number_e:
- if ((*p >= '0') && (*p <= '9')) {
- lex_state = ls_number;
- } else if ((*p == '+') || (*p == '-')) {
- lex_state = ls_number_e_sign;
- } else {
- tokenError();
- }
- break;
+ case ls_number_minus:
+ if ((*p >= '1') && (*p <= '9')) {
+ lex_state = ls_number_before_point;
+ } else if (*p == '0') {
+ lex_state = ls_number_leading_zero;
+ } else {
+ QTC::TC("libtests", "JSON parse number minus no digits");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: no digit after minus sign");
+ }
+ break;
- case ls_number_e_sign:
- if ((*p >= '0') && (*p <= '9')) {
- lex_state = ls_number;
- } else {
- tokenError();
- }
- break;
+ case ls_number_leading_zero:
+ if (*p == '.') {
+ lex_state = ls_number_point;
+ } else if (*p == ' ') {
+ lex_state = ls_number;
+ action = ignore;
+ ready = true;
+ } else if (strchr("{}[]:,", *p)) {
+ lex_state = ls_number;
+ action = reread;
+ ready = true;
+ } else if (*p == 'e' || *p == 'E') {
+ lex_state = ls_number_e;
+ } else {
+ QTC::TC("libtests", "JSON parse leading zero");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": number with leading zero");
+ }
+ break;
- case ls_number:
- // We only get here after we have seen an exponent.
- if ((*p >= '0') && (*p <= '9')) {
- // continue
- } else if (QUtil::is_space(*p)) {
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- action = reread;
- ready = true;
- } else {
- tokenError();
- }
- break;
+ case ls_number_before_point:
+ if ((*p >= '0') && (*p <= '9')) {
+ // continue
+ } else if (*p == '.') {
+ lex_state = ls_number_point;
+ } else if (*p == ' ') {
+ lex_state = ls_number;
+ action = ignore;
+ ready = true;
+ } else if (strchr("{}[]:,", *p)) {
+ lex_state = ls_number;
+ action = reread;
+ ready = true;
+ } else if (*p == 'e' || *p == 'E') {
+ lex_state = ls_number_e;
+ } else {
+ tokenError();
+ }
+ break;
- case ls_alpha:
- if ((*p >= 'a') && (*p <= 'z')) {
- // okay
- } else if (QUtil::is_space(*p)) {
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- action = reread;
- ready = true;
- } else {
- tokenError();
- }
- break;
+ case ls_number_point:
+ if ((*p >= '0') && (*p <= '9')) {
+ lex_state = ls_number_after_point;
+ } else {
+ tokenError();
+ }
+ break;
- case ls_string:
- if (*p == '"') {
- if (high_offset) {
- QTC::TC("libtests", "JSON 16 dangling high");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(high_offset) +
- ": UTF-16 high surrogate not followed by low "
- "surrogate");
+ case ls_number_after_point:
+ if ((*p >= '0') && (*p <= '9')) {
+ // continue
+ } else if (*p == ' ') {
+ lex_state = ls_number;
+ action = ignore;
+ ready = true;
+ } else if (strchr("{}[]:,", *p)) {
+ lex_state = ls_number;
+ action = reread;
+ ready = true;
+ } else if (*p == 'e' || *p == 'E') {
+ lex_state = ls_number_e;
+ } else {
+ tokenError();
}
- action = ignore;
- ready = true;
- } else if (*p == '\\') {
- lex_state = ls_backslash;
- action = ignore;
- }
- break;
+ break;
- case ls_backslash:
- action = ignore;
- lex_state = ls_string;
- switch (*p) {
- case '\\':
- case '\"':
- case '/':
- // \/ is allowed in json input, but so is /, so we
- // don't map / to \/ in output.
- token += *p;
+ case ls_number_e:
+ if ((*p >= '0') && (*p <= '9')) {
+ lex_state = ls_number;
+ } else if ((*p == '+') || (*p == '-')) {
+ lex_state = ls_number_e_sign;
+ } else {
+ tokenError();
+ }
break;
- case 'b':
- token += '\b';
+
+ case ls_number_e_sign:
+ if ((*p >= '0') && (*p <= '9')) {
+ lex_state = ls_number;
+ } else {
+ tokenError();
+ }
break;
- case 'f':
- token += '\f';
+
+ case ls_number:
+ // We only get here after we have seen an exponent.
+ if ((*p >= '0') && (*p <= '9')) {
+ // continue
+ } else if (*p == ' ') {
+ action = ignore;
+ ready = true;
+ } else if (strchr("{}[]:,", *p)) {
+ action = reread;
+ ready = true;
+ } else {
+ tokenError();
+ }
break;
- case 'n':
- token += '\n';
+
+ case ls_alpha:
+ if ((*p >= 'a') && (*p <= 'z')) {
+ // okay
+ } else if (*p == ' ') {
+ action = ignore;
+ ready = true;
+ } else if (strchr("{}[]:,", *p)) {
+ action = reread;
+ ready = true;
+ } else {
+ tokenError();
+ }
break;
- case 'r':
- token += '\r';
+
+ case ls_string:
+ if (*p == '"') {
+ if (high_offset) {
+ QTC::TC("libtests", "JSON 16 dangling high");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(high_offset) +
+ ": UTF-16 high surrogate not followed by low "
+ "surrogate");
+ }
+ action = ignore;
+ ready = true;
+ } else if (*p == '\\') {
+ lex_state = ls_backslash;
+ action = ignore;
+ }
break;
- case 't':
- token += '\t';
+
+ case ls_backslash:
+ action = ignore;
+ lex_state = ls_string;
+ switch (*p) {
+ case '\\':
+ case '\"':
+ case '/':
+ // \/ is allowed in json input, but so is /, so we
+ // don't map / to \/ in output.
+ token += *p;
+ break;
+ case 'b':
+ token += '\b';
+ break;
+ case 'f':
+ token += '\f';
+ break;
+ case 'n':
+ token += '\n';
+ break;
+ case 'r':
+ token += '\r';
+ break;
+ case 't':
+ token += '\t';
+ break;
+ case 'u':
+ lex_state = ls_u4;
+ u_count = 0;
+ u_value = 0;
+ break;
+ default:
+ lex_state = ls_backslash;
+ tokenError();
+ }
break;
- case 'u':
- lex_state = ls_u4;
- u_count = 0;
- u_value = 0;
+
+ case ls_u4:
+ using ui = unsigned int;
+ action = ignore;
+ if ('0' <= *p && *p <= '9') {
+ u_value = 16 * u_value + (ui(*p) - ui('0'));
+ } else if ('a' <= *p && *p <= 'f') {
+ u_value = 16 * u_value + (10 + ui(*p) - ui('a'));
+ } else if ('A' <= *p && *p <= 'F') {
+ u_value = 16 * u_value + (10 + ui(*p) - ui('A'));
+ } else {
+ tokenError();
+ }
+ if (++u_count == 4) {
+ handle_u_code(
+ u_value,
+ offset - 5,
+ high_surrogate,
+ high_offset,
+ token);
+ lex_state = ls_string;
+ }
break;
+
default:
- lex_state = ls_backslash;
- tokenError();
+ throw std::logic_error(
+ "JSONParser::getToken : trying to handle delimiter state");
}
- break;
-
- case ls_u4:
- using ui = unsigned int;
- action = ignore;
- if ('0' <= *p && *p <= '9') {
- u_value = 16 * u_value + (ui(*p) - ui('0'));
- } else if ('a' <= *p && *p <= 'f') {
- u_value = 16 * u_value + (10 + ui(*p) - ui('a'));
- } else if ('A' <= *p && *p <= 'F') {
- u_value = 16 * u_value + (10 + ui(*p) - ui('A'));
- } else {
- tokenError();
+ switch (action) {
+ case reread:
+ break;
+ case append:
+ token.append(1, *p);
+ // fall through
+ case ignore:
+ ++p;
+ ++offset;
+ break;
}
- if (++u_count == 4) {
- handle_u_code(
- u_value, offset - 5, high_surrogate, high_offset, token);
- lex_state = ls_string;
+ if (ready) {
+ return;
}
- break;
-
- default:
- throw std::logic_error(
- "JSONParser::getToken : trying to handle delimiter state");
- }
- switch (action) {
- case reread:
- break;
- case append:
- token.append(1, *p);
- // fall through
- case ignore:
- ++p;
- ++offset;
- break;
- }
- if (ready) {
- break;
}
}
- if (done) {
- if (!token.empty() && !ready) {
- switch (lex_state) {
- case ls_top:
- // Can't happen
- throw std::logic_error("tok_start set in ls_top while parsing");
- break;
- case ls_number_leading_zero:
- case ls_number_before_point:
- case ls_number_after_point:
- lex_state = ls_number;
- break;
+ // We only get here if on end of input or if the last character was a
+ // control character.
- case ls_number:
- case ls_alpha:
- // terminal state
- break;
+ if (!token.empty()) {
+ switch (lex_state) {
+ case ls_top:
+ // Can't happen
+ throw std::logic_error("tok_start set in ls_top while parsing");
+ break;
- default:
- tokenError();
- }
+ case ls_number_leading_zero:
+ case ls_number_before_point:
+ case ls_number_after_point:
+ lex_state = ls_number;
+ break;
+
+ case ls_number:
+ case ls_alpha:
+ // terminal state
+ break;
+
+ default:
+ tokenError();
}
}
}
diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov
index 4b3bb45b..5e5c2e00 100644
--- a/libtests/libtests.testcov
+++ b/libtests/libtests.testcov
@@ -79,6 +79,7 @@ JSON parse number minus no digits 0
JSON parse incomplete number 0
JSON parse keyword bad character 0
JSON parse backslash bad character 0
+JSON parse control char in string 0
JSON parse leading zero 0
JSON parse ls premature end of input 0
JSON parse bad hex after u 0
diff --git a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test
index 8234b755..699544f6 100644
--- a/libtests/qtest/json_parse.test
+++ b/libtests/qtest/json_parse.test
@@ -125,10 +125,10 @@ my @bad = (
"e after minus", # 42
"missing digit after e", # 43
"missing digit after e+/-", # 44
- # "tab char in string", # 45
- # "cr char in string", # 46
- # "lf char in string", # 47
- # "bs char in string", # 48
+ "tab char in string", # 45
+ "cr char in string", # 46
+ "lf char in string", # 47
+ "bs char in string", # 48
);
my $i = 0;
diff --git a/libtests/qtest/json_parse/bad-01.out b/libtests/qtest/json_parse/bad-01.out
index a4254cff..8ae96c30 100644
--- a/libtests/qtest/json_parse/bad-01.out
+++ b/libtests/qtest/json_parse/bad-01.out
@@ -1 +1 @@
-exception: bad-01.json: JSON: offset 9: material follows end of object: junk
+exception: bad-01.json: JSON: offset 8: material follows end of object: junk
diff --git a/libtests/qtest/json_parse/bad-02.out b/libtests/qtest/json_parse/bad-02.out
index 485c9658..212b2f4f 100644
--- a/libtests/qtest/json_parse/bad-02.out
+++ b/libtests/qtest/json_parse/bad-02.out
@@ -1 +1 @@
-exception: bad-02.json: JSON: offset 11: material follows end of object: junk
+exception: bad-02.json: JSON: offset 10: material follows end of object: junk
diff --git a/libtests/qtest/json_parse/bad-03.out b/libtests/qtest/json_parse/bad-03.out
index 38f35119..a1411e0e 100644
--- a/libtests/qtest/json_parse/bad-03.out
+++ b/libtests/qtest/json_parse/bad-03.out
@@ -1 +1 @@
-exception: bad-03.json: JSON: offset 16: material follows end of object: junk
+exception: bad-03.json: JSON: offset 15: material follows end of object: junk
diff --git a/libtests/qtest/json_parse/bad-27.out b/libtests/qtest/json_parse/bad-27.out
index 70fcbf74..4c1ecfeb 100644
--- a/libtests/qtest/json_parse/bad-27.out
+++ b/libtests/qtest/json_parse/bad-27.out
@@ -1 +1 @@
-exception: bad-27.json: JSON: premature end of input
+exception: bad-27.json: JSON: offset 5: control character in string (missing "?)
diff --git a/libtests/qtest/json_parse/bad-31.json b/libtests/qtest/json_parse/bad-31.json
index 39cdd0de..277cc02f 100644
--- a/libtests/qtest/json_parse/bad-31.json
+++ b/libtests/qtest/json_parse/bad-31.json
@@ -1 +1 @@
--
+-
diff --git a/libtests/qtest/json_parse/bad-45.out b/libtests/qtest/json_parse/bad-45.out
index ba7e4f16..d4320b0a 100644
--- a/libtests/qtest/json_parse/bad-45.out
+++ b/libtests/qtest/json_parse/bad-45.out
@@ -1 +1 @@
-"Tab in str\ting"
+exception: bad-45.json: JSON: offset 11: control character in string (missing "?)
diff --git a/libtests/qtest/json_parse/bad-46.out b/libtests/qtest/json_parse/bad-46.out
index 2baad6a4..50aa5ffb 100644
--- a/libtests/qtest/json_parse/bad-46.out
+++ b/libtests/qtest/json_parse/bad-46.out
@@ -1 +1 @@
-"cr in str\ring"
+exception: bad-46.json: JSON: offset 10: control character in string (missing "?)
diff --git a/libtests/qtest/json_parse/bad-47.out b/libtests/qtest/json_parse/bad-47.out
index 30549072..39f9d3d5 100644
--- a/libtests/qtest/json_parse/bad-47.out
+++ b/libtests/qtest/json_parse/bad-47.out
@@ -1 +1 @@
-"lf in str\ning"
+exception: bad-47.json: JSON: offset 10: control character in string (missing "?)