about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author m-holger <m-holger@kubitscheck.org> 2023-01-30 14:17:09 +0100
committer m-holger <m-holger@kubitscheck.org> 2023-02-04 16:00:21 +0100
commit d3152869b666a725d303e0667a69f973fc5a96ed (patch)
tree 436e418534d89205ff64e317b43ca1d34ab2fb0b
parent ee32235f54884247f6117fc0fbdd462a4e38ac1f (diff)
download qpdf-d3152869b666a725d303e0667a69f973fc5a96ed.tar.zst
In JSONParser::getToken handle structural and space chars early
-rw-r--r-- libqpdf/JSON.cc 164
-rw-r--r-- libtests/qtest/json_parse/bad-09.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-31.out 2
3 files changed, 95 insertions, 73 deletions
diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc
index e9637e86..59843c05 100644
--- a/libqpdf/JSON.cc
+++ b/libqpdf/JSON.cc
@@ -791,7 +791,7 @@ JSONParser::tokenError()
void
JSONParser::getToken()
{
- enum { append, ignore, reread } action = append;
+ enum { append, ignore } action = append;
bool ready = false;
token.clear();
@@ -820,13 +820,103 @@ JSONParser::getToken()
} else {
break;
}
-
} else {
QTC::TC("libtests", "JSON parse null character");
throw std::runtime_error(
"JSON: control or null character at offset " +
std::to_string(offset));
}
+ } else if (*p == ',') {
+ if (lex_state == ls_top) {
+ ++p;
+ ++offset;
+ lex_state = ls_comma;
+ return;
+ } else if (lex_state == ls_string) {
+ token += *p;
+ ++p;
+ ++offset;
+ } else {
+ break;
+ }
+ } else if (*p == ':') {
+ if (lex_state == ls_top) {
+ ++p;
+ ++offset;
+ lex_state = ls_colon;
+ return;
+ } else if (lex_state == ls_string) {
+ token += *p;
+ ++p;
+ ++offset;
+ } else {
+ break;
+ }
+ } else if (*p == ' ') {
+ if (lex_state == ls_top) {
+ ++p;
+ ++offset;
+ } else if (lex_state == ls_string) {
+ token += *p;
+ ++p;
+ ++offset;
+ } else {
+ break;
+ }
+ } else if (*p == '{') {
+ if (lex_state == ls_top) {
+ token_start = offset;
+ ++p;
+ ++offset;
+ lex_state = ls_begin_dict;
+ return;
+ } else if (lex_state == ls_string) {
+ token += *p;
+ ++p;
+ ++offset;
+ } else {
+ break;
+ }
+ } else if (*p == '}') {
+ if (lex_state == ls_top) {
+ ++p;
+ ++offset;
+ lex_state = ls_end_dict;
+ return;
+ } else if (lex_state == ls_string) {
+ token += *p;
+ ++p;
+ ++offset;
+ } else {
+ break;
+ }
+ } else if (*p == '[') {
+ if (lex_state == ls_top) {
+ token_start = offset;
+ ++p;
+ ++offset;
+ lex_state = ls_begin_array;
+ return;
+ } else if (lex_state == ls_string) {
+ token += *p;
+ ++p;
+ ++offset;
+ } else {
+ break;
+ }
+ } else if (*p == ']') {
+ if (lex_state == ls_top) {
+ ++p;
+ ++offset;
+ lex_state = ls_end_array;
+ return;
+ } else if (lex_state == ls_string) {
+ token += *p;
+ ++p;
+ ++offset;
+ } else {
+ break;
+ }
} else {
action = append;
switch (lex_state) {
@@ -835,36 +925,6 @@ JSONParser::getToken()
if (*p == '"') {
lex_state = ls_string;
action = ignore;
- } else if (*p == ' ') {
- action = ignore;
- } else if (*p == ',') {
- lex_state = ls_comma;
- action = ignore;
- ready = true;
- } else if (*p == ',') {
- lex_state = ls_comma;
- action = ignore;
- ready = true;
- } else if (*p == ':') {
- lex_state = ls_colon;
- action = ignore;
- ready = true;
- } else if (*p == '{') {
- lex_state = ls_begin_dict;
- action = ignore;
- ready = true;
- } else if (*p == '}') {
- lex_state = ls_end_dict;
- action = ignore;
- ready = true;
- } else if (*p == '[') {
- lex_state = ls_begin_array;
- action = ignore;
- ready = true;
- } else if (*p == ']') {
- lex_state = ls_end_array;
- action = ignore;
- ready = true;
} else if ((*p >= 'a') && (*p <= 'z')) {
lex_state = ls_alpha;
} else if (*p == '-') {
@@ -897,14 +957,6 @@ JSONParser::getToken()
case ls_number_leading_zero:
if (*p == '.') {
lex_state = ls_number_point;
- } else if (*p == ' ') {
- lex_state = ls_number;
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- lex_state = ls_number;
- action = reread;
- ready = true;
} else if (*p == 'e' || *p == 'E') {
lex_state = ls_number_e;
} else {
@@ -920,14 +972,6 @@ JSONParser::getToken()
// continue
} else if (*p == '.') {
lex_state = ls_number_point;
- } else if (*p == ' ') {
- lex_state = ls_number;
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- lex_state = ls_number;
- action = reread;
- ready = true;
} else if (*p == 'e' || *p == 'E') {
lex_state = ls_number_e;
} else {
@@ -946,14 +990,6 @@ JSONParser::getToken()
case ls_number_after_point:
if ((*p >= '0') && (*p <= '9')) {
// continue
- } else if (*p == ' ') {
- lex_state = ls_number;
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- lex_state = ls_number;
- action = reread;
- ready = true;
} else if (*p == 'e' || *p == 'E') {
lex_state = ls_number_e;
} else {
@@ -983,12 +1019,6 @@ JSONParser::getToken()
// We only get here after we have seen an exponent.
if ((*p >= '0') && (*p <= '9')) {
// continue
- } else if (*p == ' ') {
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- action = reread;
- ready = true;
} else {
tokenError();
}
@@ -997,12 +1027,6 @@ JSONParser::getToken()
case ls_alpha:
if ((*p >= 'a') && (*p <= 'z')) {
// okay
- } else if (*p == ' ') {
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- action = reread;
- ready = true;
} else {
tokenError();
}
@@ -1090,8 +1114,6 @@ JSONParser::getToken()
"JSONParser::getToken : trying to handle delimiter state");
}
switch (action) {
- case reread:
- break;
case append:
token.append(1, *p);
// fall through
@@ -1107,7 +1129,7 @@ JSONParser::getToken()
}
// We only get here if on end of input or if the last character was a
- // control character.
+ // control character or other delimiter.
if (!token.empty()) {
switch (lex_state) {
diff --git a/libtests/qtest/json_parse/bad-09.out b/libtests/qtest/json_parse/bad-09.out
index 21d2f1c1..979d53d0 100644
--- a/libtests/qtest/json_parse/bad-09.out
+++ b/libtests/qtest/json_parse/bad-09.out
@@ -1 +1 @@
-exception: bad-09.json: JSON: offset 3: expect string as dictionary key
+exception: bad-09.json: JSON: offset 2: expect string as dictionary key
diff --git a/libtests/qtest/json_parse/bad-31.out b/libtests/qtest/json_parse/bad-31.out
index 2228d08d..af177726 100644
--- a/libtests/qtest/json_parse/bad-31.out
+++ b/libtests/qtest/json_parse/bad-31.out
@@ -1 +1 @@
-exception: bad-31.json: JSON: offset 1: numeric literal: no digit after minus sign
+exception: bad-31.json: JSON: offset 1: numeric literal: incomplete number