From cba1c352e3c4236205dc96de643e780abb3c7b64 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Jan 2023 11:47:06 +0000 Subject: In JSONParser add lex_state ls_number_minus --- libtests/libtests.testcov | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'libtests/libtests.testcov') diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 2ceef541..26cf2048 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -58,7 +58,6 @@ QPDFArgParser bad option for help 0 QPDFArgParser bad topic for help 0 QPDFArgParser invalid choice handler to unknown 0 JSON parse junk after object 0 -JSON parse decimal with no digits 0 JSON parse invalid keyword 0 JSON parse expected colon 0 JSON parse expected , or } 0 @@ -76,11 +75,11 @@ JSON parse duplicate point 0 JSON parse duplicate e 0 JSON parse unexpected sign 0 JSON parse numeric bad character 0 +JSON parse number minus no digits 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 JSON parse unterminated string 0 JSON parse leading zero 0 -JSON parse number no digits 0 JSON parse premature end of u 0 JSON parse bad hex after u 0 JSONHandler unhandled value 0 -- cgit v1.2.3-70-g09d2 From cdd1f0a9f60747ec0e25139dd530be3caf468eca Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 23 Jan 2023 20:08:11 +0000 Subject: In JSONParser add lex_state ls_number_point Also. remove '.' as starting char in lsTop. --- libqpdf/JSON.cc | 30 ++++++++++++++++++++---------- libtests/libtests.testcov | 1 + libtests/qtest/json_parse/bad-04.out | 2 +- libtests/qtest/json_parse/bad-30.out | 2 +- 4 files changed, 23 insertions(+), 12 deletions(-) (limited to 'libtests/libtests.testcov') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index c8c4fdb0..b068aca2 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -650,6 +650,7 @@ namespace ls_number_minus, ls_number_leading_zero, ls_number_before_point, + ls_number_point, ls_alpha, ls_string, ls_backslash, @@ -825,6 +826,11 @@ JSONParser::numberError() throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected sign"); + } else if (QUtil::is_space(*p) || strchr("{}[]:,", *p)) { + QTC::TC("libtests", "JSON parse incomplete number"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: incomplete number"); } else { QTC::TC("libtests", "JSON parse numeric bad character"); throw std::runtime_error( @@ -884,13 +890,6 @@ JSONParser::getToken() number_after_e = 0; number_saw_point = false; number_saw_e = false; - } else if (*p == '.') { - lex_state = ls_number; - number_before_point = 0; - number_after_point = 0; - number_after_e = 0; - number_saw_point = true; - number_saw_e = false; } else if (strchr("{}[]:,", *p)) { ready = true; } else { @@ -918,7 +917,7 @@ JSONParser::getToken() case ls_number_leading_zero: if (*p == '.') { - lex_state = ls_number; + lex_state = ls_number_point; } else if (*p == 'e') { lex_state = ls_number; } else if (QUtil::is_space(*p)) { @@ -940,7 +939,7 @@ JSONParser::getToken() ++number_before_point; } else if (*p == '.') { number_saw_point = true; - lex_state = ls_number; + lex_state = ls_number_point; } else if (*p == 'e') { number_saw_e = true; lex_state = ls_number; @@ -955,6 +954,15 @@ JSONParser::getToken() } break; + case ls_number_point: + if ((*p >= '0') && (*p <= '9')) { + ++number_after_point; + lex_state = ls_number; + } else { + numberError(); + } + break; + case ls_number: if ((*p >= '0') && (*p <= '9')) { if (number_saw_e) { @@ -1083,6 +1091,7 @@ JSONParser::getToken() break; case ls_number_before_point: + case ls_number_point: case ls_number: case ls_number_minus: case ls_number_leading_zero: @@ -1158,10 +1167,11 @@ JSONParser::handleToken() } break; - case ls_number_before_point: case ls_number: case ls_number_minus: case ls_number_leading_zero: + case ls_number_before_point: + case ls_number_point: if (number_saw_point && (number_after_point == 0)) { // QTC::TC("libtests", "JSON parse decimal with no digits"); throw std::runtime_error( diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 26cf2048..5ca5fb77 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -76,6 +76,7 @@ JSON parse duplicate e 0 JSON parse unexpected sign 0 JSON parse numeric bad character 0 JSON parse number minus no digits 0 +JSON parse incomplete number 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 JSON parse unterminated string 0 diff --git a/libtests/qtest/json_parse/bad-04.out b/libtests/qtest/json_parse/bad-04.out index 7fe71693..27d252f2 100644 --- a/libtests/qtest/json_parse/bad-04.out +++ b/libtests/qtest/json_parse/bad-04.out @@ -1 +1 @@ -exception: bad-04.json: JSON: offset 5: decimal point with no digits +exception: bad-04.json: JSON: offset 4: unexpected character . diff --git a/libtests/qtest/json_parse/bad-30.out b/libtests/qtest/json_parse/bad-30.out index bff961af..ec63bb09 100644 --- a/libtests/qtest/json_parse/bad-30.out +++ b/libtests/qtest/json_parse/bad-30.out @@ -1 +1 @@ -exception: bad-30.json: JSON: offset 5: decimal point with no digits +exception: bad-30.json: JSON: offset 4: numeric literal: incomplete number -- cgit v1.2.3-70-g09d2 From 83f972ceda20e244f52bde7ac052e6931a6d33d3 Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 26 Jan 2023 09:55:34 +0000 Subject: Refactor end of input handling in JSONParser --- libqpdf/JSON.cc | 62 +++++++----------------------------- libtests/libtests.testcov | 3 +- libtests/qtest/json_parse/bad-27.out | 2 +- libtests/qtest/json_parse/bad-28.out | 2 +- libtests/qtest/json_parse/bad-34.out | 2 +- 5 files changed, 16 insertions(+), 55 deletions(-) (limited to 'libtests/libtests.testcov') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 3f1a3879..6ee11309 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1012,7 +1012,7 @@ JSONParser::getToken() case ls_number: // We only get here after we have seen an exponent. if ((*p >= '0') && (*p <= '9')) { - ++number_after_e; + ++number_after_e; } else if (QUtil::is_space(*p)) { action = ignore; ready = true; @@ -1093,38 +1093,27 @@ JSONParser::getToken() } } if (done) { - if ((!token.empty()) && (!ready)) { + if (!token.empty() && !ready) { switch (lex_state) { case ls_top: // Can't happen throw std::logic_error("tok_start set in ls_top while parsing"); break; - case ls_number: - case ls_number_minus: case ls_number_leading_zero: case ls_number_before_point: - case ls_number_point: case ls_number_after_point: - case ls_number_e: - case ls_number_e_sign: - case ls_alpha: - // okay + lex_state = ls_number; break; - case ls_u4: - QTC::TC("libtests", "JSON parse premature end of u"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset - u_count - 1) + - ": \\u must be followed by four characters"); - - case ls_string: - case ls_backslash: - QTC::TC("libtests", "JSON parse unterminated string"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": unterminated string"); + case ls_number: + case ls_alpha: + // terminal state break; + + default: + QTC::TC("libtests", "JSON parse ls premature end of input"); + throw std::runtime_error("JSON: premature end of input"); } } } @@ -1181,32 +1170,6 @@ JSONParser::handleToken() break; case ls_number: - case ls_number_minus: - case ls_number_leading_zero: - case ls_number_before_point: - case ls_number_point: - case ls_number_after_point: - case ls_number_e: - case ls_number_e_sign: - if (number_saw_point && (number_after_point == 0)) { - // QTC::TC("libtests", "JSON parse decimal with no digits"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": decimal point with no digits"); - } - if ((number_before_point > 1) && - ((first_char == '0') || - ((first_char == '-') && (token.at(1) == '0')))) { - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": number with leading zero"); - } - if ((number_before_point == 0) && (number_after_point == 0)) { - // QTC::TC("libtests", "JSON parse number no digits"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": number with no digits"); - } item = std::make_shared(JSON::makeNumber(token)); break; @@ -1229,10 +1192,9 @@ JSONParser::handleToken() item = std::make_shared(JSON::makeString(s_value)); break; - case ls_backslash: - case ls_u4: + default: throw std::logic_error( - "tok_end is set while state = ls_backslash or ls_u4"); + "JSONParser::handleToken : non-terminal lexer state encountered"); break; } diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 5ca5fb77..4b3bb45b 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -79,9 +79,8 @@ JSON parse number minus no digits 0 JSON parse incomplete number 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 -JSON parse unterminated string 0 JSON parse leading zero 0 -JSON parse premature end of u 0 +JSON parse ls premature end of input 0 JSON parse bad hex after u 0 JSONHandler unhandled value 0 JSONHandler unexpected key 0 diff --git a/libtests/qtest/json_parse/bad-27.out b/libtests/qtest/json_parse/bad-27.out index 2c2df076..70fcbf74 100644 --- a/libtests/qtest/json_parse/bad-27.out +++ b/libtests/qtest/json_parse/bad-27.out @@ -1 +1 @@ -exception: bad-27.json: JSON: offset 6: unterminated string +exception: bad-27.json: JSON: premature end of input diff --git a/libtests/qtest/json_parse/bad-28.out b/libtests/qtest/json_parse/bad-28.out index d7db2aea..005a68d2 100644 --- a/libtests/qtest/json_parse/bad-28.out +++ b/libtests/qtest/json_parse/bad-28.out @@ -1 +1 @@ -exception: bad-28.json: JSON: offset 16: unterminated string +exception: bad-28.json: JSON: premature end of input diff --git a/libtests/qtest/json_parse/bad-34.out b/libtests/qtest/json_parse/bad-34.out index f9db587a..c21838c4 100644 --- a/libtests/qtest/json_parse/bad-34.out +++ b/libtests/qtest/json_parse/bad-34.out @@ -1 +1 @@ -exception: bad-34.json: JSON: offset 3: \u must be followed by four characters +exception: bad-34.json: JSON: premature end of input -- cgit v1.2.3-70-g09d2 From ee32235f54884247f6117fc0fbdd462a4e38ac1f Mon Sep 17 00:00:00 2001 From: m-holger Date: Sun, 29 Jan 2023 15:21:29 +0000 Subject: In JSONParser::getToken handle legal control chars early Also, reject them in strings. --- libqpdf/JSON.cc | 564 ++++++++++++++++++---------------- libtests/libtests.testcov | 1 + libtests/qtest/json_parse.test | 8 +- libtests/qtest/json_parse/bad-01.out | 2 +- libtests/qtest/json_parse/bad-02.out | 2 +- libtests/qtest/json_parse/bad-03.out | 2 +- libtests/qtest/json_parse/bad-27.out | 2 +- libtests/qtest/json_parse/bad-31.json | 2 +- libtests/qtest/json_parse/bad-45.out | 2 +- libtests/qtest/json_parse/bad-46.out | 2 +- libtests/qtest/json_parse/bad-47.out | 2 +- 11 files changed, 305 insertions(+), 284 deletions(-) (limited to 'libtests/libtests.testcov') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index afeda315..e9637e86 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -723,10 +723,11 @@ JSONParser::handle_u_code( void JSONParser::tokenError() { - if (bytes == 0) { + if (done) { QTC::TC("libtests", "JSON parse ls premature end of input"); throw std::runtime_error("JSON: premature end of input"); } + if (lex_state == ls_u4) { QTC::TC("libtests", "JSON parse bad hex after u"); throw std::runtime_error( @@ -737,6 +738,11 @@ JSONParser::tokenError() throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": keyword: unexpected character " + std::string(p, 1)); + } else if (lex_state == ls_string) { + QTC::TC("libtests", "JSON parse control char in string"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": control character in string (missing \"?)"); } else if (lex_state == ls_backslash) { QTC::TC("libtests", "JSON parse backslash bad character"); throw std::runtime_error( @@ -779,6 +785,7 @@ JSONParser::tokenError() "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected character " + std::string(p, 1)); } + throw std::logic_error("JSON::tokenError : unhandled error"); } void @@ -792,7 +799,7 @@ JSONParser::getToken() unsigned long high_surrogate = 0; qpdf_offset_t high_offset = 0; - while (!done) { + while (true) { if (p == (buf + bytes)) { p = buf; bytes = is.read(buf, sizeof(buf)); @@ -808,307 +815,320 @@ JSONParser::getToken() // end the current token (unless we are still before the start // of the token). if (lex_state == ls_top) { - // Continue with token + ++p; + ++offset; } else { - // done + break; } + } else { QTC::TC("libtests", "JSON parse null character"); throw std::runtime_error( "JSON: control or null character at offset " + std::to_string(offset)); } - } - action = append; - switch (lex_state) { - case ls_top: - token_start = offset; - if (*p == '"') { - lex_state = ls_string; - action = ignore; - } else if (QUtil::is_space(*p)) { - action = ignore; - } else if (*p == ',') { - lex_state = ls_comma; - action = ignore; - ready = true; - } else if (*p == ':') { - lex_state = ls_colon; - action = ignore; - ready = true; - } else if (*p == '{') { - lex_state = ls_begin_dict; - action = ignore; - ready = true; - } else if (*p == '}') { - lex_state = ls_end_dict; - action = ignore; - ready = true; - } else if (*p == '[') { - lex_state = ls_begin_array; - action = ignore; - ready = true; - } else if (*p == ']') { - lex_state = ls_end_array; - action = ignore; - ready = true; - } else if ((*p >= 'a') && (*p <= 'z')) { - lex_state = ls_alpha; - } else if (*p == '-') { - lex_state = ls_number_minus; - } else if ((*p >= '1') && (*p <= '9')) { - lex_state = ls_number_before_point; - } else if (*p == '0') { - lex_state = ls_number_leading_zero; - } else { - QTC::TC("libtests", "JSON parse bad character"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": unexpected character " + std::string(p, 1)); - } - break; - - case ls_number_minus: - if ((*p >= '1') && (*p <= '9')) { - lex_state = ls_number_before_point; - } else if (*p == '0') { - lex_state = ls_number_leading_zero; - } else { - QTC::TC("libtests", "JSON parse number minus no digits"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": numeric literal: no digit after minus sign"); - } - break; - - case ls_number_leading_zero: - if (*p == '.') { - lex_state = ls_number_point; - } else if (QUtil::is_space(*p)) { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; - } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; - } else { - QTC::TC("libtests", "JSON parse leading zero"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": number with leading zero"); - } - break; - - case ls_number_before_point: - if ((*p >= '0') && (*p <= '9')) { - // continue - } else if (*p == '.') { - lex_state = ls_number_point; - } else if (QUtil::is_space(*p)) { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; - } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; - } else { - tokenError(); - } - break; - - case ls_number_point: - if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number_after_point; - } else { - tokenError(); - } - break; - - case ls_number_after_point: - if ((*p >= '0') && (*p <= '9')) { - // continue - } else if (QUtil::is_space(*p)) { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; - } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; - } else { - tokenError(); - } - break; + } else { + action = append; + switch (lex_state) { + case ls_top: + token_start = offset; + if (*p == '"') { + lex_state = ls_string; + action = ignore; + } else if (*p == ' ') { + action = ignore; + } else if (*p == ',') { + lex_state = ls_comma; + action = ignore; + ready = true; + } else if (*p == ',') { + lex_state = ls_comma; + action = ignore; + ready = true; + } else if (*p == ':') { + lex_state = ls_colon; + action = ignore; + ready = true; + } else if (*p == '{') { + lex_state = ls_begin_dict; + action = ignore; + ready = true; + } else if (*p == '}') { + lex_state = ls_end_dict; + action = ignore; + ready = true; + } else if (*p == '[') { + lex_state = ls_begin_array; + action = ignore; + ready = true; + } else if (*p == ']') { + lex_state = ls_end_array; + action = ignore; + ready = true; + } else if ((*p >= 'a') && (*p <= 'z')) { + lex_state = ls_alpha; + } else if (*p == '-') { + lex_state = ls_number_minus; + } else if ((*p >= '1') && (*p <= '9')) { + lex_state = ls_number_before_point; + } else if (*p == '0') { + lex_state = ls_number_leading_zero; + } else { + QTC::TC("libtests", "JSON parse bad character"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": unexpected character " + std::string(p, 1)); + } + break; - case ls_number_e: - if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number; - } else if ((*p == '+') || (*p == '-')) { - lex_state = ls_number_e_sign; - } else { - tokenError(); - } - break; + case ls_number_minus: + if ((*p >= '1') && (*p <= '9')) { + lex_state = ls_number_before_point; + } else if (*p == '0') { + lex_state = ls_number_leading_zero; + } else { + QTC::TC("libtests", "JSON parse number minus no digits"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: no digit after minus sign"); + } + break; - case ls_number_e_sign: - if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number; - } else { - tokenError(); - } - break; + case ls_number_leading_zero: + if (*p == '.') { + lex_state = ls_number_point; + } else if (*p == ' ') { + lex_state = ls_number; + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; + action = reread; + ready = true; + } else if (*p == 'e' || *p == 'E') { + lex_state = ls_number_e; + } else { + QTC::TC("libtests", "JSON parse leading zero"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": number with leading zero"); + } + break; - case ls_number: - // We only get here after we have seen an exponent. - if ((*p >= '0') && (*p <= '9')) { - // continue - } else if (QUtil::is_space(*p)) { - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - action = reread; - ready = true; - } else { - tokenError(); - } - break; + case ls_number_before_point: + if ((*p >= '0') && (*p <= '9')) { + // continue + } else if (*p == '.') { + lex_state = ls_number_point; + } else if (*p == ' ') { + lex_state = ls_number; + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; + action = reread; + ready = true; + } else if (*p == 'e' || *p == 'E') { + lex_state = ls_number_e; + } else { + tokenError(); + } + break; - case ls_alpha: - if ((*p >= 'a') && (*p <= 'z')) { - // okay - } else if (QUtil::is_space(*p)) { - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - action = reread; - ready = true; - } else { - tokenError(); - } - break; + case ls_number_point: + if ((*p >= '0') && (*p <= '9')) { + lex_state = ls_number_after_point; + } else { + tokenError(); + } + break; - case ls_string: - if (*p == '"') { - if (high_offset) { - QTC::TC("libtests", "JSON 16 dangling high"); - throw std::runtime_error( - "JSON: offset " + std::to_string(high_offset) + - ": UTF-16 high surrogate not followed by low " - "surrogate"); + case ls_number_after_point: + if ((*p >= '0') && (*p <= '9')) { + // continue + } else if (*p == ' ') { + lex_state = ls_number; + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; + action = reread; + ready = true; + } else if (*p == 'e' || *p == 'E') { + lex_state = ls_number_e; + } else { + tokenError(); } - action = ignore; - ready = true; - } else if (*p == '\\') { - lex_state = ls_backslash; - action = ignore; - } - break; + break; - case ls_backslash: - action = ignore; - lex_state = ls_string; - switch (*p) { - case '\\': - case '\"': - case '/': - // \/ is allowed in json input, but so is /, so we - // don't map / to \/ in output. - token += *p; + case ls_number_e: + if ((*p >= '0') && (*p <= '9')) { + lex_state = ls_number; + } else if ((*p == '+') || (*p == '-')) { + lex_state = ls_number_e_sign; + } else { + tokenError(); + } break; - case 'b': - token += '\b'; + + case ls_number_e_sign: + if ((*p >= '0') && (*p <= '9')) { + lex_state = ls_number; + } else { + tokenError(); + } break; - case 'f': - token += '\f'; + + case ls_number: + // We only get here after we have seen an exponent. + if ((*p >= '0') && (*p <= '9')) { + // continue + } else if (*p == ' ') { + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + action = reread; + ready = true; + } else { + tokenError(); + } break; - case 'n': - token += '\n'; + + case ls_alpha: + if ((*p >= 'a') && (*p <= 'z')) { + // okay + } else if (*p == ' ') { + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + action = reread; + ready = true; + } else { + tokenError(); + } break; - case 'r': - token += '\r'; + + case ls_string: + if (*p == '"') { + if (high_offset) { + QTC::TC("libtests", "JSON 16 dangling high"); + throw std::runtime_error( + "JSON: offset " + std::to_string(high_offset) + + ": UTF-16 high surrogate not followed by low " + "surrogate"); + } + action = ignore; + ready = true; + } else if (*p == '\\') { + lex_state = ls_backslash; + action = ignore; + } break; - case 't': - token += '\t'; + + case ls_backslash: + action = ignore; + lex_state = ls_string; + switch (*p) { + case '\\': + case '\"': + case '/': + // \/ is allowed in json input, but so is /, so we + // don't map / to \/ in output. + token += *p; + break; + case 'b': + token += '\b'; + break; + case 'f': + token += '\f'; + break; + case 'n': + token += '\n'; + break; + case 'r': + token += '\r'; + break; + case 't': + token += '\t'; + break; + case 'u': + lex_state = ls_u4; + u_count = 0; + u_value = 0; + break; + default: + lex_state = ls_backslash; + tokenError(); + } break; - case 'u': - lex_state = ls_u4; - u_count = 0; - u_value = 0; + + case ls_u4: + using ui = unsigned int; + action = ignore; + if ('0' <= *p && *p <= '9') { + u_value = 16 * u_value + (ui(*p) - ui('0')); + } else if ('a' <= *p && *p <= 'f') { + u_value = 16 * u_value + (10 + ui(*p) - ui('a')); + } else if ('A' <= *p && *p <= 'F') { + u_value = 16 * u_value + (10 + ui(*p) - ui('A')); + } else { + tokenError(); + } + if (++u_count == 4) { + handle_u_code( + u_value, + offset - 5, + high_surrogate, + high_offset, + token); + lex_state = ls_string; + } break; + default: - lex_state = ls_backslash; - tokenError(); + throw std::logic_error( + "JSONParser::getToken : trying to handle delimiter state"); } - break; - - case ls_u4: - using ui = unsigned int; - action = ignore; - if ('0' <= *p && *p <= '9') { - u_value = 16 * u_value + (ui(*p) - ui('0')); - } else if ('a' <= *p && *p <= 'f') { - u_value = 16 * u_value + (10 + ui(*p) - ui('a')); - } else if ('A' <= *p && *p <= 'F') { - u_value = 16 * u_value + (10 + ui(*p) - ui('A')); - } else { - tokenError(); + switch (action) { + case reread: + break; + case append: + token.append(1, *p); + // fall through + case ignore: + ++p; + ++offset; + break; } - if (++u_count == 4) { - handle_u_code( - u_value, offset - 5, high_surrogate, high_offset, token); - lex_state = ls_string; + if (ready) { + return; } - break; - - default: - throw std::logic_error( - "JSONParser::getToken : trying to handle delimiter state"); - } - switch (action) { - case reread: - break; - case append: - token.append(1, *p); - // fall through - case ignore: - ++p; - ++offset; - break; - } - if (ready) { - break; } } - if (done) { - if (!token.empty() && !ready) { - switch (lex_state) { - case ls_top: - // Can't happen - throw std::logic_error("tok_start set in ls_top while parsing"); - break; - case ls_number_leading_zero: - case ls_number_before_point: - case ls_number_after_point: - lex_state = ls_number; - break; + // We only get here if on end of input or if the last character was a + // control character. - case ls_number: - case ls_alpha: - // terminal state - break; + if (!token.empty()) { + switch (lex_state) { + case ls_top: + // Can't happen + throw std::logic_error("tok_start set in ls_top while parsing"); + break; - default: - tokenError(); - } + case ls_number_leading_zero: + case ls_number_before_point: + case ls_number_after_point: + lex_state = ls_number; + break; + + case ls_number: + case ls_alpha: + // terminal state + break; + + default: + tokenError(); } } } diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 4b3bb45b..5e5c2e00 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -79,6 +79,7 @@ JSON parse number minus no digits 0 JSON parse incomplete number 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 +JSON parse control char in string 0 JSON parse leading zero 0 JSON parse ls premature end of input 0 JSON parse bad hex after u 0 diff --git a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test index 8234b755..699544f6 100644 --- a/libtests/qtest/json_parse.test +++ b/libtests/qtest/json_parse.test @@ -125,10 +125,10 @@ my @bad = ( "e after minus", # 42 "missing digit after e", # 43 "missing digit after e+/-", # 44 - # "tab char in string", # 45 - # "cr char in string", # 46 - # "lf char in string", # 47 - # "bs char in string", # 48 + "tab char in string", # 45 + "cr char in string", # 46 + "lf char in string", # 47 + "bs char in string", # 48 ); my $i = 0; diff --git a/libtests/qtest/json_parse/bad-01.out b/libtests/qtest/json_parse/bad-01.out index a4254cff..8ae96c30 100644 --- a/libtests/qtest/json_parse/bad-01.out +++ b/libtests/qtest/json_parse/bad-01.out @@ -1 +1 @@ -exception: bad-01.json: JSON: offset 9: material follows end of object: junk +exception: bad-01.json: JSON: offset 8: material follows end of object: junk diff --git a/libtests/qtest/json_parse/bad-02.out b/libtests/qtest/json_parse/bad-02.out index 485c9658..212b2f4f 100644 --- a/libtests/qtest/json_parse/bad-02.out +++ b/libtests/qtest/json_parse/bad-02.out @@ -1 +1 @@ -exception: bad-02.json: JSON: offset 11: material follows end of object: junk +exception: bad-02.json: JSON: offset 10: material follows end of object: junk diff --git a/libtests/qtest/json_parse/bad-03.out b/libtests/qtest/json_parse/bad-03.out index 38f35119..a1411e0e 100644 --- a/libtests/qtest/json_parse/bad-03.out +++ b/libtests/qtest/json_parse/bad-03.out @@ -1 +1 @@ -exception: bad-03.json: JSON: offset 16: material follows end of object: junk +exception: bad-03.json: JSON: offset 15: material follows end of object: junk diff --git a/libtests/qtest/json_parse/bad-27.out b/libtests/qtest/json_parse/bad-27.out index 70fcbf74..4c1ecfeb 100644 --- a/libtests/qtest/json_parse/bad-27.out +++ b/libtests/qtest/json_parse/bad-27.out @@ -1 +1 @@ -exception: bad-27.json: JSON: premature end of input +exception: bad-27.json: JSON: offset 5: control character in string (missing "?) diff --git a/libtests/qtest/json_parse/bad-31.json b/libtests/qtest/json_parse/bad-31.json index 39cdd0de..277cc02f 100644 --- a/libtests/qtest/json_parse/bad-31.json +++ b/libtests/qtest/json_parse/bad-31.json @@ -1 +1 @@ -- +- diff --git a/libtests/qtest/json_parse/bad-45.out b/libtests/qtest/json_parse/bad-45.out index ba7e4f16..d4320b0a 100644 --- a/libtests/qtest/json_parse/bad-45.out +++ b/libtests/qtest/json_parse/bad-45.out @@ -1 +1 @@ -"Tab in str\ting" +exception: bad-45.json: JSON: offset 11: control character in string (missing "?) diff --git a/libtests/qtest/json_parse/bad-46.out b/libtests/qtest/json_parse/bad-46.out index 2baad6a4..50aa5ffb 100644 --- a/libtests/qtest/json_parse/bad-46.out +++ b/libtests/qtest/json_parse/bad-46.out @@ -1 +1 @@ -"cr in str\ring" +exception: bad-46.json: JSON: offset 10: control character in string (missing "?) diff --git a/libtests/qtest/json_parse/bad-47.out b/libtests/qtest/json_parse/bad-47.out index 30549072..39f9d3d5 100644 --- a/libtests/qtest/json_parse/bad-47.out +++ b/libtests/qtest/json_parse/bad-47.out @@ -1 +1 @@ -"lf in str\ning" +exception: bad-47.json: JSON: offset 10: control character in string (missing "?) -- cgit v1.2.3-70-g09d2