From cba1c352e3c4236205dc96de643e780abb3c7b64 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Jan 2023 11:47:06 +0000 Subject: In JSONParser add lex_state ls_number_minus --- libtests/qtest/json_parse/bad-31.out | 2 +- libtests/qtest/json_parse/bad-41.json | 2 ++ libtests/qtest/json_parse/bad-41.out | 1 + libtests/qtest/json_parse/bad-42.json | 1 + libtests/qtest/json_parse/bad-42.out | 1 + 5 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 libtests/qtest/json_parse/bad-41.json create mode 100644 libtests/qtest/json_parse/bad-41.out create mode 100644 libtests/qtest/json_parse/bad-42.json create mode 100644 libtests/qtest/json_parse/bad-42.out (limited to 'libtests/qtest/json_parse') diff --git a/libtests/qtest/json_parse/bad-31.out b/libtests/qtest/json_parse/bad-31.out index 344f42e8..2228d08d 100644 --- a/libtests/qtest/json_parse/bad-31.out +++ b/libtests/qtest/json_parse/bad-31.out @@ -1 +1 @@ -exception: bad-31.json: JSON: offset 2: number with no digits +exception: bad-31.json: JSON: offset 1: numeric literal: no digit after minus sign diff --git a/libtests/qtest/json_parse/bad-41.json b/libtests/qtest/json_parse/bad-41.json new file mode 100644 index 00000000..dad59049 --- /dev/null +++ b/libtests/qtest/json_parse/bad-41.json @@ -0,0 +1,2 @@ +-.123 + diff --git a/libtests/qtest/json_parse/bad-41.out b/libtests/qtest/json_parse/bad-41.out new file mode 100644 index 00000000..bebcfdb9 --- /dev/null +++ b/libtests/qtest/json_parse/bad-41.out @@ -0,0 +1 @@ +exception: bad-41.json: JSON: offset 1: numeric literal: no digit after minus sign diff --git a/libtests/qtest/json_parse/bad-42.json b/libtests/qtest/json_parse/bad-42.json new file mode 100644 index 00000000..2f9148b0 --- /dev/null +++ b/libtests/qtest/json_parse/bad-42.json @@ -0,0 +1 @@ +-e123 diff --git a/libtests/qtest/json_parse/bad-42.out b/libtests/qtest/json_parse/bad-42.out new file mode 100644 index 00000000..96e9a0a3 --- /dev/null +++ b/libtests/qtest/json_parse/bad-42.out @@ -0,0 +1 @@ +exception: bad-42.json: JSON: offset 1: numeric literal: no digit after minus sign -- cgit v1.2.3-54-g00ecf From 08e768909d9760a3588b8a8eaaeda0b357a85c62 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Jan 2023 11:22:06 +0000 Subject: In JSONParser add lex_state ls_number_leading_zero --- libqpdf/JSON.cc | 34 +++++++++++++++++++++++++++++++--- libtests/qtest/json_parse/bad-32.out | 2 +- libtests/qtest/json_parse/bad-33.out | 2 +- 3 files changed, 33 insertions(+), 5 deletions(-) (limited to 'libtests/qtest/json_parse') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index c881811b..1c3378a7 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -648,6 +648,7 @@ namespace ls_top, ls_number, ls_number_minus, + ls_number_leading_zero, ls_alpha, ls_string, ls_backslash, @@ -868,13 +869,20 @@ JSONParser::getToken() number_after_e = 0; number_saw_point = false; number_saw_e = false; - } else if ((*p >= '0') && (*p <= '9')) { + } else if ((*p >= '1') && (*p <= '9')) { lex_state = ls_number; number_before_point = 1; number_after_point = 0; number_after_e = 0; number_saw_point = false; number_saw_e = false; + } else if (*p == '0') { + lex_state = ls_number_leading_zero; + number_before_point = 1; + number_after_point = 0; + number_after_e = 0; + number_saw_point = false; + number_saw_e = false; } else if (*p == '.') { lex_state = ls_number; number_before_point = 0; @@ -898,7 +906,7 @@ JSONParser::getToken() lex_state = ls_number; } else if (*p == '0') { ++number_before_point; - lex_state = ls_number; + lex_state = ls_number_leading_zero; } else { QTC::TC("libtests", "JSON parse number minus no digits"); throw std::runtime_error( @@ -907,6 +915,25 @@ JSONParser::getToken() } break; + case ls_number_leading_zero: + if (*p == '.') { + lex_state = ls_number; + } else if (*p == 'e') { + lex_state = ls_number; + } else if (QUtil::is_space(*p)) { + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + action = reread; + ready = true; + } else { + QTC::TC("libtests", "JSON parse leading zero"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": number with leading zero"); + } + break; + case ls_number: if ((*p >= '0') && (*p <= '9')) { if (number_saw_e) { @@ -1037,6 +1064,7 @@ JSONParser::getToken() case ls_number: case ls_number_minus: + case ls_number_leading_zero: case ls_alpha: // okay break; @@ -1111,6 +1139,7 @@ JSONParser::handleToken() case ls_number: case ls_number_minus: + case ls_number_leading_zero: if (number_saw_point && (number_after_point == 0)) { // QTC::TC("libtests", "JSON parse decimal with no digits"); throw std::runtime_error( @@ -1120,7 +1149,6 @@ JSONParser::handleToken() if ((number_before_point > 1) && ((first_char == '0') || ((first_char == '-') && (token.at(1) == '0')))) { - QTC::TC("libtests", "JSON parse leading zero"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": number with leading zero"); diff --git a/libtests/qtest/json_parse/bad-32.out b/libtests/qtest/json_parse/bad-32.out index 4372e0cf..41a681c0 100644 --- a/libtests/qtest/json_parse/bad-32.out +++ b/libtests/qtest/json_parse/bad-32.out @@ -1 +1 @@ -exception: bad-32.json: JSON: offset 5: number with leading zero +exception: bad-32.json: JSON: offset 1: number with leading zero diff --git a/libtests/qtest/json_parse/bad-33.out b/libtests/qtest/json_parse/bad-33.out index ae41e48b..98a72942 100644 --- a/libtests/qtest/json_parse/bad-33.out +++ b/libtests/qtest/json_parse/bad-33.out @@ -1 +1 @@ -exception: bad-33.json: JSON: offset 6: number with leading zero +exception: bad-33.json: JSON: offset 2: number with leading zero -- cgit v1.2.3-54-g00ecf From cdd1f0a9f60747ec0e25139dd530be3caf468eca Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 23 Jan 2023 20:08:11 +0000 Subject: In JSONParser add lex_state ls_number_point Also. remove '.' as starting char in lsTop. --- libqpdf/JSON.cc | 30 ++++++++++++++++++++---------- libtests/libtests.testcov | 1 + libtests/qtest/json_parse/bad-04.out | 2 +- libtests/qtest/json_parse/bad-30.out | 2 +- 4 files changed, 23 insertions(+), 12 deletions(-) (limited to 'libtests/qtest/json_parse') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index c8c4fdb0..b068aca2 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -650,6 +650,7 @@ namespace ls_number_minus, ls_number_leading_zero, ls_number_before_point, + ls_number_point, ls_alpha, ls_string, ls_backslash, @@ -825,6 +826,11 @@ JSONParser::numberError() throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected sign"); + } else if (QUtil::is_space(*p) || strchr("{}[]:,", *p)) { + QTC::TC("libtests", "JSON parse incomplete number"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: incomplete number"); } else { QTC::TC("libtests", "JSON parse numeric bad character"); throw std::runtime_error( @@ -884,13 +890,6 @@ JSONParser::getToken() number_after_e = 0; number_saw_point = false; number_saw_e = false; - } else if (*p == '.') { - lex_state = ls_number; - number_before_point = 0; - number_after_point = 0; - number_after_e = 0; - number_saw_point = true; - number_saw_e = false; } else if (strchr("{}[]:,", *p)) { ready = true; } else { @@ -918,7 +917,7 @@ JSONParser::getToken() case ls_number_leading_zero: if (*p == '.') { - lex_state = ls_number; + lex_state = ls_number_point; } else if (*p == 'e') { lex_state = ls_number; } else if (QUtil::is_space(*p)) { @@ -940,7 +939,7 @@ JSONParser::getToken() ++number_before_point; } else if (*p == '.') { number_saw_point = true; - lex_state = ls_number; + lex_state = ls_number_point; } else if (*p == 'e') { number_saw_e = true; lex_state = ls_number; @@ -955,6 +954,15 @@ JSONParser::getToken() } break; + case ls_number_point: + if ((*p >= '0') && (*p <= '9')) { + ++number_after_point; + lex_state = ls_number; + } else { + numberError(); + } + break; + case ls_number: if ((*p >= '0') && (*p <= '9')) { if (number_saw_e) { @@ -1083,6 +1091,7 @@ JSONParser::getToken() break; case ls_number_before_point: + case ls_number_point: case ls_number: case ls_number_minus: case ls_number_leading_zero: @@ -1158,10 +1167,11 @@ JSONParser::handleToken() } break; - case ls_number_before_point: case ls_number: case ls_number_minus: case ls_number_leading_zero: + case ls_number_before_point: + case ls_number_point: if (number_saw_point && (number_after_point == 0)) { // QTC::TC("libtests", "JSON parse decimal with no digits"); throw std::runtime_error( diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 26cf2048..5ca5fb77 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -76,6 +76,7 @@ JSON parse duplicate e 0 JSON parse unexpected sign 0 JSON parse numeric bad character 0 JSON parse number minus no digits 0 +JSON parse incomplete number 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 JSON parse unterminated string 0 diff --git a/libtests/qtest/json_parse/bad-04.out b/libtests/qtest/json_parse/bad-04.out index 7fe71693..27d252f2 100644 --- a/libtests/qtest/json_parse/bad-04.out +++ b/libtests/qtest/json_parse/bad-04.out @@ -1 +1 @@ -exception: bad-04.json: JSON: offset 5: decimal point with no digits +exception: bad-04.json: JSON: offset 4: unexpected character . diff --git a/libtests/qtest/json_parse/bad-30.out b/libtests/qtest/json_parse/bad-30.out index bff961af..ec63bb09 100644 --- a/libtests/qtest/json_parse/bad-30.out +++ b/libtests/qtest/json_parse/bad-30.out @@ -1 +1 @@ -exception: bad-30.json: JSON: offset 5: decimal point with no digits +exception: bad-30.json: JSON: offset 4: numeric literal: incomplete number -- cgit v1.2.3-54-g00ecf From 6ea543e6c731db30b0807f531a445a66c66619b9 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Jan 2023 13:04:31 +0000 Subject: In JSONParser add lex_state ls_number_e_sign --- libqpdf/JSON.cc | 17 ++++++++++++++--- libtests/qtest/json_parse.test | 2 ++ libtests/qtest/json_parse/bad-43.json | 1 + libtests/qtest/json_parse/bad-43.out | 1 + libtests/qtest/json_parse/bad-44.json | 1 + libtests/qtest/json_parse/bad-44.out | 1 + 6 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 libtests/qtest/json_parse/bad-43.json create mode 100644 libtests/qtest/json_parse/bad-43.out create mode 100644 libtests/qtest/json_parse/bad-44.json create mode 100644 libtests/qtest/json_parse/bad-44.out (limited to 'libtests/qtest/json_parse') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index d6baf584..8e55b08c 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -653,6 +653,7 @@ namespace ls_number_point, ls_number_after_point, ls_number_e, + ls_number_e_sign, ls_alpha, ls_string, ls_backslash, @@ -988,6 +989,14 @@ JSONParser::getToken() ++number_after_e; lex_state = ls_number; } else if ((*p == '+') || (*p == '-')) { + lex_state = ls_number_e_sign; + } else { + numberError(); + } + break; + + case ls_number_e_sign: + if ((*p >= '0') && (*p <= '9')) { lex_state = ls_number; } else { numberError(); @@ -1120,13 +1129,14 @@ JSONParser::getToken() throw std::logic_error("tok_start set in ls_top while parsing"); break; + case ls_number: + case ls_number_minus: + case ls_number_leading_zero: case ls_number_before_point: case ls_number_point: case ls_number_after_point: case ls_number_e: - case ls_number: - case ls_number_minus: - case ls_number_leading_zero: + case ls_number_e_sign: case ls_alpha: // okay break; @@ -1206,6 +1216,7 @@ JSONParser::handleToken() case ls_number_point: case ls_number_after_point: case ls_number_e: + case ls_number_e_sign: if (number_saw_point && (number_after_point == 0)) { // QTC::TC("libtests", "JSON parse decimal with no digits"); throw std::runtime_error( diff --git a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test index 7c64e3bd..d38d70de 100644 --- a/libtests/qtest/json_parse.test +++ b/libtests/qtest/json_parse.test @@ -123,6 +123,8 @@ my @bad = ( "duplicate dictionary key", # 40 "decimal point after minus",# 41 "e after minus", # 42 + "missing digit after e", # 43 + "missing digit after e+/-", # 44 ); my $i = 0; diff --git a/libtests/qtest/json_parse/bad-43.json b/libtests/qtest/json_parse/bad-43.json new file mode 100644 index 00000000..896a676a --- /dev/null +++ b/libtests/qtest/json_parse/bad-43.json @@ -0,0 +1 @@ +123e diff --git a/libtests/qtest/json_parse/bad-43.out b/libtests/qtest/json_parse/bad-43.out new file mode 100644 index 00000000..84070aa9 --- /dev/null +++ b/libtests/qtest/json_parse/bad-43.out @@ -0,0 +1 @@ +exception: bad-43.json: JSON: offset 4: numeric literal: incomplete number diff --git a/libtests/qtest/json_parse/bad-44.json b/libtests/qtest/json_parse/bad-44.json new file mode 100644 index 00000000..3a5d7dff --- /dev/null +++ b/libtests/qtest/json_parse/bad-44.json @@ -0,0 +1 @@ +123e+ diff --git a/libtests/qtest/json_parse/bad-44.out b/libtests/qtest/json_parse/bad-44.out new file mode 100644 index 00000000..f72120c4 --- /dev/null +++ b/libtests/qtest/json_parse/bad-44.out @@ -0,0 +1 @@ +exception: bad-44.json: JSON: offset 5: numeric literal: incomplete number -- cgit v1.2.3-54-g00ecf From 83f972ceda20e244f52bde7ac052e6931a6d33d3 Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 26 Jan 2023 09:55:34 +0000 Subject: Refactor end of input handling in JSONParser --- libqpdf/JSON.cc | 62 +++++++----------------------------- libtests/libtests.testcov | 3 +- libtests/qtest/json_parse/bad-27.out | 2 +- libtests/qtest/json_parse/bad-28.out | 2 +- libtests/qtest/json_parse/bad-34.out | 2 +- 5 files changed, 16 insertions(+), 55 deletions(-) (limited to 'libtests/qtest/json_parse') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 3f1a3879..6ee11309 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1012,7 +1012,7 @@ JSONParser::getToken() case ls_number: // We only get here after we have seen an exponent. if ((*p >= '0') && (*p <= '9')) { - ++number_after_e; + ++number_after_e; } else if (QUtil::is_space(*p)) { action = ignore; ready = true; @@ -1093,38 +1093,27 @@ JSONParser::getToken() } } if (done) { - if ((!token.empty()) && (!ready)) { + if (!token.empty() && !ready) { switch (lex_state) { case ls_top: // Can't happen throw std::logic_error("tok_start set in ls_top while parsing"); break; - case ls_number: - case ls_number_minus: case ls_number_leading_zero: case ls_number_before_point: - case ls_number_point: case ls_number_after_point: - case ls_number_e: - case ls_number_e_sign: - case ls_alpha: - // okay + lex_state = ls_number; break; - case ls_u4: - QTC::TC("libtests", "JSON parse premature end of u"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset - u_count - 1) + - ": \\u must be followed by four characters"); - - case ls_string: - case ls_backslash: - QTC::TC("libtests", "JSON parse unterminated string"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": unterminated string"); + case ls_number: + case ls_alpha: + // terminal state break; + + default: + QTC::TC("libtests", "JSON parse ls premature end of input"); + throw std::runtime_error("JSON: premature end of input"); } } } @@ -1181,32 +1170,6 @@ JSONParser::handleToken() break; case ls_number: - case ls_number_minus: - case ls_number_leading_zero: - case ls_number_before_point: - case ls_number_point: - case ls_number_after_point: - case ls_number_e: - case ls_number_e_sign: - if (number_saw_point && (number_after_point == 0)) { - // QTC::TC("libtests", "JSON parse decimal with no digits"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": decimal point with no digits"); - } - if ((number_before_point > 1) && - ((first_char == '0') || - ((first_char == '-') && (token.at(1) == '0')))) { - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": number with leading zero"); - } - if ((number_before_point == 0) && (number_after_point == 0)) { - // QTC::TC("libtests", "JSON parse number no digits"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": number with no digits"); - } item = std::make_shared(JSON::makeNumber(token)); break; @@ -1229,10 +1192,9 @@ JSONParser::handleToken() item = std::make_shared(JSON::makeString(s_value)); break; - case ls_backslash: - case ls_u4: + default: throw std::logic_error( - "tok_end is set while state = ls_backslash or ls_u4"); + "JSONParser::handleToken : non-terminal lexer state encountered"); break; } diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 5ca5fb77..4b3bb45b 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -79,9 +79,8 @@ JSON parse number minus no digits 0 JSON parse incomplete number 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 -JSON parse unterminated string 0 JSON parse leading zero 0 -JSON parse premature end of u 0 +JSON parse ls premature end of input 0 JSON parse bad hex after u 0 JSONHandler unhandled value 0 JSONHandler unexpected key 0 diff --git a/libtests/qtest/json_parse/bad-27.out b/libtests/qtest/json_parse/bad-27.out index 2c2df076..70fcbf74 100644 --- a/libtests/qtest/json_parse/bad-27.out +++ b/libtests/qtest/json_parse/bad-27.out @@ -1 +1 @@ -exception: bad-27.json: JSON: offset 6: unterminated string +exception: bad-27.json: JSON: premature end of input diff --git a/libtests/qtest/json_parse/bad-28.out b/libtests/qtest/json_parse/bad-28.out index d7db2aea..005a68d2 100644 --- a/libtests/qtest/json_parse/bad-28.out +++ b/libtests/qtest/json_parse/bad-28.out @@ -1 +1 @@ -exception: bad-28.json: JSON: offset 16: unterminated string +exception: bad-28.json: JSON: premature end of input diff --git a/libtests/qtest/json_parse/bad-34.out b/libtests/qtest/json_parse/bad-34.out index f9db587a..c21838c4 100644 --- a/libtests/qtest/json_parse/bad-34.out +++ b/libtests/qtest/json_parse/bad-34.out @@ -1 +1 @@ -exception: bad-34.json: JSON: offset 3: \u must be followed by four characters +exception: bad-34.json: JSON: premature end of input -- cgit v1.2.3-54-g00ecf From 0de032bcdd49d50df6a3e4a2e6325e5144c4619e Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 11:12:30 +0000 Subject: In JSONParser::handleToken simplify setting of start and end --- libqpdf/JSON.cc | 8 ++------ libtests/qtest/json_parse/good-01-react.out | 4 ++-- libtests/qtest/json_parse/good-04-react.out | 10 +++++----- libtests/qtest/json_parse/good-10-react.out | 10 +++++----- libtests/qtest/json_parse/good-11-react.out | 4 ++-- 5 files changed, 16 insertions(+), 20 deletions(-) (limited to 'libtests/qtest/json_parse') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 9775bc39..1749005b 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1142,12 +1142,10 @@ JSONParser::handleToken() switch (lex_state) { case ls_begin_dict: item = std::make_shared(JSON::makeDictionary()); - item->setStart(token_start); break; case ls_begin_array: item = std::make_shared(JSON::makeArray()); - item->setStart(token_start); break; case ls_colon: @@ -1306,10 +1304,8 @@ JSONParser::handleToken() parser_state_e next_state = ps_top; - if (!(item->isArray() || item->isDictionary())) { - item->setStart(token_start); - item->setEnd(offset); - } + item->setStart(token_start); + item->setEnd(offset); switch (parser_state) { case ps_dict_begin: diff --git a/libtests/qtest/json_parse/good-01-react.out b/libtests/qtest/json_parse/good-01-react.out index e3813bcc..3951272d 100644 --- a/libtests/qtest/json_parse/good-01-react.out +++ b/libtests/qtest/json_parse/good-01-react.out @@ -1,13 +1,13 @@ dictionary start dictionary item: a -> [6, 11): "bcd" -dictionary item: e -> [18, 0): [] +dictionary item: e -> [18, 19): [] array start array item: [19, 20): 1 array item: [41, 42): 2 array item: [44, 45): 3 array item: [46, 47): 4 array item: [48, 54): "five" -array item: [56, 0): {} +array item: [56, 57): {} dictionary start dictionary item: six -> [64, 65): 7 dictionary item: 8 -> [72, 73): 9 diff --git a/libtests/qtest/json_parse/good-04-react.out b/libtests/qtest/json_parse/good-04-react.out index ded004b2..8d931535 100644 --- a/libtests/qtest/json_parse/good-04-react.out +++ b/libtests/qtest/json_parse/good-04-react.out @@ -1,15 +1,15 @@ array start -array item: [1, 0): [] +array item: [1, 2): [] array start -array item: [2, 0): [] +array item: [2, 3): [] array start -array item: [3, 0): {} +array item: [3, 4): {} dictionary start container end: [3, 5): {} container end: [2, 6): [] -array item: [8, 0): {} +array item: [8, 9): {} dictionary start -dictionary item: -> [13, 0): {} +dictionary item: -> [13, 14): {} dictionary start container end: [13, 15): {} container end: [8, 16): {} diff --git a/libtests/qtest/json_parse/good-10-react.out b/libtests/qtest/json_parse/good-10-react.out index 3cceeb2f..8c31f915 100644 --- a/libtests/qtest/json_parse/good-10-react.out +++ b/libtests/qtest/json_parse/good-10-react.out @@ -1,21 +1,21 @@ dictionary start -dictionary item: a -> [9, 0): [] +dictionary item: a -> [9, 10): [] array start array item: [10, 11): 1 array item: [13, 14): 2 -array item: [16, 0): {} +array item: [16, 17): {} dictionary start dictionary item: x -> [22, 25): "y" container end: [16, 26): {} array item: [28, 29): 3 -array item: [31, 0): {} +array item: [31, 32): {} dictionary start dictionary item: keep -> [40, 61): "not in final output" container end: [31, 62): { "keep": "not in final output" } container end: [9, 63): [] -dictionary item: keep -> [75, 0): [] +dictionary item: keep -> [75, 76): [] array start array item: [76, 77): 1 array item: [79, 83): null @@ -23,7 +23,7 @@ array item: [85, 86): 2 array item: [88, 93): false array item: [95, 101): "keep" array item: [103, 104): 3 -array item: [106, 0): [] +array item: [106, 107): [] array start array item: [107, 113): "this" array item: [115, 121): "keep" diff --git a/libtests/qtest/json_parse/good-11-react.out b/libtests/qtest/json_parse/good-11-react.out index 6cf3345e..6d7d4275 100644 --- a/libtests/qtest/json_parse/good-11-react.out +++ b/libtests/qtest/json_parse/good-11-react.out @@ -1,12 +1,12 @@ array start -array item: [4, 0): [] +array item: [4, 5): [] array start array item: [5, 11): "u:π" array item: [13, 23): "u:π" array item: [25, 39): "b:EFBBBFCF80" array item: [41, 53): "b:feff03c0" container end: [4, 54): [] -array item: [58, 0): [] +array item: [58, 59): [] array start array item: [59, 67): "u:🥔" array item: [69, 85): "u:🥔" -- cgit v1.2.3-54-g00ecf From 5ac6a12e0a76613d29edc65beb6b99af45172493 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sun, 29 Jan 2023 11:39:15 +0000 Subject: In JSONParser::getToken reject illegal control characters --- libqpdf/JSON.cc | 20 ++++++++++++++++---- libtests/qtest/json_parse.test | 4 ++++ libtests/qtest/json_parse/bad-18.out | 2 +- libtests/qtest/json_parse/bad-45.json | 1 + libtests/qtest/json_parse/bad-45.out | 1 + libtests/qtest/json_parse/bad-46.json | 1 + libtests/qtest/json_parse/bad-46.out | 1 + libtests/qtest/json_parse/bad-47.json | 2 ++ libtests/qtest/json_parse/bad-47.out | 1 + libtests/qtest/json_parse/bad-48.json | 1 + libtests/qtest/json_parse/bad-48.out | 1 + 11 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 libtests/qtest/json_parse/bad-45.json create mode 100644 libtests/qtest/json_parse/bad-45.out create mode 100644 libtests/qtest/json_parse/bad-46.json create mode 100644 libtests/qtest/json_parse/bad-46.out create mode 100644 libtests/qtest/json_parse/bad-47.json create mode 100644 libtests/qtest/json_parse/bad-47.out create mode 100644 libtests/qtest/json_parse/bad-48.json create mode 100644 libtests/qtest/json_parse/bad-48.out (limited to 'libtests/qtest/json_parse') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index b972d8aa..da0de9eb 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -780,10 +780,22 @@ JSONParser::getToken() } } - if (*p == 0) { - QTC::TC("libtests", "JSON parse null character"); - throw std::runtime_error( - "JSON: null character at offset " + std::to_string(offset)); + if ((*p < 32 && *p >= 0)) { + if (*p == '\t' || *p == '\n' || *p == '\r') { + // Legal white space not permitted in strings. This will always + // end the current token (unless we are still before the start + // of the token). + if (lex_state == ls_top) { + // Continue with token + } else { + // done + } + } else { + QTC::TC("libtests", "JSON parse null character"); + throw std::runtime_error( + "JSON: control or null character at offset " + + std::to_string(offset)); + } } action = append; switch (lex_state) { diff --git a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test index d38d70de..8234b755 100644 --- a/libtests/qtest/json_parse.test +++ b/libtests/qtest/json_parse.test @@ -125,6 +125,10 @@ my @bad = ( "e after minus", # 42 "missing digit after e", # 43 "missing digit after e+/-", # 44 + # "tab char in string", # 45 + # "cr char in string", # 46 + # "lf char in string", # 47 + # "bs char in string", # 48 ); my $i = 0; diff --git a/libtests/qtest/json_parse/bad-18.out b/libtests/qtest/json_parse/bad-18.out index 0428b64f..1e779e41 100644 --- a/libtests/qtest/json_parse/bad-18.out +++ b/libtests/qtest/json_parse/bad-18.out @@ -1 +1 @@ -exception: bad-18.json: JSON: null character at offset 5 +exception: bad-18.json: JSON: control or null character at offset 5 diff --git a/libtests/qtest/json_parse/bad-45.json b/libtests/qtest/json_parse/bad-45.json new file mode 100644 index 00000000..16107dc0 --- /dev/null +++ b/libtests/qtest/json_parse/bad-45.json @@ -0,0 +1 @@ +"Tab in str ing" diff --git a/libtests/qtest/json_parse/bad-45.out b/libtests/qtest/json_parse/bad-45.out new file mode 100644 index 00000000..ba7e4f16 --- /dev/null +++ b/libtests/qtest/json_parse/bad-45.out @@ -0,0 +1 @@ +"Tab in str\ting" diff --git a/libtests/qtest/json_parse/bad-46.json b/libtests/qtest/json_parse/bad-46.json new file mode 100644 index 00000000..60873bf4 --- /dev/null +++ b/libtests/qtest/json_parse/bad-46.json @@ -0,0 +1 @@ +"cr in str ing" diff --git a/libtests/qtest/json_parse/bad-46.out b/libtests/qtest/json_parse/bad-46.out new file mode 100644 index 00000000..2baad6a4 --- /dev/null +++ b/libtests/qtest/json_parse/bad-46.out @@ -0,0 +1 @@ +"cr in str\ring" diff --git a/libtests/qtest/json_parse/bad-47.json b/libtests/qtest/json_parse/bad-47.json new file mode 100644 index 00000000..3c75427a --- /dev/null +++ b/libtests/qtest/json_parse/bad-47.json @@ -0,0 +1,2 @@ +"lf in str +ing" diff --git a/libtests/qtest/json_parse/bad-47.out b/libtests/qtest/json_parse/bad-47.out new file mode 100644 index 00000000..30549072 --- /dev/null +++ b/libtests/qtest/json_parse/bad-47.out @@ -0,0 +1 @@ +"lf in str\ning" diff --git a/libtests/qtest/json_parse/bad-48.json b/libtests/qtest/json_parse/bad-48.json new file mode 100644 index 00000000..1e605808 --- /dev/null +++ b/libtests/qtest/json_parse/bad-48.json @@ -0,0 +1 @@ +"bs in string" \ No newline at end of file diff --git a/libtests/qtest/json_parse/bad-48.out b/libtests/qtest/json_parse/bad-48.out new file mode 100644 index 00000000..0b20fc7a --- /dev/null +++ b/libtests/qtest/json_parse/bad-48.out @@ -0,0 +1 @@ +exception: bad-48.json: JSON: control or null character at offset 10 -- cgit v1.2.3-54-g00ecf From ee32235f54884247f6117fc0fbdd462a4e38ac1f Mon Sep 17 00:00:00 2001 From: m-holger Date: Sun, 29 Jan 2023 15:21:29 +0000 Subject: In JSONParser::getToken handle legal control chars early Also, reject them in strings. --- libqpdf/JSON.cc | 564 ++++++++++++++++++---------------- libtests/libtests.testcov | 1 + libtests/qtest/json_parse.test | 8 +- libtests/qtest/json_parse/bad-01.out | 2 +- libtests/qtest/json_parse/bad-02.out | 2 +- libtests/qtest/json_parse/bad-03.out | 2 +- libtests/qtest/json_parse/bad-27.out | 2 +- libtests/qtest/json_parse/bad-31.json | 2 +- libtests/qtest/json_parse/bad-45.out | 2 +- libtests/qtest/json_parse/bad-46.out | 2 +- libtests/qtest/json_parse/bad-47.out | 2 +- 11 files changed, 305 insertions(+), 284 deletions(-) (limited to 'libtests/qtest/json_parse') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index afeda315..e9637e86 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -723,10 +723,11 @@ JSONParser::handle_u_code( void JSONParser::tokenError() { - if (bytes == 0) { + if (done) { QTC::TC("libtests", "JSON parse ls premature end of input"); throw std::runtime_error("JSON: premature end of input"); } + if (lex_state == ls_u4) { QTC::TC("libtests", "JSON parse bad hex after u"); throw std::runtime_error( @@ -737,6 +738,11 @@ JSONParser::tokenError() throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": keyword: unexpected character " + std::string(p, 1)); + } else if (lex_state == ls_string) { + QTC::TC("libtests", "JSON parse control char in string"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": control character in string (missing \"?)"); } else if (lex_state == ls_backslash) { QTC::TC("libtests", "JSON parse backslash bad character"); throw std::runtime_error( @@ -779,6 +785,7 @@ JSONParser::tokenError() "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected character " + std::string(p, 1)); } + throw std::logic_error("JSON::tokenError : unhandled error"); } void @@ -792,7 +799,7 @@ JSONParser::getToken() unsigned long high_surrogate = 0; qpdf_offset_t high_offset = 0; - while (!done) { + while (true) { if (p == (buf + bytes)) { p = buf; bytes = is.read(buf, sizeof(buf)); @@ -808,307 +815,320 @@ JSONParser::getToken() // end the current token (unless we are still before the start // of the token). if (lex_state == ls_top) { - // Continue with token + ++p; + ++offset; } else { - // done + break; } + } else { QTC::TC("libtests", "JSON parse null character"); throw std::runtime_error( "JSON: control or null character at offset " + std::to_string(offset)); } - } - action = append; - switch (lex_state) { - case ls_top: - token_start = offset; - if (*p == '"') { - lex_state = ls_string; - action = ignore; - } else if (QUtil::is_space(*p)) { - action = ignore; - } else if (*p == ',') { - lex_state = ls_comma; - action = ignore; - ready = true; - } else if (*p == ':') { - lex_state = ls_colon; - action = ignore; - ready = true; - } else if (*p == '{') { - lex_state = ls_begin_dict; - action = ignore; - ready = true; - } else if (*p == '}') { - lex_state = ls_end_dict; - action = ignore; - ready = true; - } else if (*p == '[') { - lex_state = ls_begin_array; - action = ignore; - ready = true; - } else if (*p == ']') { - lex_state = ls_end_array; - action = ignore; - ready = true; - } else if ((*p >= 'a') && (*p <= 'z')) { - lex_state = ls_alpha; - } else if (*p == '-') { - lex_state = ls_number_minus; - } else if ((*p >= '1') && (*p <= '9')) { - lex_state = ls_number_before_point; - } else if (*p == '0') { - lex_state = ls_number_leading_zero; - } else { - QTC::TC("libtests", "JSON parse bad character"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": unexpected character " + std::string(p, 1)); - } - break; - - case ls_number_minus: - if ((*p >= '1') && (*p <= '9')) { - lex_state = ls_number_before_point; - } else if (*p == '0') { - lex_state = ls_number_leading_zero; - } else { - QTC::TC("libtests", "JSON parse number minus no digits"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": numeric literal: no digit after minus sign"); - } - break; - - case ls_number_leading_zero: - if (*p == '.') { - lex_state = ls_number_point; - } else if (QUtil::is_space(*p)) { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; - } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; - } else { - QTC::TC("libtests", "JSON parse leading zero"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": number with leading zero"); - } - break; - - case ls_number_before_point: - if ((*p >= '0') && (*p <= '9')) { - // continue - } else if (*p == '.') { - lex_state = ls_number_point; - } else if (QUtil::is_space(*p)) { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; - } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; - } else { - tokenError(); - } - break; - - case ls_number_point: - if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number_after_point; - } else { - tokenError(); - } - break; - - case ls_number_after_point: - if ((*p >= '0') && (*p <= '9')) { - // continue - } else if (QUtil::is_space(*p)) { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; - } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; - } else { - tokenError(); - } - break; + } else { + action = append; + switch (lex_state) { + case ls_top: + token_start = offset; + if (*p == '"') { + lex_state = ls_string; + action = ignore; + } else if (*p == ' ') { + action = ignore; + } else if (*p == ',') { + lex_state = ls_comma; + action = ignore; + ready = true; + } else if (*p == ',') { + lex_state = ls_comma; + action = ignore; + ready = true; + } else if (*p == ':') { + lex_state = ls_colon; + action = ignore; + ready = true; + } else if (*p == '{') { + lex_state = ls_begin_dict; + action = ignore; + ready = true; + } else if (*p == '}') { + lex_state = ls_end_dict; + action = ignore; + ready = true; + } else if (*p == '[') { + lex_state = ls_begin_array; + action = ignore; + ready = true; + } else if (*p == ']') { + lex_state = ls_end_array; + action = ignore; + ready = true; + } else if ((*p >= 'a') && (*p <= 'z')) { + lex_state = ls_alpha; + } else if (*p == '-') { + lex_state = ls_number_minus; + } else if ((*p >= '1') && (*p <= '9')) { + lex_state = ls_number_before_point; + } else if (*p == '0') { + lex_state = ls_number_leading_zero; + } else { + QTC::TC("libtests", "JSON parse bad character"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": unexpected character " + std::string(p, 1)); + } + break; - case ls_number_e: - if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number; - } else if ((*p == '+') || (*p == '-')) { - lex_state = ls_number_e_sign; - } else { - tokenError(); - } - break; + case ls_number_minus: + if ((*p >= '1') && (*p <= '9')) { + lex_state = ls_number_before_point; + } else if (*p == '0') { + lex_state = ls_number_leading_zero; + } else { + QTC::TC("libtests", "JSON parse number minus no digits"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: no digit after minus sign"); + } + break; - case ls_number_e_sign: - if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number; - } else { - tokenError(); - } - break; + case ls_number_leading_zero: + if (*p == '.') { + lex_state = ls_number_point; + } else if (*p == ' ') { + lex_state = ls_number; + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; + action = reread; + ready = true; + } else if (*p == 'e' || *p == 'E') { + lex_state = ls_number_e; + } else { + QTC::TC("libtests", "JSON parse leading zero"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": number with leading zero"); + } + break; - case ls_number: - // We only get here after we have seen an exponent. - if ((*p >= '0') && (*p <= '9')) { - // continue - } else if (QUtil::is_space(*p)) { - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - action = reread; - ready = true; - } else { - tokenError(); - } - break; + case ls_number_before_point: + if ((*p >= '0') && (*p <= '9')) { + // continue + } else if (*p == '.') { + lex_state = ls_number_point; + } else if (*p == ' ') { + lex_state = ls_number; + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; + action = reread; + ready = true; + } else if (*p == 'e' || *p == 'E') { + lex_state = ls_number_e; + } else { + tokenError(); + } + break; - case ls_alpha: - if ((*p >= 'a') && (*p <= 'z')) { - // okay - } else if (QUtil::is_space(*p)) { - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - action = reread; - ready = true; - } else { - tokenError(); - } - break; + case ls_number_point: + if ((*p >= '0') && (*p <= '9')) { + lex_state = ls_number_after_point; + } else { + tokenError(); + } + break; - case ls_string: - if (*p == '"') { - if (high_offset) { - QTC::TC("libtests", "JSON 16 dangling high"); - throw std::runtime_error( - "JSON: offset " + std::to_string(high_offset) + - ": UTF-16 high surrogate not followed by low " - "surrogate"); + case ls_number_after_point: + if ((*p >= '0') && (*p <= '9')) { + // continue + } else if (*p == ' ') { + lex_state = ls_number; + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; + action = reread; + ready = true; + } else if (*p == 'e' || *p == 'E') { + lex_state = ls_number_e; + } else { + tokenError(); } - action = ignore; - ready = true; - } else if (*p == '\\') { - lex_state = ls_backslash; - action = ignore; - } - break; + break; - case ls_backslash: - action = ignore; - lex_state = ls_string; - switch (*p) { - case '\\': - case '\"': - case '/': - // \/ is allowed in json input, but so is /, so we - // don't map / to \/ in output. - token += *p; + case ls_number_e: + if ((*p >= '0') && (*p <= '9')) { + lex_state = ls_number; + } else if ((*p == '+') || (*p == '-')) { + lex_state = ls_number_e_sign; + } else { + tokenError(); + } break; - case 'b': - token += '\b'; + + case ls_number_e_sign: + if ((*p >= '0') && (*p <= '9')) { + lex_state = ls_number; + } else { + tokenError(); + } break; - case 'f': - token += '\f'; + + case ls_number: + // We only get here after we have seen an exponent. + if ((*p >= '0') && (*p <= '9')) { + // continue + } else if (*p == ' ') { + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + action = reread; + ready = true; + } else { + tokenError(); + } break; - case 'n': - token += '\n'; + + case ls_alpha: + if ((*p >= 'a') && (*p <= 'z')) { + // okay + } else if (*p == ' ') { + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + action = reread; + ready = true; + } else { + tokenError(); + } break; - case 'r': - token += '\r'; + + case ls_string: + if (*p == '"') { + if (high_offset) { + QTC::TC("libtests", "JSON 16 dangling high"); + throw std::runtime_error( + "JSON: offset " + std::to_string(high_offset) + + ": UTF-16 high surrogate not followed by low " + "surrogate"); + } + action = ignore; + ready = true; + } else if (*p == '\\') { + lex_state = ls_backslash; + action = ignore; + } break; - case 't': - token += '\t'; + + case ls_backslash: + action = ignore; + lex_state = ls_string; + switch (*p) { + case '\\': + case '\"': + case '/': + // \/ is allowed in json input, but so is /, so we + // don't map / to \/ in output. + token += *p; + break; + case 'b': + token += '\b'; + break; + case 'f': + token += '\f'; + break; + case 'n': + token += '\n'; + break; + case 'r': + token += '\r'; + break; + case 't': + token += '\t'; + break; + case 'u': + lex_state = ls_u4; + u_count = 0; + u_value = 0; + break; + default: + lex_state = ls_backslash; + tokenError(); + } break; - case 'u': - lex_state = ls_u4; - u_count = 0; - u_value = 0; + + case ls_u4: + using ui = unsigned int; + action = ignore; + if ('0' <= *p && *p <= '9') { + u_value = 16 * u_value + (ui(*p) - ui('0')); + } else if ('a' <= *p && *p <= 'f') { + u_value = 16 * u_value + (10 + ui(*p) - ui('a')); + } else if ('A' <= *p && *p <= 'F') { + u_value = 16 * u_value + (10 + ui(*p) - ui('A')); + } else { + tokenError(); + } + if (++u_count == 4) { + handle_u_code( + u_value, + offset - 5, + high_surrogate, + high_offset, + token); + lex_state = ls_string; + } break; + default: - lex_state = ls_backslash; - tokenError(); + throw std::logic_error( + "JSONParser::getToken : trying to handle delimiter state"); } - break; - - case ls_u4: - using ui = unsigned int; - action = ignore; - if ('0' <= *p && *p <= '9') { - u_value = 16 * u_value + (ui(*p) - ui('0')); - } else if ('a' <= *p && *p <= 'f') { - u_value = 16 * u_value + (10 + ui(*p) - ui('a')); - } else if ('A' <= *p && *p <= 'F') { - u_value = 16 * u_value + (10 + ui(*p) - ui('A')); - } else { - tokenError(); + switch (action) { + case reread: + break; + case append: + token.append(1, *p); + // fall through + case ignore: + ++p; + ++offset; + break; } - if (++u_count == 4) { - handle_u_code( - u_value, offset - 5, high_surrogate, high_offset, token); - lex_state = ls_string; + if (ready) { + return; } - break; - - default: - throw std::logic_error( - "JSONParser::getToken : trying to handle delimiter state"); - } - switch (action) { - case reread: - break; - case append: - token.append(1, *p); - // fall through - case ignore: - ++p; - ++offset; - break; - } - if (ready) { - break; } } - if (done) { - if (!token.empty() && !ready) { - switch (lex_state) { - case ls_top: - // Can't happen - throw std::logic_error("tok_start set in ls_top while parsing"); - break; - case ls_number_leading_zero: - case ls_number_before_point: - case ls_number_after_point: - lex_state = ls_number; - break; + // We only get here if on end of input or if the last character was a + // control character. - case ls_number: - case ls_alpha: - // terminal state - break; + if (!token.empty()) { + switch (lex_state) { + case ls_top: + // Can't happen + throw std::logic_error("tok_start set in ls_top while parsing"); + break; - default: - tokenError(); - } + case ls_number_leading_zero: + case ls_number_before_point: + case ls_number_after_point: + lex_state = ls_number; + break; + + case ls_number: + case ls_alpha: + // terminal state + break; + + default: + tokenError(); } } } diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 4b3bb45b..5e5c2e00 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -79,6 +79,7 @@ JSON parse number minus no digits 0 JSON parse incomplete number 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 +JSON parse control char in string 0 JSON parse leading zero 0 JSON parse ls premature end of input 0 JSON parse bad hex after u 0 diff --git a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test index 8234b755..699544f6 100644 --- a/libtests/qtest/json_parse.test +++ b/libtests/qtest/json_parse.test @@ -125,10 +125,10 @@ my @bad = ( "e after minus", # 42 "missing digit after e", # 43 "missing digit after e+/-", # 44 - # "tab char in string", # 45 - # "cr char in string", # 46 - # "lf char in string", # 47 - # "bs char in string", # 48 + "tab char in string", # 45 + "cr char in string", # 46 + "lf char in string", # 47 + "bs char in string", # 48 ); my $i = 0; diff --git a/libtests/qtest/json_parse/bad-01.out b/libtests/qtest/json_parse/bad-01.out index a4254cff..8ae96c30 100644 --- a/libtests/qtest/json_parse/bad-01.out +++ b/libtests/qtest/json_parse/bad-01.out @@ -1 +1 @@ -exception: bad-01.json: JSON: offset 9: material follows end of object: junk +exception: bad-01.json: JSON: offset 8: material follows end of object: junk diff --git a/libtests/qtest/json_parse/bad-02.out b/libtests/qtest/json_parse/bad-02.out index 485c9658..212b2f4f 100644 --- a/libtests/qtest/json_parse/bad-02.out +++ b/libtests/qtest/json_parse/bad-02.out @@ -1 +1 @@ -exception: bad-02.json: JSON: offset 11: material follows end of object: junk +exception: bad-02.json: JSON: offset 10: material follows end of object: junk diff --git a/libtests/qtest/json_parse/bad-03.out b/libtests/qtest/json_parse/bad-03.out index 38f35119..a1411e0e 100644 --- a/libtests/qtest/json_parse/bad-03.out +++ b/libtests/qtest/json_parse/bad-03.out @@ -1 +1 @@ -exception: bad-03.json: JSON: offset 16: material follows end of object: junk +exception: bad-03.json: JSON: offset 15: material follows end of object: junk diff --git a/libtests/qtest/json_parse/bad-27.out b/libtests/qtest/json_parse/bad-27.out index 70fcbf74..4c1ecfeb 100644 --- a/libtests/qtest/json_parse/bad-27.out +++ b/libtests/qtest/json_parse/bad-27.out @@ -1 +1 @@ -exception: bad-27.json: JSON: premature end of input +exception: bad-27.json: JSON: offset 5: control character in string (missing "?) diff --git a/libtests/qtest/json_parse/bad-31.json b/libtests/qtest/json_parse/bad-31.json index 39cdd0de..277cc02f 100644 --- a/libtests/qtest/json_parse/bad-31.json +++ b/libtests/qtest/json_parse/bad-31.json @@ -1 +1 @@ -- +- diff --git a/libtests/qtest/json_parse/bad-45.out b/libtests/qtest/json_parse/bad-45.out index ba7e4f16..d4320b0a 100644 --- a/libtests/qtest/json_parse/bad-45.out +++ b/libtests/qtest/json_parse/bad-45.out @@ -1 +1 @@ -"Tab in str\ting" +exception: bad-45.json: JSON: offset 11: control character in string (missing "?) diff --git a/libtests/qtest/json_parse/bad-46.out b/libtests/qtest/json_parse/bad-46.out index 2baad6a4..50aa5ffb 100644 --- a/libtests/qtest/json_parse/bad-46.out +++ b/libtests/qtest/json_parse/bad-46.out @@ -1 +1 @@ -"cr in str\ring" +exception: bad-46.json: JSON: offset 10: control character in string (missing "?) diff --git a/libtests/qtest/json_parse/bad-47.out b/libtests/qtest/json_parse/bad-47.out index 30549072..39f9d3d5 100644 --- a/libtests/qtest/json_parse/bad-47.out +++ b/libtests/qtest/json_parse/bad-47.out @@ -1 +1 @@ -"lf in str\ning" +exception: bad-47.json: JSON: offset 10: control character in string (missing "?) -- cgit v1.2.3-54-g00ecf From d3152869b666a725d303e0667a69f973fc5a96ed Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 30 Jan 2023 13:17:09 +0000 Subject: In JSONParser::getToken handle structural and space chars early --- libqpdf/JSON.cc | 164 ++++++++++++++++++++--------------- libtests/qtest/json_parse/bad-09.out | 2 +- libtests/qtest/json_parse/bad-31.out | 2 +- 3 files changed, 95 insertions(+), 73 deletions(-) (limited to 'libtests/qtest/json_parse') diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index e9637e86..59843c05 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -791,7 +791,7 @@ JSONParser::tokenError() void JSONParser::getToken() { - enum { append, ignore, reread } action = append; + enum { append, ignore } action = append; bool ready = false; token.clear(); @@ -820,13 +820,103 @@ JSONParser::getToken() } else { break; } - } else { QTC::TC("libtests", "JSON parse null character"); throw std::runtime_error( "JSON: control or null character at offset " + std::to_string(offset)); } + } else if (*p == ',') { + if (lex_state == ls_top) { + ++p; + ++offset; + lex_state = ls_comma; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == ':') { + if (lex_state == ls_top) { + ++p; + ++offset; + lex_state = ls_colon; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == ' ') { + if (lex_state == ls_top) { + ++p; + ++offset; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == '{') { + if (lex_state == ls_top) { + token_start = offset; + ++p; + ++offset; + lex_state = ls_begin_dict; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == '}') { + if (lex_state == ls_top) { + ++p; + ++offset; + lex_state = ls_end_dict; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == '[') { + if (lex_state == ls_top) { + token_start = offset; + ++p; + ++offset; + lex_state = ls_begin_array; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == ']') { + if (lex_state == ls_top) { + ++p; + ++offset; + lex_state = ls_end_array; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } } else { action = append; switch (lex_state) { @@ -835,36 +925,6 @@ JSONParser::getToken() if (*p == '"') { lex_state = ls_string; action = ignore; - } else if (*p == ' ') { - action = ignore; - } else if (*p == ',') { - lex_state = ls_comma; - action = ignore; - ready = true; - } else if (*p == ',') { - lex_state = ls_comma; - action = ignore; - ready = true; - } else if (*p == ':') { - lex_state = ls_colon; - action = ignore; - ready = true; - } else if (*p == '{') { - lex_state = ls_begin_dict; - action = ignore; - ready = true; - } else if (*p == '}') { - lex_state = ls_end_dict; - action = ignore; - ready = true; - } else if (*p == '[') { - lex_state = ls_begin_array; - action = ignore; - ready = true; - } else if (*p == ']') { - lex_state = ls_end_array; - action = ignore; - ready = true; } else if ((*p >= 'a') && (*p <= 'z')) { lex_state = ls_alpha; } else if (*p == '-') { @@ -897,14 +957,6 @@ JSONParser::getToken() case ls_number_leading_zero: if (*p == '.') { lex_state = ls_number_point; - } else if (*p == ' ') { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; } else if (*p == 'e' || *p == 'E') { lex_state = ls_number_e; } else { @@ -920,14 +972,6 @@ JSONParser::getToken() // continue } else if (*p == '.') { lex_state = ls_number_point; - } else if (*p == ' ') { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; } else if (*p == 'e' || *p == 'E') { lex_state = ls_number_e; } else { @@ -946,14 +990,6 @@ JSONParser::getToken() case ls_number_after_point: if ((*p >= '0') && (*p <= '9')) { // continue - } else if (*p == ' ') { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; } else if (*p == 'e' || *p == 'E') { lex_state = ls_number_e; } else { @@ -983,12 +1019,6 @@ JSONParser::getToken() // We only get here after we have seen an exponent. if ((*p >= '0') && (*p <= '9')) { // continue - } else if (*p == ' ') { - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - action = reread; - ready = true; } else { tokenError(); } @@ -997,12 +1027,6 @@ JSONParser::getToken() case ls_alpha: if ((*p >= 'a') && (*p <= 'z')) { // okay - } else if (*p == ' ') { - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - action = reread; - ready = true; } else { tokenError(); } @@ -1090,8 +1114,6 @@ JSONParser::getToken() "JSONParser::getToken : trying to handle delimiter state"); } switch (action) { - case reread: - break; case append: token.append(1, *p); // fall through @@ -1107,7 +1129,7 @@ JSONParser::getToken() } // We only get here if on end of input or if the last character was a - // control character. + // control character or other delimiter. if (!token.empty()) { switch (lex_state) { diff --git a/libtests/qtest/json_parse/bad-09.out b/libtests/qtest/json_parse/bad-09.out index 21d2f1c1..979d53d0 100644 --- a/libtests/qtest/json_parse/bad-09.out +++ b/libtests/qtest/json_parse/bad-09.out @@ -1 +1 @@ -exception: bad-09.json: JSON: offset 3: expect string as dictionary key +exception: bad-09.json: JSON: offset 2: expect string as dictionary key diff --git a/libtests/qtest/json_parse/bad-31.out b/libtests/qtest/json_parse/bad-31.out index 2228d08d..af177726 100644 --- a/libtests/qtest/json_parse/bad-31.out +++ b/libtests/qtest/json_parse/bad-31.out @@ -1 +1 @@ -exception: bad-31.json: JSON: offset 1: numeric literal: no digit after minus sign +exception: bad-31.json: JSON: offset 1: numeric literal: incomplete number -- cgit v1.2.3-54-g00ecf