From b6b4d3b299490966524ee5a1a8ecc03c267af0c8 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Jan 2023 00:44:16 +0000 Subject: Add new method JSONParser::numberError --- libqpdf/JSON.cc | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 77418ddb..5205d4f9 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -620,6 +620,7 @@ namespace private: void getToken(); void handleToken(); + void numberError(); static std::string decode_string(std::string const& json, qpdf_offset_t offset); static void handle_u_code( @@ -796,6 +797,39 @@ JSONParser::decode_string(std::string const& str, qpdf_offset_t offset) return result; } +void +JSONParser::numberError() +{ + if (*p == '.') { + if (number_saw_e) { + // QTC::TC("libtests", "JSON parse point after e"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: decimal point after e"); + } else { + // QTC::TC("libtests", "JSON parse duplicate point"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: decimal point already seen"); + } + } else if (*p == 'e') { + // QTC::TC("libtests", "JSON parse duplicate e"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: e already seen"); + } else if ((*p == '+') || (*p == '-')) { + // QTC::TC("libtests", "JSON parse unexpected sign"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: unexpected sign"); + } else { + QTC::TC("libtests", "JSON parse numeric bad character"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: unexpected character " + std::string(p, 1)); + } +} + void JSONParser::getToken() { @@ -905,11 +939,7 @@ JSONParser::getToken() action = reread; ready = true; } else { - QTC::TC("libtests", "JSON parse numeric bad character"); - throw std::runtime_error( - 
"JSON: offset " + std::to_string(offset) + - ": numeric literal: unexpected character " + - std::string(p, 1)); + numberError(); } break; -- cgit v1.2.3-54-g00ecf From cba1c352e3c4236205dc96de643e780abb3c7b64 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Jan 2023 11:47:06 +0000 Subject: In JSONParser add lex_state ls_number_minus --- libqpdf/JSON.cc | 24 +++++++++++++++++++++--- libtests/libtests.testcov | 3 +-- libtests/qtest/json_parse.test | 2 ++ libtests/qtest/json_parse/bad-31.out | 2 +- libtests/qtest/json_parse/bad-41.json | 2 ++ libtests/qtest/json_parse/bad-41.out | 1 + libtests/qtest/json_parse/bad-42.json | 1 + libtests/qtest/json_parse/bad-42.out | 1 + 8 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 libtests/qtest/json_parse/bad-41.json create mode 100644 libtests/qtest/json_parse/bad-41.out create mode 100644 libtests/qtest/json_parse/bad-42.json create mode 100644 libtests/qtest/json_parse/bad-42.out diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 5205d4f9..c881811b 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -647,6 +647,7 @@ namespace enum lex_state_e { ls_top, ls_number, + ls_number_minus, ls_alpha, ls_string, ls_backslash, @@ -861,7 +862,7 @@ JSONParser::getToken() } else if ((*p >= 'a') && (*p <= 'z')) { lex_state = ls_alpha; } else if (*p == '-') { - lex_state = ls_number; + lex_state = ls_number_minus; number_before_point = 0; number_after_point = 0; number_after_e = 0; @@ -891,6 +892,21 @@ JSONParser::getToken() } break; + case ls_number_minus: + if ((*p >= '1') && (*p <= '9')) { + ++number_before_point; + lex_state = ls_number; + } else if (*p == '0') { + ++number_before_point; + lex_state = ls_number; + } else { + QTC::TC("libtests", "JSON parse number minus no digits"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: no digit after minus sign"); + } + break; + case ls_number: if ((*p >= '0') && (*p <= '9')) { if (number_saw_e) { @@ -1020,6 +1036,7 @@ 
JSONParser::getToken() break; case ls_number: + case ls_number_minus: case ls_alpha: // okay break; @@ -1093,8 +1110,9 @@ JSONParser::handleToken() break; case ls_number: + case ls_number_minus: if (number_saw_point && (number_after_point == 0)) { - QTC::TC("libtests", "JSON parse decimal with no digits"); + // QTC::TC("libtests", "JSON parse decimal with no digits"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": decimal point with no digits"); @@ -1108,7 +1126,7 @@ JSONParser::handleToken() ": number with leading zero"); } if ((number_before_point == 0) && (number_after_point == 0)) { - QTC::TC("libtests", "JSON parse number no digits"); + // QTC::TC("libtests", "JSON parse number no digits"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": number with no digits"); diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 2ceef541..26cf2048 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -58,7 +58,6 @@ QPDFArgParser bad option for help 0 QPDFArgParser bad topic for help 0 QPDFArgParser invalid choice handler to unknown 0 JSON parse junk after object 0 -JSON parse decimal with no digits 0 JSON parse invalid keyword 0 JSON parse expected colon 0 JSON parse expected , or } 0 @@ -76,11 +75,11 @@ JSON parse duplicate point 0 JSON parse duplicate e 0 JSON parse unexpected sign 0 JSON parse numeric bad character 0 +JSON parse number minus no digits 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 JSON parse unterminated string 0 JSON parse leading zero 0 -JSON parse number no digits 0 JSON parse premature end of u 0 JSON parse bad hex after u 0 JSONHandler unhandled value 0 diff --git a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test index 112da0a9..7c64e3bd 100644 --- a/libtests/qtest/json_parse.test +++ b/libtests/qtest/json_parse.test @@ -121,6 +121,8 @@ my @bad = ( "high high surrogate", # 38 "dangling high surrogate", # 39 "duplicate 
dictionary key", # 40 + "decimal point after minus",# 41 + "e after minus", # 42 ); my $i = 0; diff --git a/libtests/qtest/json_parse/bad-31.out b/libtests/qtest/json_parse/bad-31.out index 344f42e8..2228d08d 100644 --- a/libtests/qtest/json_parse/bad-31.out +++ b/libtests/qtest/json_parse/bad-31.out @@ -1 +1 @@ -exception: bad-31.json: JSON: offset 2: number with no digits +exception: bad-31.json: JSON: offset 1: numeric literal: no digit after minus sign diff --git a/libtests/qtest/json_parse/bad-41.json b/libtests/qtest/json_parse/bad-41.json new file mode 100644 index 00000000..dad59049 --- /dev/null +++ b/libtests/qtest/json_parse/bad-41.json @@ -0,0 +1,2 @@ +-.123 + diff --git a/libtests/qtest/json_parse/bad-41.out b/libtests/qtest/json_parse/bad-41.out new file mode 100644 index 00000000..bebcfdb9 --- /dev/null +++ b/libtests/qtest/json_parse/bad-41.out @@ -0,0 +1 @@ +exception: bad-41.json: JSON: offset 1: numeric literal: no digit after minus sign diff --git a/libtests/qtest/json_parse/bad-42.json b/libtests/qtest/json_parse/bad-42.json new file mode 100644 index 00000000..2f9148b0 --- /dev/null +++ b/libtests/qtest/json_parse/bad-42.json @@ -0,0 +1 @@ +-e123 diff --git a/libtests/qtest/json_parse/bad-42.out b/libtests/qtest/json_parse/bad-42.out new file mode 100644 index 00000000..96e9a0a3 --- /dev/null +++ b/libtests/qtest/json_parse/bad-42.out @@ -0,0 +1 @@ +exception: bad-42.json: JSON: offset 1: numeric literal: no digit after minus sign -- cgit v1.2.3-54-g00ecf From 08e768909d9760a3588b8a8eaaeda0b357a85c62 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Jan 2023 11:22:06 +0000 Subject: In JSONParser add lex_state ls_number_leading_zero --- libqpdf/JSON.cc | 34 +++++++++++++++++++++++++++++++--- libtests/qtest/json_parse/bad-32.out | 2 +- libtests/qtest/json_parse/bad-33.out | 2 +- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index c881811b..1c3378a7 100644 --- a/libqpdf/JSON.cc +++ 
b/libqpdf/JSON.cc @@ -648,6 +648,7 @@ namespace ls_top, ls_number, ls_number_minus, + ls_number_leading_zero, ls_alpha, ls_string, ls_backslash, @@ -868,13 +869,20 @@ JSONParser::getToken() number_after_e = 0; number_saw_point = false; number_saw_e = false; - } else if ((*p >= '0') && (*p <= '9')) { + } else if ((*p >= '1') && (*p <= '9')) { lex_state = ls_number; number_before_point = 1; number_after_point = 0; number_after_e = 0; number_saw_point = false; number_saw_e = false; + } else if (*p == '0') { + lex_state = ls_number_leading_zero; + number_before_point = 1; + number_after_point = 0; + number_after_e = 0; + number_saw_point = false; + number_saw_e = false; } else if (*p == '.') { lex_state = ls_number; number_before_point = 0; @@ -898,7 +906,7 @@ JSONParser::getToken() lex_state = ls_number; } else if (*p == '0') { ++number_before_point; - lex_state = ls_number; + lex_state = ls_number_leading_zero; } else { QTC::TC("libtests", "JSON parse number minus no digits"); throw std::runtime_error( @@ -907,6 +915,25 @@ JSONParser::getToken() } break; + case ls_number_leading_zero: + if (*p == '.') { + lex_state = ls_number; + } else if (*p == 'e') { + lex_state = ls_number; + } else if (QUtil::is_space(*p)) { + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + action = reread; + ready = true; + } else { + QTC::TC("libtests", "JSON parse leading zero"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": number with leading zero"); + } + break; + case ls_number: if ((*p >= '0') && (*p <= '9')) { if (number_saw_e) { @@ -1037,6 +1064,7 @@ JSONParser::getToken() case ls_number: case ls_number_minus: + case ls_number_leading_zero: case ls_alpha: // okay break; @@ -1111,6 +1139,7 @@ JSONParser::handleToken() case ls_number: case ls_number_minus: + case ls_number_leading_zero: if (number_saw_point && (number_after_point == 0)) { // QTC::TC("libtests", "JSON parse decimal with no digits"); throw std::runtime_error( @@ 
-1120,7 +1149,6 @@ JSONParser::handleToken() if ((number_before_point > 1) && ((first_char == '0') || ((first_char == '-') && (token.at(1) == '0')))) { - QTC::TC("libtests", "JSON parse leading zero"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": number with leading zero"); diff --git a/libtests/qtest/json_parse/bad-32.out b/libtests/qtest/json_parse/bad-32.out index 4372e0cf..41a681c0 100644 --- a/libtests/qtest/json_parse/bad-32.out +++ b/libtests/qtest/json_parse/bad-32.out @@ -1 +1 @@ -exception: bad-32.json: JSON: offset 5: number with leading zero +exception: bad-32.json: JSON: offset 1: number with leading zero diff --git a/libtests/qtest/json_parse/bad-33.out b/libtests/qtest/json_parse/bad-33.out index ae41e48b..98a72942 100644 --- a/libtests/qtest/json_parse/bad-33.out +++ b/libtests/qtest/json_parse/bad-33.out @@ -1 +1 @@ -exception: bad-33.json: JSON: offset 6: number with leading zero +exception: bad-33.json: JSON: offset 2: number with leading zero -- cgit v1.2.3-54-g00ecf From bfda62aeeb72355b5e8b6fbcdc94c50afaa366ab Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 23 Jan 2023 19:51:27 +0000 Subject: In JSONParser add lex_state ls_number_before_point --- libqpdf/JSON.cc | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 1c3378a7..c8c4fdb0 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -649,6 +649,7 @@ namespace ls_number, ls_number_minus, ls_number_leading_zero, + ls_number_before_point, ls_alpha, ls_string, ls_backslash, @@ -820,7 +821,7 @@ JSONParser::numberError() "JSON: offset " + std::to_string(offset) + ": numeric literal: e already seen"); } else if ((*p == '+') || (*p == '-')) { - // QTC::TC("libtests", "JSON parse unexpected sign"); + QTC::TC("libtests", "JSON parse unexpected sign"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected sign"); @@ -870,7 +871,7 @@ 
JSONParser::getToken() number_saw_point = false; number_saw_e = false; } else if ((*p >= '1') && (*p <= '9')) { - lex_state = ls_number; + lex_state = ls_number_before_point; number_before_point = 1; number_after_point = 0; number_after_e = 0; @@ -903,7 +904,7 @@ JSONParser::getToken() case ls_number_minus: if ((*p >= '1') && (*p <= '9')) { ++number_before_point; - lex_state = ls_number; + lex_state = ls_number_before_point; } else if (*p == '0') { ++number_before_point; lex_state = ls_number_leading_zero; @@ -934,6 +935,26 @@ JSONParser::getToken() } break; + case ls_number_before_point: + if ((*p >= '0') && (*p <= '9')) { + ++number_before_point; + } else if (*p == '.') { + number_saw_point = true; + lex_state = ls_number; + } else if (*p == 'e') { + number_saw_e = true; + lex_state = ls_number; + } else if (QUtil::is_space(*p)) { + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + action = reread; + ready = true; + } else { + numberError(); + } + break; + case ls_number: if ((*p >= '0') && (*p <= '9')) { if (number_saw_e) { @@ -970,7 +991,6 @@ JSONParser::getToken() if (number_saw_e && (number_after_e == 0)) { // okay } else { - QTC::TC("libtests", "JSON parse unexpected sign"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected sign"); @@ -1062,6 +1082,7 @@ JSONParser::getToken() throw std::logic_error("tok_start set in ls_top while parsing"); break; + case ls_number_before_point: case ls_number: case ls_number_minus: case ls_number_leading_zero: @@ -1137,6 +1158,7 @@ JSONParser::handleToken() } break; + case ls_number_before_point: case ls_number: case ls_number_minus: case ls_number_leading_zero: -- cgit v1.2.3-54-g00ecf From cdd1f0a9f60747ec0e25139dd530be3caf468eca Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 23 Jan 2023 20:08:11 +0000 Subject: In JSONParser add lex_state ls_number_point Also. remove '.' as starting char in lsTop. 
--- libqpdf/JSON.cc | 30 ++++++++++++++++++++---------- libtests/libtests.testcov | 1 + libtests/qtest/json_parse/bad-04.out | 2 +- libtests/qtest/json_parse/bad-30.out | 2 +- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index c8c4fdb0..b068aca2 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -650,6 +650,7 @@ namespace ls_number_minus, ls_number_leading_zero, ls_number_before_point, + ls_number_point, ls_alpha, ls_string, ls_backslash, @@ -825,6 +826,11 @@ JSONParser::numberError() throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected sign"); + } else if (QUtil::is_space(*p) || strchr("{}[]:,", *p)) { + QTC::TC("libtests", "JSON parse incomplete number"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: incomplete number"); } else { QTC::TC("libtests", "JSON parse numeric bad character"); throw std::runtime_error( @@ -884,13 +890,6 @@ JSONParser::getToken() number_after_e = 0; number_saw_point = false; number_saw_e = false; - } else if (*p == '.') { - lex_state = ls_number; - number_before_point = 0; - number_after_point = 0; - number_after_e = 0; - number_saw_point = true; - number_saw_e = false; } else if (strchr("{}[]:,", *p)) { ready = true; } else { @@ -918,7 +917,7 @@ JSONParser::getToken() case ls_number_leading_zero: if (*p == '.') { - lex_state = ls_number; + lex_state = ls_number_point; } else if (*p == 'e') { lex_state = ls_number; } else if (QUtil::is_space(*p)) { @@ -940,7 +939,7 @@ JSONParser::getToken() ++number_before_point; } else if (*p == '.') { number_saw_point = true; - lex_state = ls_number; + lex_state = ls_number_point; } else if (*p == 'e') { number_saw_e = true; lex_state = ls_number; @@ -955,6 +954,15 @@ JSONParser::getToken() } break; + case ls_number_point: + if ((*p >= '0') && (*p <= '9')) { + ++number_after_point; + lex_state = ls_number; + } else { + numberError(); + } + break; + case 
ls_number: if ((*p >= '0') && (*p <= '9')) { if (number_saw_e) { @@ -1083,6 +1091,7 @@ JSONParser::getToken() break; case ls_number_before_point: + case ls_number_point: case ls_number: case ls_number_minus: case ls_number_leading_zero: @@ -1158,10 +1167,11 @@ JSONParser::handleToken() } break; - case ls_number_before_point: case ls_number: case ls_number_minus: case ls_number_leading_zero: + case ls_number_before_point: + case ls_number_point: if (number_saw_point && (number_after_point == 0)) { // QTC::TC("libtests", "JSON parse decimal with no digits"); throw std::runtime_error( diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 26cf2048..5ca5fb77 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -76,6 +76,7 @@ JSON parse duplicate e 0 JSON parse unexpected sign 0 JSON parse numeric bad character 0 JSON parse number minus no digits 0 +JSON parse incomplete number 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 JSON parse unterminated string 0 diff --git a/libtests/qtest/json_parse/bad-04.out b/libtests/qtest/json_parse/bad-04.out index 7fe71693..27d252f2 100644 --- a/libtests/qtest/json_parse/bad-04.out +++ b/libtests/qtest/json_parse/bad-04.out @@ -1 +1 @@ -exception: bad-04.json: JSON: offset 5: decimal point with no digits +exception: bad-04.json: JSON: offset 4: unexpected character . 
diff --git a/libtests/qtest/json_parse/bad-30.out b/libtests/qtest/json_parse/bad-30.out index bff961af..ec63bb09 100644 --- a/libtests/qtest/json_parse/bad-30.out +++ b/libtests/qtest/json_parse/bad-30.out @@ -1 +1 @@ -exception: bad-30.json: JSON: offset 5: decimal point with no digits +exception: bad-30.json: JSON: offset 4: numeric literal: incomplete number -- cgit v1.2.3-54-g00ecf From 972ebca5055c5077b117c497355f264036fed1ec Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Jan 2023 00:34:22 +0000 Subject: In JSONParser add lex_state ls_number_after_point --- libqpdf/JSON.cc | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index b068aca2..bb4b3e55 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -651,6 +651,7 @@ namespace ls_number_leading_zero, ls_number_before_point, ls_number_point, + ls_number_after_point, ls_alpha, ls_string, ls_backslash, @@ -811,7 +812,7 @@ JSONParser::numberError() "JSON: offset " + std::to_string(offset) + ": numeric literal: decimal point after e"); } else { - // QTC::TC("libtests", "JSON parse duplicate point"); + QTC::TC("libtests", "JSON parse duplicate point"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: decimal point already seen"); @@ -957,7 +958,24 @@ JSONParser::getToken() case ls_number_point: if ((*p >= '0') && (*p <= '9')) { ++number_after_point; + lex_state = ls_number_after_point; + } else { + numberError(); + } + break; + + case ls_number_after_point: + if ((*p >= '0') && (*p <= '9')) { + ++number_after_point; + } else if (*p == 'e') { + number_saw_e = true; lex_state = ls_number; + } else if (QUtil::is_space(*p)) { + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + action = reread; + ready = true; } else { numberError(); } @@ -979,7 +997,6 @@ JSONParser::getToken() "JSON: offset " + std::to_string(offset) + ": numeric literal: decimal point after e"); } else if 
(number_saw_point) { - QTC::TC("libtests", "JSON parse duplicate point"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: decimal point already seen"); @@ -1092,6 +1109,7 @@ JSONParser::getToken() case ls_number_before_point: case ls_number_point: + case ls_number_after_point: case ls_number: case ls_number_minus: case ls_number_leading_zero: @@ -1172,6 +1190,7 @@ JSONParser::handleToken() case ls_number_leading_zero: case ls_number_before_point: case ls_number_point: + case ls_number_after_point: if (number_saw_point && (number_after_point == 0)) { // QTC::TC("libtests", "JSON parse decimal with no digits"); throw std::runtime_error( -- cgit v1.2.3-54-g00ecf From 04333bbc201963a8e716c5949bad071ea6c1a2fe Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Jan 2023 01:36:38 +0000 Subject: In JSONParser add lex_state ls_number_e Also, allow 'E' as alternative to 'e'. --- libqpdf/JSON.cc | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index bb4b3e55..d6baf584 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -652,6 +652,7 @@ namespace ls_number_before_point, ls_number_point, ls_number_after_point, + ls_number_e, ls_alpha, ls_string, ls_backslash, @@ -817,7 +818,7 @@ JSONParser::numberError() "JSON: offset " + std::to_string(offset) + ": numeric literal: decimal point already seen"); } - } else if (*p == 'e') { + } else if (*p == 'e' || *p == 'E') { // QTC::TC("libtests", "JSON parse duplicate e"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + @@ -919,14 +920,15 @@ JSONParser::getToken() case ls_number_leading_zero: if (*p == '.') { lex_state = ls_number_point; - } else if (*p == 'e') { - lex_state = ls_number; } else if (QUtil::is_space(*p)) { action = ignore; ready = true; } else if (strchr("{}[]:,", *p)) { action = reread; ready = true; + } else if (*p == 'e' || *p == 'E') { + number_saw_e = true; + lex_state 
= ls_number_e; } else { QTC::TC("libtests", "JSON parse leading zero"); throw std::runtime_error( @@ -941,15 +943,15 @@ JSONParser::getToken() } else if (*p == '.') { number_saw_point = true; lex_state = ls_number_point; - } else if (*p == 'e') { - number_saw_e = true; - lex_state = ls_number; } else if (QUtil::is_space(*p)) { action = ignore; ready = true; } else if (strchr("{}[]:,", *p)) { action = reread; ready = true; + } else if (*p == 'e' || *p == 'E') { + number_saw_e = true; + lex_state = ls_number_e; } else { numberError(); } @@ -967,15 +969,26 @@ JSONParser::getToken() case ls_number_after_point: if ((*p >= '0') && (*p <= '9')) { ++number_after_point; - } else if (*p == 'e') { - number_saw_e = true; - lex_state = ls_number; } else if (QUtil::is_space(*p)) { action = ignore; ready = true; } else if (strchr("{}[]:,", *p)) { action = reread; ready = true; + } else if (*p == 'e' || *p == 'E') { + number_saw_e = true; + lex_state = ls_number_e; + } else { + numberError(); + } + break; + + case ls_number_e: + if ((*p >= '0') && (*p <= '9')) { + ++number_after_e; + lex_state = ls_number; + } else if ((*p == '+') || (*p == '-')) { + lex_state = ls_number; } else { numberError(); } @@ -1003,7 +1016,7 @@ JSONParser::getToken() } else { number_saw_point = true; } - } else if (*p == 'e') { + } else if (*p == 'e' || *p == 'E') { if (number_saw_e) { QTC::TC("libtests", "JSON parse duplicate e"); throw std::runtime_error( @@ -1110,6 +1123,7 @@ JSONParser::getToken() case ls_number_before_point: case ls_number_point: case ls_number_after_point: + case ls_number_e: case ls_number: case ls_number_minus: case ls_number_leading_zero: @@ -1191,6 +1205,7 @@ JSONParser::handleToken() case ls_number_before_point: case ls_number_point: case ls_number_after_point: + case ls_number_e: if (number_saw_point && (number_after_point == 0)) { // QTC::TC("libtests", "JSON parse decimal with no digits"); throw std::runtime_error( -- cgit v1.2.3-54-g00ecf From 
6ea543e6c731db30b0807f531a445a66c66619b9 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Jan 2023 13:04:31 +0000 Subject: In JSONParser add lex_state ls_number_e_sign --- libqpdf/JSON.cc | 17 ++++++++++++++--- libtests/qtest/json_parse.test | 2 ++ libtests/qtest/json_parse/bad-43.json | 1 + libtests/qtest/json_parse/bad-43.out | 1 + libtests/qtest/json_parse/bad-44.json | 1 + libtests/qtest/json_parse/bad-44.out | 1 + 6 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 libtests/qtest/json_parse/bad-43.json create mode 100644 libtests/qtest/json_parse/bad-43.out create mode 100644 libtests/qtest/json_parse/bad-44.json create mode 100644 libtests/qtest/json_parse/bad-44.out diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index d6baf584..8e55b08c 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -653,6 +653,7 @@ namespace ls_number_point, ls_number_after_point, ls_number_e, + ls_number_e_sign, ls_alpha, ls_string, ls_backslash, @@ -988,6 +989,14 @@ JSONParser::getToken() ++number_after_e; lex_state = ls_number; } else if ((*p == '+') || (*p == '-')) { + lex_state = ls_number_e_sign; + } else { + numberError(); + } + break; + + case ls_number_e_sign: + if ((*p >= '0') && (*p <= '9')) { lex_state = ls_number; } else { numberError(); @@ -1120,13 +1129,14 @@ JSONParser::getToken() throw std::logic_error("tok_start set in ls_top while parsing"); break; + case ls_number: + case ls_number_minus: + case ls_number_leading_zero: case ls_number_before_point: case ls_number_point: case ls_number_after_point: case ls_number_e: - case ls_number: - case ls_number_minus: - case ls_number_leading_zero: + case ls_number_e_sign: case ls_alpha: // okay break; @@ -1206,6 +1216,7 @@ JSONParser::handleToken() case ls_number_point: case ls_number_after_point: case ls_number_e: + case ls_number_e_sign: if (number_saw_point && (number_after_point == 0)) { // QTC::TC("libtests", "JSON parse decimal with no digits"); throw std::runtime_error( diff --git 
a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test index 7c64e3bd..d38d70de 100644 --- a/libtests/qtest/json_parse.test +++ b/libtests/qtest/json_parse.test @@ -123,6 +123,8 @@ my @bad = ( "duplicate dictionary key", # 40 "decimal point after minus",# 41 "e after minus", # 42 + "missing digit after e", # 43 + "missing digit after e+/-", # 44 ); my $i = 0; diff --git a/libtests/qtest/json_parse/bad-43.json b/libtests/qtest/json_parse/bad-43.json new file mode 100644 index 00000000..896a676a --- /dev/null +++ b/libtests/qtest/json_parse/bad-43.json @@ -0,0 +1 @@ +123e diff --git a/libtests/qtest/json_parse/bad-43.out b/libtests/qtest/json_parse/bad-43.out new file mode 100644 index 00000000..84070aa9 --- /dev/null +++ b/libtests/qtest/json_parse/bad-43.out @@ -0,0 +1 @@ +exception: bad-43.json: JSON: offset 4: numeric literal: incomplete number diff --git a/libtests/qtest/json_parse/bad-44.json b/libtests/qtest/json_parse/bad-44.json new file mode 100644 index 00000000..3a5d7dff --- /dev/null +++ b/libtests/qtest/json_parse/bad-44.json @@ -0,0 +1 @@ +123e+ diff --git a/libtests/qtest/json_parse/bad-44.out b/libtests/qtest/json_parse/bad-44.out new file mode 100644 index 00000000..f72120c4 --- /dev/null +++ b/libtests/qtest/json_parse/bad-44.out @@ -0,0 +1 @@ +exception: bad-44.json: JSON: offset 5: numeric literal: incomplete number -- cgit v1.2.3-54-g00ecf From 8fd6e1c5b24d0fc804c25004f398b39b5041b034 Mon Sep 17 00:00:00 2001 From: m-holger Date: Wed, 25 Jan 2023 19:14:36 +0000 Subject: Refactor handling of ls_number in JSONParser::getToken Reflect that the ls_number case only handles the digits after an 'e'. Also, change state to ls_number for all 'ready' numbers. 
--- libqpdf/JSON.cc | 47 +++++++++-------------------------------------- 1 file changed, 9 insertions(+), 38 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 8e55b08c..3f1a3879 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -809,7 +809,7 @@ JSONParser::numberError() { if (*p == '.') { if (number_saw_e) { - // QTC::TC("libtests", "JSON parse point after e"); + QTC::TC("libtests", "JSON parse point after e"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: decimal point after e"); @@ -820,7 +820,7 @@ JSONParser::numberError() ": numeric literal: decimal point already seen"); } } else if (*p == 'e' || *p == 'E') { - // QTC::TC("libtests", "JSON parse duplicate e"); + QTC::TC("libtests", "JSON parse duplicate e"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: e already seen"); @@ -922,9 +922,11 @@ JSONParser::getToken() if (*p == '.') { lex_state = ls_number_point; } else if (QUtil::is_space(*p)) { + lex_state = ls_number; action = ignore; ready = true; } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; action = reread; ready = true; } else if (*p == 'e' || *p == 'E') { @@ -945,9 +947,11 @@ JSONParser::getToken() number_saw_point = true; lex_state = ls_number_point; } else if (QUtil::is_space(*p)) { + lex_state = ls_number; action = ignore; ready = true; } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; action = reread; ready = true; } else if (*p == 'e' || *p == 'E') { @@ -971,9 +975,11 @@ JSONParser::getToken() if ((*p >= '0') && (*p <= '9')) { ++number_after_point; } else if (QUtil::is_space(*p)) { + lex_state = ls_number; action = ignore; ready = true; } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; action = reread; ready = true; } else if (*p == 'e' || *p == 'E') { @@ -1004,44 +1010,9 @@ JSONParser::getToken() break; case ls_number: + // We only get here after we have seen an exponent. 
if ((*p >= '0') && (*p <= '9')) { - if (number_saw_e) { ++number_after_e; - } else if (number_saw_point) { - ++number_after_point; - } else { - ++number_before_point; - } - } else if (*p == '.') { - if (number_saw_e) { - QTC::TC("libtests", "JSON parse point after e"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": numeric literal: decimal point after e"); - } else if (number_saw_point) { - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": numeric literal: decimal point already seen"); - } else { - number_saw_point = true; - } - } else if (*p == 'e' || *p == 'E') { - if (number_saw_e) { - QTC::TC("libtests", "JSON parse duplicate e"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": numeric literal: e already seen"); - } else { - number_saw_e = true; - } - } else if ((*p == '+') || (*p == '-')) { - if (number_saw_e && (number_after_e == 0)) { - // okay - } else { - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": numeric literal: unexpected sign"); - } } else if (QUtil::is_space(*p)) { action = ignore; ready = true; -- cgit v1.2.3-54-g00ecf From 83f972ceda20e244f52bde7ac052e6931a6d33d3 Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 26 Jan 2023 09:55:34 +0000 Subject: Refactor end of input handling in JSONParser --- libqpdf/JSON.cc | 62 +++++++----------------------------- libtests/libtests.testcov | 3 +- libtests/qtest/json_parse/bad-27.out | 2 +- libtests/qtest/json_parse/bad-28.out | 2 +- libtests/qtest/json_parse/bad-34.out | 2 +- 5 files changed, 16 insertions(+), 55 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 3f1a3879..6ee11309 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1012,7 +1012,7 @@ JSONParser::getToken() case ls_number: // We only get here after we have seen an exponent. 
if ((*p >= '0') && (*p <= '9')) { - ++number_after_e; + ++number_after_e; } else if (QUtil::is_space(*p)) { action = ignore; ready = true; @@ -1093,38 +1093,27 @@ JSONParser::getToken() } } if (done) { - if ((!token.empty()) && (!ready)) { + if (!token.empty() && !ready) { switch (lex_state) { case ls_top: // Can't happen throw std::logic_error("tok_start set in ls_top while parsing"); break; - case ls_number: - case ls_number_minus: case ls_number_leading_zero: case ls_number_before_point: - case ls_number_point: case ls_number_after_point: - case ls_number_e: - case ls_number_e_sign: - case ls_alpha: - // okay + lex_state = ls_number; break; - case ls_u4: - QTC::TC("libtests", "JSON parse premature end of u"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset - u_count - 1) + - ": \\u must be followed by four characters"); - - case ls_string: - case ls_backslash: - QTC::TC("libtests", "JSON parse unterminated string"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": unterminated string"); + case ls_number: + case ls_alpha: + // terminal state break; + + default: + QTC::TC("libtests", "JSON parse ls premature end of input"); + throw std::runtime_error("JSON: premature end of input"); } } } @@ -1181,32 +1170,6 @@ JSONParser::handleToken() break; case ls_number: - case ls_number_minus: - case ls_number_leading_zero: - case ls_number_before_point: - case ls_number_point: - case ls_number_after_point: - case ls_number_e: - case ls_number_e_sign: - if (number_saw_point && (number_after_point == 0)) { - // QTC::TC("libtests", "JSON parse decimal with no digits"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": decimal point with no digits"); - } - if ((number_before_point > 1) && - ((first_char == '0') || - ((first_char == '-') && (token.at(1) == '0')))) { - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": number with leading zero"); - } - if ((number_before_point 
== 0) && (number_after_point == 0)) { - // QTC::TC("libtests", "JSON parse number no digits"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": number with no digits"); - } item = std::make_shared(JSON::makeNumber(token)); break; @@ -1229,10 +1192,9 @@ JSONParser::handleToken() item = std::make_shared(JSON::makeString(s_value)); break; - case ls_backslash: - case ls_u4: + default: throw std::logic_error( - "tok_end is set while state = ls_backslash or ls_u4"); + "JSONParser::handleToken : non-terminal lexer state encountered"); break; } diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 5ca5fb77..4b3bb45b 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -79,9 +79,8 @@ JSON parse number minus no digits 0 JSON parse incomplete number 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 -JSON parse unterminated string 0 JSON parse leading zero 0 -JSON parse premature end of u 0 +JSON parse ls premature end of input 0 JSON parse bad hex after u 0 JSONHandler unhandled value 0 JSONHandler unexpected key 0 diff --git a/libtests/qtest/json_parse/bad-27.out b/libtests/qtest/json_parse/bad-27.out index 2c2df076..70fcbf74 100644 --- a/libtests/qtest/json_parse/bad-27.out +++ b/libtests/qtest/json_parse/bad-27.out @@ -1 +1 @@ -exception: bad-27.json: JSON: offset 6: unterminated string +exception: bad-27.json: JSON: premature end of input diff --git a/libtests/qtest/json_parse/bad-28.out b/libtests/qtest/json_parse/bad-28.out index d7db2aea..005a68d2 100644 --- a/libtests/qtest/json_parse/bad-28.out +++ b/libtests/qtest/json_parse/bad-28.out @@ -1 +1 @@ -exception: bad-28.json: JSON: offset 16: unterminated string +exception: bad-28.json: JSON: premature end of input diff --git a/libtests/qtest/json_parse/bad-34.out b/libtests/qtest/json_parse/bad-34.out index f9db587a..c21838c4 100644 --- a/libtests/qtest/json_parse/bad-34.out +++ b/libtests/qtest/json_parse/bad-34.out @@ -1 +1 
@@ -exception: bad-34.json: JSON: offset 3: \u must be followed by four characters +exception: bad-34.json: JSON: premature end of input -- cgit v1.2.3-54-g00ecf From 1e0ab79aecf5e8e2a1da0618538109819b1ea139 Mon Sep 17 00:00:00 2001 From: m-holger Date: Wed, 25 Jan 2023 19:46:53 +0000 Subject: Remove redundant JSONParser::before_point etc --- libqpdf/JSON.cc | 42 +++++------------------------------------- 1 file changed, 5 insertions(+), 37 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 6ee11309..616e8ed2 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -600,11 +600,6 @@ namespace is(is), reactor(reactor), lex_state(ls_top), - number_before_point(0), - number_after_point(0), - number_after_e(0), - number_saw_point(false), - number_saw_e(false), bytes(0), p(buf), u_count(0), @@ -663,11 +658,6 @@ namespace InputSource& is; JSON::Reactor* reactor; lex_state_e lex_state; - size_t number_before_point; - size_t number_after_point; - size_t number_after_e; - bool number_saw_point; - bool number_saw_e; char buf[16384]; size_t bytes; char const* p; @@ -808,7 +798,8 @@ void JSONParser::numberError() { if (*p == '.') { - if (number_saw_e) { + if (lex_state == ls_number || lex_state == ls_number_e || + lex_state == ls_number_e_sign) { QTC::TC("libtests", "JSON parse point after e"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + @@ -874,25 +865,10 @@ JSONParser::getToken() lex_state = ls_alpha; } else if (*p == '-') { lex_state = ls_number_minus; - number_before_point = 0; - number_after_point = 0; - number_after_e = 0; - number_saw_point = false; - number_saw_e = false; } else if ((*p >= '1') && (*p <= '9')) { lex_state = ls_number_before_point; - number_before_point = 1; - number_after_point = 0; - number_after_e = 0; - number_saw_point = false; - number_saw_e = false; } else if (*p == '0') { lex_state = ls_number_leading_zero; - number_before_point = 1; - number_after_point = 0; - number_after_e = 0; - number_saw_point = false; 
- number_saw_e = false; } else if (strchr("{}[]:,", *p)) { ready = true; } else { @@ -905,10 +881,8 @@ JSONParser::getToken() case ls_number_minus: if ((*p >= '1') && (*p <= '9')) { - ++number_before_point; lex_state = ls_number_before_point; } else if (*p == '0') { - ++number_before_point; lex_state = ls_number_leading_zero; } else { QTC::TC("libtests", "JSON parse number minus no digits"); @@ -930,7 +904,6 @@ JSONParser::getToken() action = reread; ready = true; } else if (*p == 'e' || *p == 'E') { - number_saw_e = true; lex_state = ls_number_e; } else { QTC::TC("libtests", "JSON parse leading zero"); @@ -942,9 +915,8 @@ JSONParser::getToken() case ls_number_before_point: if ((*p >= '0') && (*p <= '9')) { - ++number_before_point; + // continue } else if (*p == '.') { - number_saw_point = true; lex_state = ls_number_point; } else if (QUtil::is_space(*p)) { lex_state = ls_number; @@ -955,7 +927,6 @@ JSONParser::getToken() action = reread; ready = true; } else if (*p == 'e' || *p == 'E') { - number_saw_e = true; lex_state = ls_number_e; } else { numberError(); @@ -964,7 +935,6 @@ JSONParser::getToken() case ls_number_point: if ((*p >= '0') && (*p <= '9')) { - ++number_after_point; lex_state = ls_number_after_point; } else { numberError(); @@ -973,7 +943,7 @@ JSONParser::getToken() case ls_number_after_point: if ((*p >= '0') && (*p <= '9')) { - ++number_after_point; + // continue } else if (QUtil::is_space(*p)) { lex_state = ls_number; action = ignore; @@ -983,7 +953,6 @@ JSONParser::getToken() action = reread; ready = true; } else if (*p == 'e' || *p == 'E') { - number_saw_e = true; lex_state = ls_number_e; } else { numberError(); @@ -992,7 +961,6 @@ JSONParser::getToken() case ls_number_e: if ((*p >= '0') && (*p <= '9')) { - ++number_after_e; lex_state = ls_number; } else if ((*p == '+') || (*p == '-')) { lex_state = ls_number_e_sign; @@ -1012,7 +980,7 @@ JSONParser::getToken() case ls_number: // We only get here after we have seen an exponent. 
if ((*p >= '0') && (*p <= '9')) { - ++number_after_e; + // continue } else if (QUtil::is_space(*p)) { action = ignore; ready = true; -- cgit v1.2.3-54-g00ecf From 39dfd305c8b29d0959c3ac1201f0406f55091e85 Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 26 Jan 2023 13:21:45 +0000 Subject: In JSONParser add lexer states for delimiters --- libqpdf/JSON.cc | 91 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 41 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 616e8ed2..4ea3507b 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -653,6 +653,12 @@ namespace ls_string, ls_backslash, ls_u4, + ls_begin_array, + ls_end_array, + ls_begin_dict, + ls_end_dict, + ls_colon, + ls_comma, }; InputSource& is; @@ -861,6 +867,24 @@ JSONParser::getToken() lex_state = ls_string; } else if (QUtil::is_space(*p)) { action = ignore; + } else if (*p == ',') { + lex_state = ls_comma; + ready = true; + } else if (*p == ':') { + lex_state = ls_colon; + ready = true; + } else if (*p == '{') { + lex_state = ls_begin_dict; + ready = true; + } else if (*p == '}') { + lex_state = ls_end_dict; + ready = true; + } else if (*p == '[') { + lex_state = ls_begin_array; + ready = true; + } else if (*p == ']') { + lex_state = ls_end_array; + ready = true; } else if ((*p >= 'a') && (*p <= 'z')) { lex_state = ls_alpha; } else if (*p == '-') { @@ -869,8 +893,6 @@ JSONParser::getToken() lex_state = ls_number_before_point; } else if (*p == '0') { lex_state = ls_number_leading_zero; - } else if (strchr("{}[]:,", *p)) { - ready = true; } else { QTC::TC("libtests", "JSON parse bad character"); throw std::runtime_error( @@ -1044,6 +1066,10 @@ JSONParser::getToken() lex_state = ls_string; } break; + + default: + throw std::logic_error( + "JSONParser::getToken : trying to handle delimiter state"); } switch (action) { case reread: @@ -1090,7 +1116,7 @@ JSONParser::getToken() void JSONParser::handleToken() { - if (token.empty()) { + if (lex_state == 
ls_top) { return; } @@ -1110,31 +1136,25 @@ JSONParser::handleToken() } s_value = decode_string(token, offset - toO(token.length())); } - // Based on the lexical state and value, figure out whether we are - // looking at an item or a delimiter. It will always be exactly - // one of those two or an error condition. std::shared_ptr item; - char delimiter = '\0'; - // Already verified that token is not empty - char first_char = token.at(0); + switch (lex_state) { - case ls_top: - switch (first_char) { - case '{': - item = std::make_shared(JSON::makeDictionary()); - item->setStart(offset - toO(token.length())); - break; + case ls_begin_dict: + item = std::make_shared(JSON::makeDictionary()); + item->setStart(offset - toO(token.length())); + break; - case '[': - item = std::make_shared(JSON::makeArray()); - item->setStart(offset - toO(token.length())); - break; + case ls_begin_array: + item = std::make_shared(JSON::makeArray()); + item->setStart(offset - toO(token.length())); + break; - default: - delimiter = first_char; - break; - } + case ls_colon: + case ls_comma: + case ls_end_array: + case ls_end_dict: + // continue break; case ls_number: @@ -1166,12 +1186,6 @@ JSONParser::handleToken() break; } - if ((item == nullptr) == (delimiter == '\0')) { - throw std::logic_error( - "JSONParser::handleToken: logic error: exactly one of item" - " or delimiter must be set"); - } - // See whether what we have is allowed at this point. 
if (item.get()) { @@ -1217,7 +1231,7 @@ JSONParser::handleToken() break; // okay } - } else if (delimiter == '}') { + } else if (lex_state == ls_end_dict) { if (!((parser_state == ps_dict_begin) || (parser_state == ps_dict_after_item))) @@ -1227,7 +1241,7 @@ JSONParser::handleToken() "JSON: offset " + std::to_string(offset) + ": unexpected dictionary end delimiter"); } - } else if (delimiter == ']') { + } else if (lex_state == ls_end_array) { if (!((parser_state == ps_array_begin) || (parser_state == ps_array_after_item))) @@ -1237,14 +1251,14 @@ JSONParser::handleToken() "JSON: offset " + std::to_string(offset) + ": unexpected array end delimiter"); } - } else if (delimiter == ':') { + } else if (lex_state == ls_colon) { if (parser_state != ps_dict_after_key) { QTC::TC("libtests", "JSON parse unexpected :"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": unexpected colon"); } - } else if (delimiter == ',') { + } else if (lex_state == ls_comma) { if (!((parser_state == ps_dict_after_item) || (parser_state == ps_array_after_item))) { QTC::TC("libtests", "JSON parse unexpected ,"); @@ -1252,17 +1266,15 @@ JSONParser::handleToken() "JSON: offset " + std::to_string(offset) + ": unexpected comma"); } - } else if (delimiter != '\0') { - throw std::logic_error("JSONParser::handleToken: bad delimiter"); } // Now we know we have a delimiter or item that is allowed. Do // whatever we need to do with it. 
parser_state_e next_state = ps_top; - if (delimiter == ':') { + if (lex_state == ls_colon) { next_state = ps_dict_after_colon; - } else if (delimiter == ',') { + } else if (lex_state == ls_comma) { if (parser_state == ps_dict_after_item) { next_state = ps_dict_after_comma; } else if (parser_state == ps_array_after_item) { @@ -1271,7 +1283,7 @@ JSONParser::handleToken() throw std::logic_error("JSONParser::handleToken: unexpected parser" " state for comma"); } - } else if ((delimiter == '}') || (delimiter == ']')) { + } else if ((lex_state == ls_end_array) || (lex_state == ls_end_dict)) { next_state = ps_stack.back(); ps_stack.pop_back(); auto tos = stack.back(); @@ -1282,9 +1294,6 @@ JSONParser::handleToken() if (next_state != ps_done) { stack.pop_back(); } - } else if (delimiter != '\0') { - throw std::logic_error( - "JSONParser::handleToken: unexpected delimiter in transition"); } else if (item.get()) { if (!(item->isArray() || item->isDictionary())) { item->setStart(offset - toO(token.length())); -- cgit v1.2.3-54-g00ecf From bb89a60320c44199e40c24c3c4681d4a2e41ff97 Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 26 Jan 2023 11:36:44 +0000 Subject: Add data member JSONParser::token_start --- libqpdf/JSON.cc | 10 ++++++---- qpdf/qtest/qpdf/qjson-objects-not-dict.out | 2 +- qpdf/qtest/qpdf/qjson-stream-not-dict.out | 2 +- qpdf/qtest/qpdf/qjson-trailer-not-dict.out | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 4ea3507b..5950f920 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -671,6 +671,7 @@ namespace qpdf_offset_t offset; bool done; std::string token; + qpdf_offset_t token_start{0}; parser_state_e parser_state; std::vector> stack; std::vector ps_stack; @@ -863,6 +864,7 @@ JSONParser::getToken() action = append; switch (lex_state) { case ls_top: + token_start = offset; if (*p == '"') { lex_state = ls_string; } else if (QUtil::is_space(*p)) { @@ -1134,7 +1136,7 @@ JSONParser::handleToken() if 
(token.length() < 2) { throw std::logic_error("JSON string length < 2"); } - s_value = decode_string(token, offset - toO(token.length())); + s_value = decode_string(token, token_start); } std::shared_ptr item; @@ -1142,12 +1144,12 @@ JSONParser::handleToken() switch (lex_state) { case ls_begin_dict: item = std::make_shared(JSON::makeDictionary()); - item->setStart(offset - toO(token.length())); + item->setStart(token_start); break; case ls_begin_array: item = std::make_shared(JSON::makeArray()); - item->setStart(offset - toO(token.length())); + item->setStart(token_start); break; case ls_colon: @@ -1296,7 +1298,7 @@ JSONParser::handleToken() } } else if (item.get()) { if (!(item->isArray() || item->isDictionary())) { - item->setStart(offset - toO(token.length())); + item->setStart(token_start); item->setEnd(offset); } diff --git a/qpdf/qtest/qpdf/qjson-objects-not-dict.out b/qpdf/qtest/qpdf/qjson-objects-not-dict.out index 219b00e2..817ab4c4 100644 --- a/qpdf/qtest/qpdf/qjson-objects-not-dict.out +++ b/qpdf/qtest/qpdf/qjson-objects-not-dict.out @@ -1,3 +1,3 @@ -WARNING: qjson-objects-not-dict.json (offset 82): "qpdf[1]" must be a dictionary +WARNING: qjson-objects-not-dict.json (offset 81): "qpdf[1]" must be a dictionary WARNING: qjson-objects-not-dict.json: "qpdf[1].trailer" was not seen qpdf: qjson-objects-not-dict.json: errors found in JSON diff --git a/qpdf/qtest/qpdf/qjson-stream-not-dict.out b/qpdf/qtest/qpdf/qjson-stream-not-dict.out index 6a462ff6..fbd953c6 100644 --- a/qpdf/qtest/qpdf/qjson-stream-not-dict.out +++ b/qpdf/qtest/qpdf/qjson-stream-not-dict.out @@ -1,3 +1,3 @@ -WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 123): "stream" must be a dictionary +WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 122): "stream" must be a dictionary WARNING: qjson-stream-not-dict.json: "qpdf[1].trailer" was not seen qpdf: qjson-stream-not-dict.json: errors found in JSON diff --git a/qpdf/qtest/qpdf/qjson-trailer-not-dict.out 
b/qpdf/qtest/qpdf/qjson-trailer-not-dict.out index 3b9d482d..b8fe65e1 100644 --- a/qpdf/qtest/qpdf/qjson-trailer-not-dict.out +++ b/qpdf/qtest/qpdf/qjson-trailer-not-dict.out @@ -1,2 +1,2 @@ -WARNING: qjson-trailer-not-dict.json (trailer, offset 1269): "trailer.value" must be a dictionary +WARNING: qjson-trailer-not-dict.json (trailer, offset 1268): "trailer.value" must be a dictionary qpdf: qjson-trailer-not-dict.json: errors found in JSON -- cgit v1.2.3-54-g00ecf From fcc123a62a9fb1cf00288255be8d5c904e43402c Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 26 Jan 2023 13:31:53 +0000 Subject: Avoid copying delimiters in JSONParser::getToken --- libqpdf/JSON.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 5950f920..218ea724 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -871,21 +871,27 @@ JSONParser::getToken() action = ignore; } else if (*p == ',') { lex_state = ls_comma; + action = ignore; ready = true; } else if (*p == ':') { lex_state = ls_colon; + action = ignore; ready = true; } else if (*p == '{') { lex_state = ls_begin_dict; + action = ignore; ready = true; } else if (*p == '}') { lex_state = ls_end_dict; + action = ignore; ready = true; } else if (*p == '[') { lex_state = ls_begin_array; + action = ignore; ready = true; } else if (*p == ']') { lex_state = ls_end_array; + action = ignore; ready = true; } else if ((*p >= 'a') && (*p <= 'z')) { lex_state = ls_alpha; -- cgit v1.2.3-54-g00ecf From 6f94a3a89ab4dc7be0c053c53a94868b6c9a747c Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 26 Jan 2023 13:48:29 +0000 Subject: In JSONParser::handleToken move string decoding into switch statement --- libqpdf/JSON.cc | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 218ea724..ef652a86 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1135,16 +1135,7 @@ JSONParser::handleToken() ": material follows end of object: " + token); } - // 
Git string value std::string s_value; - if (lex_state == ls_string) { - // Token includes the quotation marks - if (token.length() < 2) { - throw std::logic_error("JSON string length < 2"); - } - s_value = decode_string(token, token_start); - } - std::shared_ptr item; switch (lex_state) { @@ -1185,6 +1176,11 @@ JSONParser::handleToken() break; case ls_string: + // Token includes the quotation marks + if (token.length() < 2) { + throw std::logic_error("JSON string length < 2"); + } + s_value = decode_string(token, token_start); item = std::make_shared(JSON::makeString(s_value)); break; -- cgit v1.2.3-54-g00ecf From 4dba3c95dd9cc721957f8138fe19ab2872328f27 Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 26 Jan 2023 14:10:48 +0000 Subject: In JSONParser::handleToken move validation for ls_colon etc into switch statement --- libqpdf/JSON.cc | 71 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index ef652a86..32fe5730 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1150,10 +1150,44 @@ JSONParser::handleToken() break; case ls_colon: + if (parser_state != ps_dict_after_key) { + QTC::TC("libtests", "JSON parse unexpected :"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": unexpected colon"); + } + break; + case ls_comma: + if (!((parser_state == ps_dict_after_item) || + (parser_state == ps_array_after_item))) { + QTC::TC("libtests", "JSON parse unexpected ,"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": unexpected comma"); + } + break; + case ls_end_array: + if (!((parser_state == ps_array_begin) || + (parser_state == ps_array_after_item))) + + { + QTC::TC("libtests", "JSON parse unexpected ]"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": unexpected array end delimiter"); + } + break; + case ls_end_dict: - // continue + if (!((parser_state == ps_dict_begin) || 
+ (parser_state == ps_dict_after_item))) { + QTC::TC("libtests", "JSON parse unexpected }"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": unexpected dictionary end delimiter"); + } break; case ls_number: @@ -1235,41 +1269,6 @@ JSONParser::handleToken() break; // okay } - } else if (lex_state == ls_end_dict) { - if (!((parser_state == ps_dict_begin) || - (parser_state == ps_dict_after_item))) - - { - QTC::TC("libtests", "JSON parse unexpected }"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": unexpected dictionary end delimiter"); - } - } else if (lex_state == ls_end_array) { - if (!((parser_state == ps_array_begin) || - (parser_state == ps_array_after_item))) - - { - QTC::TC("libtests", "JSON parse unexpected ]"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": unexpected array end delimiter"); - } - } else if (lex_state == ls_colon) { - if (parser_state != ps_dict_after_key) { - QTC::TC("libtests", "JSON parse unexpected :"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": unexpected colon"); - } - } else if (lex_state == ls_comma) { - if (!((parser_state == ps_dict_after_item) || - (parser_state == ps_array_after_item))) { - QTC::TC("libtests", "JSON parse unexpected ,"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": unexpected comma"); - } } // Now we know we have a delimiter or item that is allowed. 
Do -- cgit v1.2.3-54-g00ecf From a7338ab15ae6209dbdefb7121b9051cb8ede5fa2 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 12:26:27 +0000 Subject: In JSONParser::handleToken move processing for ls_colon and ls_comma into switch statement --- libqpdf/JSON.cc | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 32fe5730..20b2609a 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1156,7 +1156,9 @@ JSONParser::handleToken() "JSON: offset " + std::to_string(offset) + ": unexpected colon"); } - break; + parser_state = ps_dict_after_colon; + lex_state = ls_top; + return; case ls_comma: if (!((parser_state == ps_dict_after_item) || @@ -1166,7 +1168,16 @@ JSONParser::handleToken() "JSON: offset " + std::to_string(offset) + ": unexpected comma"); } - break; + if (parser_state == ps_dict_after_item) { + parser_state = ps_dict_after_comma; + } else if (parser_state == ps_array_after_item) { + parser_state = ps_array_after_comma; + } else { + throw std::logic_error("JSONParser::handleToken: unexpected parser" + " state for comma"); + } + lex_state = ls_top; + return; case ls_end_array: if (!((parser_state == ps_array_begin) || @@ -1275,18 +1286,7 @@ JSONParser::handleToken() // whatever we need to do with it. 
parser_state_e next_state = ps_top; - if (lex_state == ls_colon) { - next_state = ps_dict_after_colon; - } else if (lex_state == ls_comma) { - if (parser_state == ps_dict_after_item) { - next_state = ps_dict_after_comma; - } else if (parser_state == ps_array_after_item) { - next_state = ps_array_after_comma; - } else { - throw std::logic_error("JSONParser::handleToken: unexpected parser" - " state for comma"); - } - } else if ((lex_state == ls_end_array) || (lex_state == ls_end_dict)) { + if ((lex_state == ls_end_array) || (lex_state == ls_end_dict)) { next_state = ps_stack.back(); ps_stack.pop_back(); auto tos = stack.back(); @@ -1348,9 +1348,6 @@ JSONParser::handleToken() throw std::logic_error( "JSONParser::handleToken: unexpected parser state"); } - } else { - throw std::logic_error( - "JSONParser::handleToken: unexpected null item in transition"); } if (reactor && item.get()) { -- cgit v1.2.3-54-g00ecf From 77ceebd6c9ef227be3adda6f5ba2a1211e5c6140 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 12:50:14 +0000 Subject: In JSONParser::handleToken move processing for ls_end_array and ls_end_dict into switch statement --- libqpdf/JSON.cc | 127 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 20b2609a..9775bc39 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1137,6 +1137,7 @@ JSONParser::handleToken() std::string s_value; std::shared_ptr item; + auto tos = stack.empty() ? 
nullptr : stack.back(); switch (lex_state) { case ls_begin_dict: @@ -1180,16 +1181,24 @@ JSONParser::handleToken() return; case ls_end_array: - if (!((parser_state == ps_array_begin) || - (parser_state == ps_array_after_item))) - - { + if (!(parser_state == ps_array_begin || + parser_state == ps_array_after_item)) { QTC::TC("libtests", "JSON parse unexpected ]"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": unexpected array end delimiter"); } - break; + parser_state = ps_stack.back(); + ps_stack.pop_back(); + tos->setEnd(offset); + if (reactor) { + reactor->containerEnd(*tos); + } + if (parser_state != ps_done) { + stack.pop_back(); + } + lex_state = ls_top; + return; case ls_end_dict: if (!((parser_state == ps_dict_begin) || @@ -1199,7 +1208,17 @@ JSONParser::handleToken() "JSON: offset " + std::to_string(offset) + ": unexpected dictionary end delimiter"); } - break; + parser_state = ps_stack.back(); + ps_stack.pop_back(); + tos->setEnd(offset); + if (reactor) { + reactor->containerEnd(*tos); + } + if (parser_state != ps_done) { + stack.pop_back(); + } + lex_state = ls_top; + return; case ls_number: item = std::make_shared(JSON::makeNumber(token)); @@ -1286,68 +1305,52 @@ JSONParser::handleToken() // whatever we need to do with it. 
parser_state_e next_state = ps_top; - if ((lex_state == ls_end_array) || (lex_state == ls_end_dict)) { - next_state = ps_stack.back(); - ps_stack.pop_back(); - auto tos = stack.back(); - tos->setEnd(offset); - if (reactor) { - reactor->containerEnd(*tos); - } - if (next_state != ps_done) { - stack.pop_back(); + + if (!(item->isArray() || item->isDictionary())) { + item->setStart(token_start); + item->setEnd(offset); + } + + switch (parser_state) { + case ps_dict_begin: + case ps_dict_after_comma: + this->dict_key = s_value; + this->dict_key_offset = item->getStart(); + item = nullptr; + next_state = ps_dict_after_key; + break; + + case ps_dict_after_colon: + if (tos->checkDictionaryKeySeen(dict_key)) { + QTC::TC("libtests", "JSON parse duplicate key"); + throw std::runtime_error( + "JSON: offset " + std::to_string(dict_key_offset) + + ": duplicated dictionary key"); } - } else if (item.get()) { - if (!(item->isArray() || item->isDictionary())) { - item->setStart(token_start); - item->setEnd(offset); + if (!reactor || !reactor->dictionaryItem(dict_key, *item)) { + tos->addDictionaryMember(dict_key, *item); } + next_state = ps_dict_after_item; + break; - std::shared_ptr tos; - if (!stack.empty()) { - tos = stack.back(); + case ps_array_begin: + case ps_array_after_comma: + if (!reactor || !reactor->arrayItem(*item)) { + tos->addArrayElement(*item); } - switch (parser_state) { - case ps_dict_begin: - case ps_dict_after_comma: - this->dict_key = s_value; - this->dict_key_offset = item->getStart(); - item = nullptr; - next_state = ps_dict_after_key; - break; - - case ps_dict_after_colon: - if (tos->checkDictionaryKeySeen(dict_key)) { - QTC::TC("libtests", "JSON parse duplicate key"); - throw std::runtime_error( - "JSON: offset " + std::to_string(dict_key_offset) + - ": duplicated dictionary key"); - } - if (!reactor || !reactor->dictionaryItem(dict_key, *item)) { - tos->addDictionaryMember(dict_key, *item); - } - next_state = ps_dict_after_item; - break; - - case 
ps_array_begin: - case ps_array_after_comma: - if (!reactor || !reactor->arrayItem(*item)) { - tos->addArrayElement(*item); - } - next_state = ps_array_after_item; - break; + next_state = ps_array_after_item; + break; - case ps_top: - next_state = ps_done; - break; + case ps_top: + next_state = ps_done; + break; - case ps_dict_after_key: - case ps_dict_after_item: - case ps_array_after_item: - case ps_done: - throw std::logic_error( - "JSONParser::handleToken: unexpected parser state"); - } + case ps_dict_after_key: + case ps_dict_after_item: + case ps_array_after_item: + case ps_done: + throw std::logic_error( + "JSONParser::handleToken: unexpected parser state"); } if (reactor && item.get()) { -- cgit v1.2.3-54-g00ecf From 0de032bcdd49d50df6a3e4a2e6325e5144c4619e Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 11:12:30 +0000 Subject: In JSONParser::handleToken simplify setting of start and end --- libqpdf/JSON.cc | 8 ++------ libtests/qtest/json_parse/good-01-react.out | 4 ++-- libtests/qtest/json_parse/good-04-react.out | 10 +++++----- libtests/qtest/json_parse/good-10-react.out | 10 +++++----- libtests/qtest/json_parse/good-11-react.out | 4 ++-- 5 files changed, 16 insertions(+), 20 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 9775bc39..1749005b 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1142,12 +1142,10 @@ JSONParser::handleToken() switch (lex_state) { case ls_begin_dict: item = std::make_shared(JSON::makeDictionary()); - item->setStart(token_start); break; case ls_begin_array: item = std::make_shared(JSON::makeArray()); - item->setStart(token_start); break; case ls_colon: @@ -1306,10 +1304,8 @@ JSONParser::handleToken() parser_state_e next_state = ps_top; - if (!(item->isArray() || item->isDictionary())) { - item->setStart(token_start); - item->setEnd(offset); - } + item->setStart(token_start); + item->setEnd(offset); switch (parser_state) { case ps_dict_begin: diff --git 
a/libtests/qtest/json_parse/good-01-react.out b/libtests/qtest/json_parse/good-01-react.out index e3813bcc..3951272d 100644 --- a/libtests/qtest/json_parse/good-01-react.out +++ b/libtests/qtest/json_parse/good-01-react.out @@ -1,13 +1,13 @@ dictionary start dictionary item: a -> [6, 11): "bcd" -dictionary item: e -> [18, 0): [] +dictionary item: e -> [18, 19): [] array start array item: [19, 20): 1 array item: [41, 42): 2 array item: [44, 45): 3 array item: [46, 47): 4 array item: [48, 54): "five" -array item: [56, 0): {} +array item: [56, 57): {} dictionary start dictionary item: six -> [64, 65): 7 dictionary item: 8 -> [72, 73): 9 diff --git a/libtests/qtest/json_parse/good-04-react.out b/libtests/qtest/json_parse/good-04-react.out index ded004b2..8d931535 100644 --- a/libtests/qtest/json_parse/good-04-react.out +++ b/libtests/qtest/json_parse/good-04-react.out @@ -1,15 +1,15 @@ array start -array item: [1, 0): [] +array item: [1, 2): [] array start -array item: [2, 0): [] +array item: [2, 3): [] array start -array item: [3, 0): {} +array item: [3, 4): {} dictionary start container end: [3, 5): {} container end: [2, 6): [] -array item: [8, 0): {} +array item: [8, 9): {} dictionary start -dictionary item: -> [13, 0): {} +dictionary item: -> [13, 14): {} dictionary start container end: [13, 15): {} container end: [8, 16): {} diff --git a/libtests/qtest/json_parse/good-10-react.out b/libtests/qtest/json_parse/good-10-react.out index 3cceeb2f..8c31f915 100644 --- a/libtests/qtest/json_parse/good-10-react.out +++ b/libtests/qtest/json_parse/good-10-react.out @@ -1,21 +1,21 @@ dictionary start -dictionary item: a -> [9, 0): [] +dictionary item: a -> [9, 10): [] array start array item: [10, 11): 1 array item: [13, 14): 2 -array item: [16, 0): {} +array item: [16, 17): {} dictionary start dictionary item: x -> [22, 25): "y" container end: [16, 26): {} array item: [28, 29): 3 -array item: [31, 0): {} +array item: [31, 32): {} dictionary start dictionary item: keep -> 
[40, 61): "not in final output" container end: [31, 62): { "keep": "not in final output" } container end: [9, 63): [] -dictionary item: keep -> [75, 0): [] +dictionary item: keep -> [75, 76): [] array start array item: [76, 77): 1 array item: [79, 83): null @@ -23,7 +23,7 @@ array item: [85, 86): 2 array item: [88, 93): false array item: [95, 101): "keep" array item: [103, 104): 3 -array item: [106, 0): [] +array item: [106, 107): [] array start array item: [107, 113): "this" array item: [115, 121): "keep" diff --git a/libtests/qtest/json_parse/good-11-react.out b/libtests/qtest/json_parse/good-11-react.out index 6cf3345e..6d7d4275 100644 --- a/libtests/qtest/json_parse/good-11-react.out +++ b/libtests/qtest/json_parse/good-11-react.out @@ -1,12 +1,12 @@ array start -array item: [4, 0): [] +array item: [4, 5): [] array start array item: [5, 11): "u:π" array item: [13, 23): "u:π" array item: [25, 39): "b:EFBBBFCF80" array item: [41, 53): "b:feff03c0" container end: [4, 54): [] -array item: [58, 0): [] +array item: [58, 59): [] array start array item: [59, 67): "u:🥔" array item: [69, 85): "u:🥔" -- cgit v1.2.3-54-g00ecf From f2e46c20b62aa72a984b99c816176cfa3367a6e7 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 13:08:55 +0000 Subject: In JSONParser::handleToken move remaining validations into second switch statement --- libqpdf/JSON.cc | 71 +++++++++++++++++---------------------------------------- 1 file changed, 21 insertions(+), 50 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 1749005b..174a46b6 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1252,56 +1252,6 @@ JSONParser::handleToken() break; } - // See whether what we have is allowed at this point. 
- - if (item.get()) { - switch (parser_state) { - case ps_done: - throw std::logic_error("can't happen; ps_done already handled"); - break; - - case ps_dict_after_key: - QTC::TC("libtests", "JSON parse expected colon"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + ": expected ':'"); - break; - - case ps_dict_after_item: - QTC::TC("libtests", "JSON parse expected , or }"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": expected ',' or '}'"); - break; - - case ps_array_after_item: - QTC::TC("libtests", "JSON parse expected, or ]"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": expected ',' or ']'"); - break; - - case ps_dict_begin: - case ps_dict_after_comma: - if (lex_state != ls_string) { - QTC::TC("libtests", "JSON parse string as dict key"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": expect string as dictionary key"); - } - break; - - case ps_top: - case ps_dict_after_colon: - case ps_array_begin: - case ps_array_after_comma: - break; - // okay - } - } - - // Now we know we have a delimiter or item that is allowed. Do - // whatever we need to do with it. 
- parser_state_e next_state = ps_top; item->setStart(token_start); @@ -1310,6 +1260,12 @@ JSONParser::handleToken() switch (parser_state) { case ps_dict_begin: case ps_dict_after_comma: + if (lex_state != ls_string) { + QTC::TC("libtests", "JSON parse string as dict key"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": expect string as dictionary key"); + } this->dict_key = s_value; this->dict_key_offset = item->getStart(); item = nullptr; @@ -1342,8 +1298,23 @@ JSONParser::handleToken() break; case ps_dict_after_key: + QTC::TC("libtests", "JSON parse expected colon"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + ": expected ':'"); + break; + case ps_dict_after_item: + QTC::TC("libtests", "JSON parse expected , or }"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + ": expected ',' or '}'"); + break; + case ps_array_after_item: + QTC::TC("libtests", "JSON parse expected, or ]"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + ": expected ',' or ']'"); + break; + case ps_done: throw std::logic_error( "JSONParser::handleToken: unexpected parser state"); -- cgit v1.2.3-54-g00ecf From a39043f65eebdc24d98bdc29160ad489222c4ec3 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 13:35:02 +0000 Subject: In JSONParser::handleToken avoid creating JSON objects for dictionary keys --- libqpdf/JSON.cc | 59 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 174a46b6..a16718de 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1138,8 +1138,10 @@ JSONParser::handleToken() std::string s_value; std::shared_ptr item; auto tos = stack.empty() ? 
nullptr : stack.back(); + auto ls = lex_state; + lex_state = ls_top; - switch (lex_state) { + switch (ls) { case ls_begin_dict: item = std::make_shared(JSON::makeDictionary()); break; @@ -1156,7 +1158,6 @@ JSONParser::handleToken() ": unexpected colon"); } parser_state = ps_dict_after_colon; - lex_state = ls_top; return; case ls_comma: @@ -1175,7 +1176,6 @@ JSONParser::handleToken() throw std::logic_error("JSONParser::handleToken: unexpected parser" " state for comma"); } - lex_state = ls_top; return; case ls_end_array: @@ -1195,7 +1195,6 @@ JSONParser::handleToken() if (parser_state != ps_done) { stack.pop_back(); } - lex_state = ls_top; return; case ls_end_dict: @@ -1215,7 +1214,6 @@ JSONParser::handleToken() if (parser_state != ps_done) { stack.pop_back(); } - lex_state = ls_top; return; case ls_number: @@ -1243,7 +1241,15 @@ JSONParser::handleToken() throw std::logic_error("JSON string length < 2"); } s_value = decode_string(token, token_start); - item = std::make_shared(JSON::makeString(s_value)); + if (parser_state == ps_dict_begin || + parser_state == ps_dict_after_comma) { + dict_key = s_value; + dict_key_offset = token_start; + parser_state = ps_dict_after_key; + return; + } else { + item = std::make_shared(JSON::makeString(s_value)); + } break; default: @@ -1260,16 +1266,10 @@ JSONParser::handleToken() switch (parser_state) { case ps_dict_begin: case ps_dict_after_comma: - if (lex_state != ls_string) { - QTC::TC("libtests", "JSON parse string as dict key"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": expect string as dictionary key"); - } - this->dict_key = s_value; - this->dict_key_offset = item->getStart(); - item = nullptr; - next_state = ps_dict_after_key; + QTC::TC("libtests", "JSON parse string as dict key"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": expect string as dictionary key"); break; case ps_dict_after_colon: @@ -1320,7 +1320,7 @@ JSONParser::handleToken() 
"JSONParser::handleToken: unexpected parser state"); } - if (reactor && item.get()) { + if (reactor) { // Calling container start method is postponed until after // adding the containers to their parent containers, if any. // This makes it much easier to keep track of the current @@ -1333,26 +1333,25 @@ JSONParser::handleToken() } // Prepare for next token - if (item.get()) { - if (item->isDictionary()) { - stack.push_back(item); - ps_stack.push_back(next_state); - next_state = ps_dict_begin; - } else if (item->isArray()) { - stack.push_back(item); - ps_stack.push_back(next_state); - next_state = ps_array_begin; - } else if (parser_state == ps_top) { - stack.push_back(item); - } + + if (item->isDictionary()) { + stack.push_back(item); + ps_stack.push_back(next_state); + next_state = ps_dict_begin; + } else if (item->isArray()) { + stack.push_back(item); + ps_stack.push_back(next_state); + next_state = ps_array_begin; + } else if (parser_state == ps_top) { + stack.push_back(item); } + if (ps_stack.size() > 500) { throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": maximum object depth exceeded"); } parser_state = next_state; - lex_state = ls_top; } std::shared_ptr -- cgit v1.2.3-54-g00ecf From 29093a167b3f628d23b5a7890404eab659c6a685 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 14:37:25 +0000 Subject: In JSONParser::handleToken refactor container creation --- libqpdf/JSON.cc | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index a16718de..41481660 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1294,6 +1294,7 @@ JSONParser::handleToken() break; case ps_top: + stack.push_back(item); next_state = ps_done; break; @@ -1320,36 +1321,30 @@ JSONParser::handleToken() "JSONParser::handleToken: unexpected parser state"); } - if (reactor) { + if (item->isDictionary() || item->isArray()) { + stack.push_back(item); + 
ps_stack.push_back(next_state); // Calling container start method is postponed until after // adding the containers to their parent containers, if any. // This makes it much easier to keep track of the current // nesting level. if (item->isDictionary()) { - reactor->dictionaryStart(); + if (reactor) { + reactor->dictionaryStart(); + } + next_state = ps_dict_begin; } else if (item->isArray()) { - reactor->arrayStart(); + if (reactor) { + reactor->arrayStart(); + } + next_state = ps_array_begin; } - } - - // Prepare for next token - if (item->isDictionary()) { - stack.push_back(item); - ps_stack.push_back(next_state); - next_state = ps_dict_begin; - } else if (item->isArray()) { - stack.push_back(item); - ps_stack.push_back(next_state); - next_state = ps_array_begin; - } else if (parser_state == ps_top) { - stack.push_back(item); - } - - if (ps_stack.size() > 500) { - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": maximum object depth exceeded"); + if (ps_stack.size() > 500) { + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": maximum object depth exceeded"); + } } parser_state = next_state; } -- cgit v1.2.3-54-g00ecf From 6748bd33f75da96fc45d189028207a392c421eec Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 15:39:16 +0000 Subject: In JSONParser::handleToken remove next_state --- libqpdf/JSON.cc | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 41481660..8c69c4e9 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1258,8 +1258,6 @@ JSONParser::handleToken() break; } - parser_state_e next_state = ps_top; - item->setStart(token_start); item->setEnd(offset); @@ -1282,7 +1280,7 @@ JSONParser::handleToken() if (!reactor || !reactor->dictionaryItem(dict_key, *item)) { tos->addDictionaryMember(dict_key, *item); } - next_state = ps_dict_after_item; + parser_state = ps_dict_after_item; break; case ps_array_begin: @@ 
-1290,12 +1288,16 @@ JSONParser::handleToken() if (!reactor || !reactor->arrayItem(*item)) { tos->addArrayElement(*item); } - next_state = ps_array_after_item; + parser_state = ps_array_after_item; break; case ps_top: - stack.push_back(item); - next_state = ps_done; + if (!(item->isDictionary() || item->isArray())) { + stack.push_back(item); + parser_state = ps_done; + return; + } + parser_state = ps_done; break; case ps_dict_after_key: @@ -1323,7 +1325,7 @@ JSONParser::handleToken() if (item->isDictionary() || item->isArray()) { stack.push_back(item); - ps_stack.push_back(next_state); + ps_stack.push_back(parser_state); // Calling container start method is postponed until after // adding the containers to their parent containers, if any. // This makes it much easier to keep track of the current @@ -1332,12 +1334,12 @@ JSONParser::handleToken() if (reactor) { reactor->dictionaryStart(); } - next_state = ps_dict_begin; + parser_state = ps_dict_begin; } else if (item->isArray()) { if (reactor) { reactor->arrayStart(); } - next_state = ps_array_begin; + parser_state = ps_array_begin; } if (ps_stack.size() > 500) { @@ -1346,7 +1348,6 @@ JSONParser::handleToken() ": maximum object depth exceeded"); } } - parser_state = next_state; } std::shared_ptr -- cgit v1.2.3-54-g00ecf From 126dd31cad27991d40805df9582d5546464310c1 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 14:37:25 +0000 Subject: In JSONParser combine stacks --- libqpdf/JSON.cc | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 8c69c4e9..2a722e67 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -661,6 +661,18 @@ namespace ls_comma, }; + struct StackFrame + { + StackFrame(parser_state_e state, std::shared_ptr& item) : + state(state), + item(item) + { + } + + parser_state_e state; + std::shared_ptr item; + }; + InputSource& is; JSON::Reactor* reactor; lex_state_e lex_state; @@ -673,8 +685,7 @@ namespace 
std::string token; qpdf_offset_t token_start{0}; parser_state_e parser_state; - std::vector> stack; - std::vector ps_stack; + std::vector stack; std::string dict_key; qpdf_offset_t dict_key_offset; }; @@ -1137,7 +1148,7 @@ JSONParser::handleToken() std::string s_value; std::shared_ptr item; - auto tos = stack.empty() ? nullptr : stack.back(); + auto tos = stack.empty() ? nullptr : stack.back().item; auto ls = lex_state; lex_state = ls_top; @@ -1186,8 +1197,7 @@ JSONParser::handleToken() "JSON: offset " + std::to_string(offset) + ": unexpected array end delimiter"); } - parser_state = ps_stack.back(); - ps_stack.pop_back(); + parser_state = stack.back().state; tos->setEnd(offset); if (reactor) { reactor->containerEnd(*tos); @@ -1205,8 +1215,7 @@ JSONParser::handleToken() "JSON: offset " + std::to_string(offset) + ": unexpected dictionary end delimiter"); } - parser_state = ps_stack.back(); - ps_stack.pop_back(); + parser_state = stack.back().state; tos->setEnd(offset); if (reactor) { reactor->containerEnd(*tos); @@ -1293,7 +1302,7 @@ JSONParser::handleToken() case ps_top: if (!(item->isDictionary() || item->isArray())) { - stack.push_back(item); + stack.push_back({ps_done, item}); parser_state = ps_done; return; } @@ -1324,8 +1333,7 @@ JSONParser::handleToken() } if (item->isDictionary() || item->isArray()) { - stack.push_back(item); - ps_stack.push_back(parser_state); + stack.push_back({parser_state, item}); // Calling container start method is postponed until after // adding the containers to their parent containers, if any. 
// This makes it much easier to keep track of the current @@ -1342,7 +1350,7 @@ JSONParser::handleToken() parser_state = ps_array_begin; } - if (ps_stack.size() > 500) { + if (stack.size() > 500) { throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": maximum object depth exceeded"); @@ -1361,7 +1369,7 @@ JSONParser::parse() QTC::TC("libtests", "JSON parse premature EOF"); throw std::runtime_error("JSON: premature end of input"); } - auto const& tos = stack.back(); + auto const& tos = stack.back().item; if (reactor && tos.get() && !(tos->isArray() || tos->isDictionary())) { reactor->topLevelScalar(); } -- cgit v1.2.3-54-g00ecf From a9a0667904b467a054b5f7747bc16afba2612d7f Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 17:03:24 +0000 Subject: Make JSONParser::getToken responsible for decoding strings --- libqpdf/JSON.cc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 2a722e67..c043d570 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1052,6 +1052,9 @@ JSONParser::getToken() case ls_string: if (*p == '"') { + token += '"'; + token = decode_string(token, token_start); + action = ignore; ready = true; } else if (*p == '\\') { lex_state = ls_backslash; @@ -1146,7 +1149,6 @@ JSONParser::handleToken() ": material follows end of object: " + token); } - std::string s_value; std::shared_ptr item; auto tos = stack.empty() ? 
nullptr : stack.back().item; auto ls = lex_state; @@ -1245,19 +1247,14 @@ JSONParser::handleToken() break; case ls_string: - // Token includes the quotation marks - if (token.length() < 2) { - throw std::logic_error("JSON string length < 2"); - } - s_value = decode_string(token, token_start); if (parser_state == ps_dict_begin || parser_state == ps_dict_after_comma) { - dict_key = s_value; + dict_key = token; dict_key_offset = token_start; parser_state = ps_dict_after_key; return; } else { - item = std::make_shared(JSON::makeString(s_value)); + item = std::make_shared(JSON::makeString(token)); } break; -- cgit v1.2.3-54-g00ecf From cee746fc154e82df43f427b4e6112fcb50070814 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 17:13:06 +0000 Subject: In JSONParser::getToken avoid copying '"' characters in strings --- libqpdf/JSON.cc | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index c043d570..51617483 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -744,14 +744,8 @@ JSONParser::decode_string(std::string const& str, qpdf_offset_t offset) // is called, so errors are logic errors instead of runtime // errors. size_t len = str.length(); - if ((len < 2) || (str.at(0) != '"') || (str.at(len - 1) != '"')) { - throw std::logic_error( - "JSON Parse: decode_string called with other than \"...\""); - } char const* s = str.c_str(); - // Move inside the quotation marks - ++s; - len -= 2; + // Keep track of UTF-16 surrogate pairs. 
unsigned long high_surrogate = 0; qpdf_offset_t high_offset = 0; @@ -878,6 +872,7 @@ JSONParser::getToken() token_start = offset; if (*p == '"') { lex_state = ls_string; + action = ignore; } else if (QUtil::is_space(*p)) { action = ignore; } else if (*p == ',') { @@ -1052,7 +1047,6 @@ JSONParser::getToken() case ls_string: if (*p == '"') { - token += '"'; token = decode_string(token, token_start); action = ignore; ready = true; -- cgit v1.2.3-54-g00ecf From 320245e0d1b4be709abe98a2aa7ed1e4b9054af8 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 18:02:35 +0000 Subject: In JSONParser::getToken decode escaped chars inside state ls_backslash (except '\\' and '\uXXXX') --- libqpdf/JSON.cc | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 51617483..f92d3ef8 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -790,7 +790,6 @@ JSONParser::decode_string(std::string const& str, qpdf_offset_t offset) i += 4; break; default: - throw std::logic_error("JSON parse: bad character after \\"); break; } } else { @@ -1052,17 +1051,43 @@ JSONParser::getToken() ready = true; } else if (*p == '\\') { lex_state = ls_backslash; + action = ignore; } break; case ls_backslash: - /* cSpell: ignore bfnrt */ - if (strchr("\\\"/bfnrt", *p)) { - lex_state = ls_string; - } else if (*p == 'u') { + action = ignore; + lex_state = ls_string; + switch (*p) { + case '\\': + token += "\\\\"; + case '\"': + case '/': + // \/ is allowed in json input, but so is /, so we + // don't map / to \/ in output. 
+ token += *p; + break; + case 'b': + token += '\b'; + break; + case 'f': + token += '\f'; + break; + case 'n': + token += '\n'; + break; + case 'r': + token += '\r'; + break; + case 't': + token += '\t'; + break; + case 'u': + token += "\\u"; lex_state = ls_u4; u_count = 0; - } else { + break; + default: QTC::TC("libtests", "JSON parse backslash bad character"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + -- cgit v1.2.3-54-g00ecf From 98d9ae51fc4e1a6967b52e7708f6ddc66c684276 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 27 Jan 2023 18:58:50 +0000 Subject: Integrate JSONParser::decode_string into getToken --- libqpdf/JSON.cc | 117 +++++++++++++++----------------------------------------- 1 file changed, 30 insertions(+), 87 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index f92d3ef8..b057bfdd 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -616,12 +616,9 @@ namespace void getToken(); void handleToken(); void numberError(); - static std::string - decode_string(std::string const& json, qpdf_offset_t offset); static void handle_u_code( - char const* s, + unsigned long codepoint, qpdf_offset_t offset, - qpdf_offset_t i, unsigned long& high_surrogate, qpdf_offset_t& high_offset, std::string& result); @@ -680,6 +677,7 @@ namespace size_t bytes; char const* p; qpdf_offset_t u_count; + unsigned long u_value{0}; qpdf_offset_t offset; bool done; std::string token; @@ -693,22 +691,15 @@ namespace void JSONParser::handle_u_code( - char const* s, + unsigned long codepoint, qpdf_offset_t offset, - qpdf_offset_t i, unsigned long& high_surrogate, qpdf_offset_t& high_offset, std::string& result) { - std::string hex = QUtil::hex_decode(std::string(s + i + 1, s + i + 5)); - unsigned char high = static_cast(hex.at(0)); - unsigned char low = static_cast(hex.at(1)); - unsigned long codepoint = high; - codepoint <<= 8; - codepoint += low; if ((codepoint & 0xFC00) == 0xD800) { // high surrogate - qpdf_offset_t new_high_offset = offset + i; 
+ qpdf_offset_t new_high_offset = offset; if (high_offset) { QTC::TC("libtests", "JSON 16 high high"); throw std::runtime_error( @@ -721,10 +712,10 @@ JSONParser::handle_u_code( high_surrogate = codepoint; } else if ((codepoint & 0xFC00) == 0xDC00) { // low surrogate - if (offset + i != (high_offset + 6)) { + if (offset != (high_offset + 6)) { QTC::TC("libtests", "JSON 16 low not after high"); throw std::runtime_error( - "JSON: offset " + std::to_string(offset + i) + + "JSON: offset " + std::to_string(offset) + ": UTF-16 low surrogate found not immediately after high" " surrogate"); } @@ -737,74 +728,6 @@ JSONParser::handle_u_code( } } -std::string -JSONParser::decode_string(std::string const& str, qpdf_offset_t offset) -{ - // The string has already been validated when this private method - // is called, so errors are logic errors instead of runtime - // errors. - size_t len = str.length(); - char const* s = str.c_str(); - - // Keep track of UTF-16 surrogate pairs. - unsigned long high_surrogate = 0; - qpdf_offset_t high_offset = 0; - std::string result; - qpdf_offset_t olen = toO(len); - for (qpdf_offset_t i = 0; i < olen; ++i) { - if (s[i] == '\\') { - if (i + 1 >= olen) { - throw std::logic_error("JSON parse: nothing after \\"); - } - char ch = s[++i]; - switch (ch) { - case '\\': - case '\"': - case '/': - // \/ is allowed in json input, but so is /, so we - // don't map / to \/ in output. 
- result.append(1, ch); - break; - case 'b': - result.append(1, '\b'); - break; - case 'f': - result.append(1, '\f'); - break; - case 'n': - result.append(1, '\n'); - break; - case 'r': - result.append(1, '\r'); - break; - case 't': - result.append(1, '\t'); - break; - case 'u': - if (i + 4 >= olen) { - throw std::logic_error( - "JSON parse: not enough characters after \\u"); - } - handle_u_code( - s, offset, i, high_surrogate, high_offset, result); - i += 4; - break; - default: - break; - } - } else { - result.append(1, s[i]); - } - } - if (high_offset) { - QTC::TC("libtests", "JSON 16 dangling high"); - throw std::runtime_error( - "JSON: offset " + std::to_string(high_offset) + - ": UTF-16 high surrogate not followed by low surrogate"); - } - return result; -} - void JSONParser::numberError() { @@ -850,6 +773,11 @@ JSONParser::getToken() enum { append, ignore, reread } action = append; bool ready = false; token.clear(); + + // Keep track of UTF-16 surrogate pairs. + unsigned long high_surrogate = 0; + qpdf_offset_t high_offset = 0; + while (!done) { if (p == (buf + bytes)) { p = buf; @@ -1046,7 +974,13 @@ JSONParser::getToken() case ls_string: if (*p == '"') { - token = decode_string(token, token_start); + if (high_offset) { + QTC::TC("libtests", "JSON 16 dangling high"); + throw std::runtime_error( + "JSON: offset " + std::to_string(high_offset) + + ": UTF-16 high surrogate not followed by low " + "surrogate"); + } action = ignore; ready = true; } else if (*p == '\\') { @@ -1060,7 +994,6 @@ JSONParser::getToken() lex_state = ls_string; switch (*p) { case '\\': - token += "\\\\"; case '\"': case '/': // \/ is allowed in json input, but so is /, so we @@ -1083,9 +1016,9 @@ JSONParser::getToken() token += '\t'; break; case 'u': - token += "\\u"; lex_state = ls_u4; u_count = 0; + u_value = 0; break; default: QTC::TC("libtests", "JSON parse backslash bad character"); @@ -1097,13 +1030,23 @@ JSONParser::getToken() break; case ls_u4: - if (!QUtil::is_hex_digit(*p)) { + 
using ui = unsigned int; + action = ignore; + if ('0' <= *p && *p <= '9') { + u_value = 16 * u_value + (ui(*p) - ui('0')); + } else if ('a' <= *p && *p <= 'f') { + u_value = 16 * u_value + (10 + ui(*p) - ui('a')); + } else if ('A' <= *p && *p <= 'F') { + u_value = 16 * u_value + (10 + ui(*p) - ui('A')); + } else { QTC::TC("libtests", "JSON parse bad hex after u"); throw std::runtime_error( "JSON: offset " + std::to_string(offset - u_count - 1) + ": \\u must be followed by four hex digits"); } if (++u_count == 4) { + handle_u_code( + u_value, offset - 5, high_surrogate, high_offset, token); lex_state = ls_string; } break; -- cgit v1.2.3-54-g00ecf From 1b89e7684edc7af2ad4ae998bba41b40f8780c3f Mon Sep 17 00:00:00 2001 From: m-holger Date: Sat, 28 Jan 2023 10:42:29 +0000 Subject: Remove redundant template toO in JSON --- libqpdf/JSON.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index b057bfdd..b972d8aa 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -4,19 +4,11 @@ #include #include #include -#include #include #include #include #include -template -static qpdf_offset_t -toO(T const& i) -{ - return QIntC::to_offset(i); -} - JSON::Members::Members(std::shared_ptr value) : value(value), start(0), -- cgit v1.2.3-54-g00ecf From 5ac6a12e0a76613d29edc65beb6b99af45172493 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sun, 29 Jan 2023 11:39:15 +0000 Subject: In JSONParser::getToken reject illegal control characters --- libqpdf/JSON.cc | 20 ++++++++++++++++---- libtests/qtest/json_parse.test | 4 ++++ libtests/qtest/json_parse/bad-18.out | 2 +- libtests/qtest/json_parse/bad-45.json | 1 + libtests/qtest/json_parse/bad-45.out | 1 + libtests/qtest/json_parse/bad-46.json | 1 + libtests/qtest/json_parse/bad-46.out | 1 + libtests/qtest/json_parse/bad-47.json | 2 ++ libtests/qtest/json_parse/bad-47.out | 1 + libtests/qtest/json_parse/bad-48.json | 1 + libtests/qtest/json_parse/bad-48.out | 1 + 11 files changed, 30 insertions(+), 5 
deletions(-) create mode 100644 libtests/qtest/json_parse/bad-45.json create mode 100644 libtests/qtest/json_parse/bad-45.out create mode 100644 libtests/qtest/json_parse/bad-46.json create mode 100644 libtests/qtest/json_parse/bad-46.out create mode 100644 libtests/qtest/json_parse/bad-47.json create mode 100644 libtests/qtest/json_parse/bad-47.out create mode 100644 libtests/qtest/json_parse/bad-48.json create mode 100644 libtests/qtest/json_parse/bad-48.out diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index b972d8aa..da0de9eb 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -780,10 +780,22 @@ JSONParser::getToken() } } - if (*p == 0) { - QTC::TC("libtests", "JSON parse null character"); - throw std::runtime_error( - "JSON: null character at offset " + std::to_string(offset)); + if ((*p < 32 && *p >= 0)) { + if (*p == '\t' || *p == '\n' || *p == '\r') { + // Legal white space not permitted in strings. This will always + // end the current token (unless we are still before the start + // of the token). 
+ if (lex_state == ls_top) { + // Continue with token + } else { + // done + } + } else { + QTC::TC("libtests", "JSON parse null character"); + throw std::runtime_error( + "JSON: control or null character at offset " + + std::to_string(offset)); + } } action = append; switch (lex_state) { diff --git a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test index d38d70de..8234b755 100644 --- a/libtests/qtest/json_parse.test +++ b/libtests/qtest/json_parse.test @@ -125,6 +125,10 @@ my @bad = ( "e after minus", # 42 "missing digit after e", # 43 "missing digit after e+/-", # 44 + # "tab char in string", # 45 + # "cr char in string", # 46 + # "lf char in string", # 47 + # "bs char in string", # 48 ); my $i = 0; diff --git a/libtests/qtest/json_parse/bad-18.out b/libtests/qtest/json_parse/bad-18.out index 0428b64f..1e779e41 100644 --- a/libtests/qtest/json_parse/bad-18.out +++ b/libtests/qtest/json_parse/bad-18.out @@ -1 +1 @@ -exception: bad-18.json: JSON: null character at offset 5 +exception: bad-18.json: JSON: control or null character at offset 5 diff --git a/libtests/qtest/json_parse/bad-45.json b/libtests/qtest/json_parse/bad-45.json new file mode 100644 index 00000000..16107dc0 --- /dev/null +++ b/libtests/qtest/json_parse/bad-45.json @@ -0,0 +1 @@ +"Tab in str ing" diff --git a/libtests/qtest/json_parse/bad-45.out b/libtests/qtest/json_parse/bad-45.out new file mode 100644 index 00000000..ba7e4f16 --- /dev/null +++ b/libtests/qtest/json_parse/bad-45.out @@ -0,0 +1 @@ +"Tab in str\ting" diff --git a/libtests/qtest/json_parse/bad-46.json b/libtests/qtest/json_parse/bad-46.json new file mode 100644 index 00000000..60873bf4 --- /dev/null +++ b/libtests/qtest/json_parse/bad-46.json @@ -0,0 +1 @@ +"cr in str ing" diff --git a/libtests/qtest/json_parse/bad-46.out b/libtests/qtest/json_parse/bad-46.out new file mode 100644 index 00000000..2baad6a4 --- /dev/null +++ b/libtests/qtest/json_parse/bad-46.out @@ -0,0 +1 @@ +"cr in str\ring" diff --git 
a/libtests/qtest/json_parse/bad-47.json b/libtests/qtest/json_parse/bad-47.json new file mode 100644 index 00000000..3c75427a --- /dev/null +++ b/libtests/qtest/json_parse/bad-47.json @@ -0,0 +1,2 @@ +"lf in str +ing" diff --git a/libtests/qtest/json_parse/bad-47.out b/libtests/qtest/json_parse/bad-47.out new file mode 100644 index 00000000..30549072 --- /dev/null +++ b/libtests/qtest/json_parse/bad-47.out @@ -0,0 +1 @@ +"lf in str\ning" diff --git a/libtests/qtest/json_parse/bad-48.json b/libtests/qtest/json_parse/bad-48.json new file mode 100644 index 00000000..1e605808 --- /dev/null +++ b/libtests/qtest/json_parse/bad-48.json @@ -0,0 +1 @@ +"bs in string" \ No newline at end of file diff --git a/libtests/qtest/json_parse/bad-48.out b/libtests/qtest/json_parse/bad-48.out new file mode 100644 index 00000000..0b20fc7a --- /dev/null +++ b/libtests/qtest/json_parse/bad-48.out @@ -0,0 +1 @@ +exception: bad-48.json: JSON: control or null character at offset 10 -- cgit v1.2.3-54-g00ecf From f5b7448a2732d0e6f39855b98ebca63df2824916 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sun, 29 Jan 2023 12:23:15 +0000 Subject: Extend scope of JSONParser::numberError and rename tokenError Handle all incomplete token type errors in tokenError. 
--- libqpdf/JSON.cc | 58 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index da0de9eb..afeda315 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -607,7 +607,7 @@ namespace private: void getToken(); void handleToken(); - void numberError(); + void tokenError(); static void handle_u_code( unsigned long codepoint, qpdf_offset_t offset, @@ -721,8 +721,29 @@ JSONParser::handle_u_code( } void -JSONParser::numberError() +JSONParser::tokenError() { + if (bytes == 0) { + QTC::TC("libtests", "JSON parse ls premature end of input"); + throw std::runtime_error("JSON: premature end of input"); + } + if (lex_state == ls_u4) { + QTC::TC("libtests", "JSON parse bad hex after u"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset - u_count - 1) + + ": \\u must be followed by four hex digits"); + } else if (lex_state == ls_alpha) { + QTC::TC("libtests", "JSON parse keyword bad character"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": keyword: unexpected character " + std::string(p, 1)); + } else if (lex_state == ls_backslash) { + QTC::TC("libtests", "JSON parse backslash bad character"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": invalid character after backslash: " + std::string(p, 1)); + } + if (*p == '.') { if (lex_state == ls_number || lex_state == ls_number_e || lex_state == ls_number_e_sign) { @@ -751,6 +772,7 @@ JSONParser::numberError() throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: incomplete number"); + } else { QTC::TC("libtests", "JSON parse numeric bad character"); throw std::runtime_error( @@ -896,7 +918,7 @@ JSONParser::getToken() } else if (*p == 'e' || *p == 'E') { lex_state = ls_number_e; } else { - numberError(); + tokenError(); } break; @@ -904,7 +926,7 @@ JSONParser::getToken() if ((*p >= '0') && (*p <= '9')) { lex_state = 
ls_number_after_point; } else { - numberError(); + tokenError(); } break; @@ -922,7 +944,7 @@ JSONParser::getToken() } else if (*p == 'e' || *p == 'E') { lex_state = ls_number_e; } else { - numberError(); + tokenError(); } break; @@ -932,7 +954,7 @@ JSONParser::getToken() } else if ((*p == '+') || (*p == '-')) { lex_state = ls_number_e_sign; } else { - numberError(); + tokenError(); } break; @@ -940,7 +962,7 @@ JSONParser::getToken() if ((*p >= '0') && (*p <= '9')) { lex_state = ls_number; } else { - numberError(); + tokenError(); } break; @@ -955,7 +977,7 @@ JSONParser::getToken() action = reread; ready = true; } else { - numberError(); + tokenError(); } break; @@ -969,10 +991,7 @@ JSONParser::getToken() action = reread; ready = true; } else { - QTC::TC("libtests", "JSON parse keyword bad character"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": keyword: unexpected character " + std::string(p, 1)); + tokenError(); } break; @@ -1025,11 +1044,8 @@ JSONParser::getToken() u_value = 0; break; default: - QTC::TC("libtests", "JSON parse backslash bad character"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": invalid character after backslash: " + - std::string(p, 1)); + lex_state = ls_backslash; + tokenError(); } break; @@ -1043,10 +1059,7 @@ JSONParser::getToken() } else if ('A' <= *p && *p <= 'F') { u_value = 16 * u_value + (10 + ui(*p) - ui('A')); } else { - QTC::TC("libtests", "JSON parse bad hex after u"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset - u_count - 1) + - ": \\u must be followed by four hex digits"); + tokenError(); } if (++u_count == 4) { handle_u_code( @@ -1094,8 +1107,7 @@ JSONParser::getToken() break; default: - QTC::TC("libtests", "JSON parse ls premature end of input"); - throw std::runtime_error("JSON: premature end of input"); + tokenError(); } } } -- cgit v1.2.3-54-g00ecf From ee32235f54884247f6117fc0fbdd462a4e38ac1f Mon Sep 17 00:00:00 2001 From: 
m-holger Date: Sun, 29 Jan 2023 15:21:29 +0000 Subject: In JSONParser::getToken handle legal control chars early Also, reject them in strings. --- libqpdf/JSON.cc | 564 ++++++++++++++++++---------------- libtests/libtests.testcov | 1 + libtests/qtest/json_parse.test | 8 +- libtests/qtest/json_parse/bad-01.out | 2 +- libtests/qtest/json_parse/bad-02.out | 2 +- libtests/qtest/json_parse/bad-03.out | 2 +- libtests/qtest/json_parse/bad-27.out | 2 +- libtests/qtest/json_parse/bad-31.json | 2 +- libtests/qtest/json_parse/bad-45.out | 2 +- libtests/qtest/json_parse/bad-46.out | 2 +- libtests/qtest/json_parse/bad-47.out | 2 +- 11 files changed, 305 insertions(+), 284 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index afeda315..e9637e86 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -723,10 +723,11 @@ JSONParser::handle_u_code( void JSONParser::tokenError() { - if (bytes == 0) { + if (done) { QTC::TC("libtests", "JSON parse ls premature end of input"); throw std::runtime_error("JSON: premature end of input"); } + if (lex_state == ls_u4) { QTC::TC("libtests", "JSON parse bad hex after u"); throw std::runtime_error( @@ -737,6 +738,11 @@ JSONParser::tokenError() throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": keyword: unexpected character " + std::string(p, 1)); + } else if (lex_state == ls_string) { + QTC::TC("libtests", "JSON parse control char in string"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": control character in string (missing \"?)"); } else if (lex_state == ls_backslash) { QTC::TC("libtests", "JSON parse backslash bad character"); throw std::runtime_error( @@ -779,6 +785,7 @@ JSONParser::tokenError() "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected character " + std::string(p, 1)); } + throw std::logic_error("JSON::tokenError : unhandled error"); } void @@ -792,7 +799,7 @@ JSONParser::getToken() unsigned long high_surrogate = 0; qpdf_offset_t high_offset = 0; - 
while (!done) { + while (true) { if (p == (buf + bytes)) { p = buf; bytes = is.read(buf, sizeof(buf)); @@ -808,307 +815,320 @@ JSONParser::getToken() // end the current token (unless we are still before the start // of the token). if (lex_state == ls_top) { - // Continue with token + ++p; + ++offset; } else { - // done + break; } + } else { QTC::TC("libtests", "JSON parse null character"); throw std::runtime_error( "JSON: control or null character at offset " + std::to_string(offset)); } - } - action = append; - switch (lex_state) { - case ls_top: - token_start = offset; - if (*p == '"') { - lex_state = ls_string; - action = ignore; - } else if (QUtil::is_space(*p)) { - action = ignore; - } else if (*p == ',') { - lex_state = ls_comma; - action = ignore; - ready = true; - } else if (*p == ':') { - lex_state = ls_colon; - action = ignore; - ready = true; - } else if (*p == '{') { - lex_state = ls_begin_dict; - action = ignore; - ready = true; - } else if (*p == '}') { - lex_state = ls_end_dict; - action = ignore; - ready = true; - } else if (*p == '[') { - lex_state = ls_begin_array; - action = ignore; - ready = true; - } else if (*p == ']') { - lex_state = ls_end_array; - action = ignore; - ready = true; - } else if ((*p >= 'a') && (*p <= 'z')) { - lex_state = ls_alpha; - } else if (*p == '-') { - lex_state = ls_number_minus; - } else if ((*p >= '1') && (*p <= '9')) { - lex_state = ls_number_before_point; - } else if (*p == '0') { - lex_state = ls_number_leading_zero; - } else { - QTC::TC("libtests", "JSON parse bad character"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": unexpected character " + std::string(p, 1)); - } - break; - - case ls_number_minus: - if ((*p >= '1') && (*p <= '9')) { - lex_state = ls_number_before_point; - } else if (*p == '0') { - lex_state = ls_number_leading_zero; - } else { - QTC::TC("libtests", "JSON parse number minus no digits"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + 
- ": numeric literal: no digit after minus sign"); - } - break; - - case ls_number_leading_zero: - if (*p == '.') { - lex_state = ls_number_point; - } else if (QUtil::is_space(*p)) { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; - } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; - } else { - QTC::TC("libtests", "JSON parse leading zero"); - throw std::runtime_error( - "JSON: offset " + std::to_string(offset) + - ": number with leading zero"); - } - break; - - case ls_number_before_point: - if ((*p >= '0') && (*p <= '9')) { - // continue - } else if (*p == '.') { - lex_state = ls_number_point; - } else if (QUtil::is_space(*p)) { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; - } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; - } else { - tokenError(); - } - break; - - case ls_number_point: - if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number_after_point; - } else { - tokenError(); - } - break; - - case ls_number_after_point: - if ((*p >= '0') && (*p <= '9')) { - // continue - } else if (QUtil::is_space(*p)) { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; - } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; - } else { - tokenError(); - } - break; + } else { + action = append; + switch (lex_state) { + case ls_top: + token_start = offset; + if (*p == '"') { + lex_state = ls_string; + action = ignore; + } else if (*p == ' ') { + action = ignore; + } else if (*p == ',') { + lex_state = ls_comma; + action = ignore; + ready = true; + } else if (*p == ',') { + lex_state = ls_comma; + action = ignore; + ready = true; + } else if (*p == ':') { + lex_state = ls_colon; + action = ignore; + ready = true; 
+ } else if (*p == '{') { + lex_state = ls_begin_dict; + action = ignore; + ready = true; + } else if (*p == '}') { + lex_state = ls_end_dict; + action = ignore; + ready = true; + } else if (*p == '[') { + lex_state = ls_begin_array; + action = ignore; + ready = true; + } else if (*p == ']') { + lex_state = ls_end_array; + action = ignore; + ready = true; + } else if ((*p >= 'a') && (*p <= 'z')) { + lex_state = ls_alpha; + } else if (*p == '-') { + lex_state = ls_number_minus; + } else if ((*p >= '1') && (*p <= '9')) { + lex_state = ls_number_before_point; + } else if (*p == '0') { + lex_state = ls_number_leading_zero; + } else { + QTC::TC("libtests", "JSON parse bad character"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": unexpected character " + std::string(p, 1)); + } + break; - case ls_number_e: - if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number; - } else if ((*p == '+') || (*p == '-')) { - lex_state = ls_number_e_sign; - } else { - tokenError(); - } - break; + case ls_number_minus: + if ((*p >= '1') && (*p <= '9')) { + lex_state = ls_number_before_point; + } else if (*p == '0') { + lex_state = ls_number_leading_zero; + } else { + QTC::TC("libtests", "JSON parse number minus no digits"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": numeric literal: no digit after minus sign"); + } + break; - case ls_number_e_sign: - if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number; - } else { - tokenError(); - } - break; + case ls_number_leading_zero: + if (*p == '.') { + lex_state = ls_number_point; + } else if (*p == ' ') { + lex_state = ls_number; + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; + action = reread; + ready = true; + } else if (*p == 'e' || *p == 'E') { + lex_state = ls_number_e; + } else { + QTC::TC("libtests", "JSON parse leading zero"); + throw std::runtime_error( + "JSON: offset " + std::to_string(offset) + + ": number with 
leading zero"); + } + break; - case ls_number: - // We only get here after we have seen an exponent. - if ((*p >= '0') && (*p <= '9')) { - // continue - } else if (QUtil::is_space(*p)) { - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - action = reread; - ready = true; - } else { - tokenError(); - } - break; + case ls_number_before_point: + if ((*p >= '0') && (*p <= '9')) { + // continue + } else if (*p == '.') { + lex_state = ls_number_point; + } else if (*p == ' ') { + lex_state = ls_number; + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; + action = reread; + ready = true; + } else if (*p == 'e' || *p == 'E') { + lex_state = ls_number_e; + } else { + tokenError(); + } + break; - case ls_alpha: - if ((*p >= 'a') && (*p <= 'z')) { - // okay - } else if (QUtil::is_space(*p)) { - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - action = reread; - ready = true; - } else { - tokenError(); - } - break; + case ls_number_point: + if ((*p >= '0') && (*p <= '9')) { + lex_state = ls_number_after_point; + } else { + tokenError(); + } + break; - case ls_string: - if (*p == '"') { - if (high_offset) { - QTC::TC("libtests", "JSON 16 dangling high"); - throw std::runtime_error( - "JSON: offset " + std::to_string(high_offset) + - ": UTF-16 high surrogate not followed by low " - "surrogate"); + case ls_number_after_point: + if ((*p >= '0') && (*p <= '9')) { + // continue + } else if (*p == ' ') { + lex_state = ls_number; + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + lex_state = ls_number; + action = reread; + ready = true; + } else if (*p == 'e' || *p == 'E') { + lex_state = ls_number_e; + } else { + tokenError(); } - action = ignore; - ready = true; - } else if (*p == '\\') { - lex_state = ls_backslash; - action = ignore; - } - break; + break; - case ls_backslash: - action = ignore; - lex_state = ls_string; - switch (*p) { - case '\\': - case '\"': - case 
'/': - // \/ is allowed in json input, but so is /, so we - // don't map / to \/ in output. - token += *p; + case ls_number_e: + if ((*p >= '0') && (*p <= '9')) { + lex_state = ls_number; + } else if ((*p == '+') || (*p == '-')) { + lex_state = ls_number_e_sign; + } else { + tokenError(); + } break; - case 'b': - token += '\b'; + + case ls_number_e_sign: + if ((*p >= '0') && (*p <= '9')) { + lex_state = ls_number; + } else { + tokenError(); + } break; - case 'f': - token += '\f'; + + case ls_number: + // We only get here after we have seen an exponent. + if ((*p >= '0') && (*p <= '9')) { + // continue + } else if (*p == ' ') { + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + action = reread; + ready = true; + } else { + tokenError(); + } break; - case 'n': - token += '\n'; + + case ls_alpha: + if ((*p >= 'a') && (*p <= 'z')) { + // okay + } else if (*p == ' ') { + action = ignore; + ready = true; + } else if (strchr("{}[]:,", *p)) { + action = reread; + ready = true; + } else { + tokenError(); + } break; - case 'r': - token += '\r'; + + case ls_string: + if (*p == '"') { + if (high_offset) { + QTC::TC("libtests", "JSON 16 dangling high"); + throw std::runtime_error( + "JSON: offset " + std::to_string(high_offset) + + ": UTF-16 high surrogate not followed by low " + "surrogate"); + } + action = ignore; + ready = true; + } else if (*p == '\\') { + lex_state = ls_backslash; + action = ignore; + } break; - case 't': - token += '\t'; + + case ls_backslash: + action = ignore; + lex_state = ls_string; + switch (*p) { + case '\\': + case '\"': + case '/': + // \/ is allowed in json input, but so is /, so we + // don't map / to \/ in output. 
+ token += *p; + break; + case 'b': + token += '\b'; + break; + case 'f': + token += '\f'; + break; + case 'n': + token += '\n'; + break; + case 'r': + token += '\r'; + break; + case 't': + token += '\t'; + break; + case 'u': + lex_state = ls_u4; + u_count = 0; + u_value = 0; + break; + default: + lex_state = ls_backslash; + tokenError(); + } break; - case 'u': - lex_state = ls_u4; - u_count = 0; - u_value = 0; + + case ls_u4: + using ui = unsigned int; + action = ignore; + if ('0' <= *p && *p <= '9') { + u_value = 16 * u_value + (ui(*p) - ui('0')); + } else if ('a' <= *p && *p <= 'f') { + u_value = 16 * u_value + (10 + ui(*p) - ui('a')); + } else if ('A' <= *p && *p <= 'F') { + u_value = 16 * u_value + (10 + ui(*p) - ui('A')); + } else { + tokenError(); + } + if (++u_count == 4) { + handle_u_code( + u_value, + offset - 5, + high_surrogate, + high_offset, + token); + lex_state = ls_string; + } break; + default: - lex_state = ls_backslash; - tokenError(); + throw std::logic_error( + "JSONParser::getToken : trying to handle delimiter state"); } - break; - - case ls_u4: - using ui = unsigned int; - action = ignore; - if ('0' <= *p && *p <= '9') { - u_value = 16 * u_value + (ui(*p) - ui('0')); - } else if ('a' <= *p && *p <= 'f') { - u_value = 16 * u_value + (10 + ui(*p) - ui('a')); - } else if ('A' <= *p && *p <= 'F') { - u_value = 16 * u_value + (10 + ui(*p) - ui('A')); - } else { - tokenError(); + switch (action) { + case reread: + break; + case append: + token.append(1, *p); + // fall through + case ignore: + ++p; + ++offset; + break; } - if (++u_count == 4) { - handle_u_code( - u_value, offset - 5, high_surrogate, high_offset, token); - lex_state = ls_string; + if (ready) { + return; } - break; - - default: - throw std::logic_error( - "JSONParser::getToken : trying to handle delimiter state"); - } - switch (action) { - case reread: - break; - case append: - token.append(1, *p); - // fall through - case ignore: - ++p; - ++offset; - break; - } - if (ready) { - 
break; } } - if (done) { - if (!token.empty() && !ready) { - switch (lex_state) { - case ls_top: - // Can't happen - throw std::logic_error("tok_start set in ls_top while parsing"); - break; - case ls_number_leading_zero: - case ls_number_before_point: - case ls_number_after_point: - lex_state = ls_number; - break; + // We only get here if on end of input or if the last character was a + // control character. - case ls_number: - case ls_alpha: - // terminal state - break; + if (!token.empty()) { + switch (lex_state) { + case ls_top: + // Can't happen + throw std::logic_error("tok_start set in ls_top while parsing"); + break; - default: - tokenError(); - } + case ls_number_leading_zero: + case ls_number_before_point: + case ls_number_after_point: + lex_state = ls_number; + break; + + case ls_number: + case ls_alpha: + // terminal state + break; + + default: + tokenError(); } } } diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 4b3bb45b..5e5c2e00 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -79,6 +79,7 @@ JSON parse number minus no digits 0 JSON parse incomplete number 0 JSON parse keyword bad character 0 JSON parse backslash bad character 0 +JSON parse control char in string 0 JSON parse leading zero 0 JSON parse ls premature end of input 0 JSON parse bad hex after u 0 diff --git a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test index 8234b755..699544f6 100644 --- a/libtests/qtest/json_parse.test +++ b/libtests/qtest/json_parse.test @@ -125,10 +125,10 @@ my @bad = ( "e after minus", # 42 "missing digit after e", # 43 "missing digit after e+/-", # 44 - # "tab char in string", # 45 - # "cr char in string", # 46 - # "lf char in string", # 47 - # "bs char in string", # 48 + "tab char in string", # 45 + "cr char in string", # 46 + "lf char in string", # 47 + "bs char in string", # 48 ); my $i = 0; diff --git a/libtests/qtest/json_parse/bad-01.out b/libtests/qtest/json_parse/bad-01.out index 
a4254cff..8ae96c30 100644 --- a/libtests/qtest/json_parse/bad-01.out +++ b/libtests/qtest/json_parse/bad-01.out @@ -1 +1 @@ -exception: bad-01.json: JSON: offset 9: material follows end of object: junk +exception: bad-01.json: JSON: offset 8: material follows end of object: junk diff --git a/libtests/qtest/json_parse/bad-02.out b/libtests/qtest/json_parse/bad-02.out index 485c9658..212b2f4f 100644 --- a/libtests/qtest/json_parse/bad-02.out +++ b/libtests/qtest/json_parse/bad-02.out @@ -1 +1 @@ -exception: bad-02.json: JSON: offset 11: material follows end of object: junk +exception: bad-02.json: JSON: offset 10: material follows end of object: junk diff --git a/libtests/qtest/json_parse/bad-03.out b/libtests/qtest/json_parse/bad-03.out index 38f35119..a1411e0e 100644 --- a/libtests/qtest/json_parse/bad-03.out +++ b/libtests/qtest/json_parse/bad-03.out @@ -1 +1 @@ -exception: bad-03.json: JSON: offset 16: material follows end of object: junk +exception: bad-03.json: JSON: offset 15: material follows end of object: junk diff --git a/libtests/qtest/json_parse/bad-27.out b/libtests/qtest/json_parse/bad-27.out index 70fcbf74..4c1ecfeb 100644 --- a/libtests/qtest/json_parse/bad-27.out +++ b/libtests/qtest/json_parse/bad-27.out @@ -1 +1 @@ -exception: bad-27.json: JSON: premature end of input +exception: bad-27.json: JSON: offset 5: control character in string (missing "?) diff --git a/libtests/qtest/json_parse/bad-31.json b/libtests/qtest/json_parse/bad-31.json index 39cdd0de..277cc02f 100644 --- a/libtests/qtest/json_parse/bad-31.json +++ b/libtests/qtest/json_parse/bad-31.json @@ -1 +1 @@ -- +- diff --git a/libtests/qtest/json_parse/bad-45.out b/libtests/qtest/json_parse/bad-45.out index ba7e4f16..d4320b0a 100644 --- a/libtests/qtest/json_parse/bad-45.out +++ b/libtests/qtest/json_parse/bad-45.out @@ -1 +1 @@ -"Tab in str\ting" +exception: bad-45.json: JSON: offset 11: control character in string (missing "?) 
diff --git a/libtests/qtest/json_parse/bad-46.out b/libtests/qtest/json_parse/bad-46.out index 2baad6a4..50aa5ffb 100644 --- a/libtests/qtest/json_parse/bad-46.out +++ b/libtests/qtest/json_parse/bad-46.out @@ -1 +1 @@ -"cr in str\ring" +exception: bad-46.json: JSON: offset 10: control character in string (missing "?) diff --git a/libtests/qtest/json_parse/bad-47.out b/libtests/qtest/json_parse/bad-47.out index 30549072..39f9d3d5 100644 --- a/libtests/qtest/json_parse/bad-47.out +++ b/libtests/qtest/json_parse/bad-47.out @@ -1 +1 @@ -"lf in str\ning" +exception: bad-47.json: JSON: offset 10: control character in string (missing "?) -- cgit v1.2.3-54-g00ecf From d3152869b666a725d303e0667a69f973fc5a96ed Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 30 Jan 2023 13:17:09 +0000 Subject: In JSONParser::getToken handle structural and space chars early --- libqpdf/JSON.cc | 164 ++++++++++++++++++++--------------- libtests/qtest/json_parse/bad-09.out | 2 +- libtests/qtest/json_parse/bad-31.out | 2 +- 3 files changed, 95 insertions(+), 73 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index e9637e86..59843c05 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -791,7 +791,7 @@ JSONParser::tokenError() void JSONParser::getToken() { - enum { append, ignore, reread } action = append; + enum { append, ignore } action = append; bool ready = false; token.clear(); @@ -820,13 +820,103 @@ JSONParser::getToken() } else { break; } - } else { QTC::TC("libtests", "JSON parse null character"); throw std::runtime_error( "JSON: control or null character at offset " + std::to_string(offset)); } + } else if (*p == ',') { + if (lex_state == ls_top) { + ++p; + ++offset; + lex_state = ls_comma; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == ':') { + if (lex_state == ls_top) { + ++p; + ++offset; + lex_state = ls_colon; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + 
++offset; + } else { + break; + } + } else if (*p == ' ') { + if (lex_state == ls_top) { + ++p; + ++offset; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == '{') { + if (lex_state == ls_top) { + token_start = offset; + ++p; + ++offset; + lex_state = ls_begin_dict; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == '}') { + if (lex_state == ls_top) { + ++p; + ++offset; + lex_state = ls_end_dict; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == '[') { + if (lex_state == ls_top) { + token_start = offset; + ++p; + ++offset; + lex_state = ls_begin_array; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } + } else if (*p == ']') { + if (lex_state == ls_top) { + ++p; + ++offset; + lex_state = ls_end_array; + return; + } else if (lex_state == ls_string) { + token += *p; + ++p; + ++offset; + } else { + break; + } } else { action = append; switch (lex_state) { @@ -835,36 +925,6 @@ JSONParser::getToken() if (*p == '"') { lex_state = ls_string; action = ignore; - } else if (*p == ' ') { - action = ignore; - } else if (*p == ',') { - lex_state = ls_comma; - action = ignore; - ready = true; - } else if (*p == ',') { - lex_state = ls_comma; - action = ignore; - ready = true; - } else if (*p == ':') { - lex_state = ls_colon; - action = ignore; - ready = true; - } else if (*p == '{') { - lex_state = ls_begin_dict; - action = ignore; - ready = true; - } else if (*p == '}') { - lex_state = ls_end_dict; - action = ignore; - ready = true; - } else if (*p == '[') { - lex_state = ls_begin_array; - action = ignore; - ready = true; - } else if (*p == ']') { - lex_state = ls_end_array; - action = ignore; - ready = true; } else if ((*p >= 'a') && (*p <= 'z')) { lex_state = ls_alpha; } else if (*p == '-') { @@ -897,14 
+957,6 @@ JSONParser::getToken() case ls_number_leading_zero: if (*p == '.') { lex_state = ls_number_point; - } else if (*p == ' ') { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; } else if (*p == 'e' || *p == 'E') { lex_state = ls_number_e; } else { @@ -920,14 +972,6 @@ JSONParser::getToken() // continue } else if (*p == '.') { lex_state = ls_number_point; - } else if (*p == ' ') { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; } else if (*p == 'e' || *p == 'E') { lex_state = ls_number_e; } else { @@ -946,14 +990,6 @@ JSONParser::getToken() case ls_number_after_point: if ((*p >= '0') && (*p <= '9')) { // continue - } else if (*p == ' ') { - lex_state = ls_number; - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - lex_state = ls_number; - action = reread; - ready = true; } else if (*p == 'e' || *p == 'E') { lex_state = ls_number_e; } else { @@ -983,12 +1019,6 @@ JSONParser::getToken() // We only get here after we have seen an exponent. if ((*p >= '0') && (*p <= '9')) { // continue - } else if (*p == ' ') { - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - action = reread; - ready = true; } else { tokenError(); } @@ -997,12 +1027,6 @@ JSONParser::getToken() case ls_alpha: if ((*p >= 'a') && (*p <= 'z')) { // okay - } else if (*p == ' ') { - action = ignore; - ready = true; - } else if (strchr("{}[]:,", *p)) { - action = reread; - ready = true; } else { tokenError(); } @@ -1090,8 +1114,6 @@ JSONParser::getToken() "JSONParser::getToken : trying to handle delimiter state"); } switch (action) { - case reread: - break; case append: token.append(1, *p); // fall through @@ -1107,7 +1129,7 @@ JSONParser::getToken() } // We only get here if on end of input or if the last character was a - // control character. 
+ // control character or other delimiter. if (!token.empty()) { switch (lex_state) { diff --git a/libtests/qtest/json_parse/bad-09.out b/libtests/qtest/json_parse/bad-09.out index 21d2f1c1..979d53d0 100644 --- a/libtests/qtest/json_parse/bad-09.out +++ b/libtests/qtest/json_parse/bad-09.out @@ -1 +1 @@ -exception: bad-09.json: JSON: offset 3: expect string as dictionary key +exception: bad-09.json: JSON: offset 2: expect string as dictionary key diff --git a/libtests/qtest/json_parse/bad-31.out b/libtests/qtest/json_parse/bad-31.out index 2228d08d..af177726 100644 --- a/libtests/qtest/json_parse/bad-31.out +++ b/libtests/qtest/json_parse/bad-31.out @@ -1 +1 @@ -exception: bad-31.json: JSON: offset 1: numeric literal: no digit after minus sign +exception: bad-31.json: JSON: offset 1: numeric literal: incomplete number -- cgit v1.2.3-54-g00ecf From f6c9019597c5077d3e99c6d41a598b49b385f59c Mon Sep 17 00:00:00 2001 From: m-holger Date: Wed, 1 Feb 2023 10:29:32 +0000 Subject: Add new methods JSONParser::append and ignore Reduce boilerplate and increase efficiency by avoiding setting and branching on action and ready in getToken. 
--- libqpdf/JSON.cc | 190 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 95 insertions(+), 95 deletions(-) diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 59843c05..c4b3ed00 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -605,16 +605,6 @@ namespace std::shared_ptr parse(); private: - void getToken(); - void handleToken(); - void tokenError(); - static void handle_u_code( - unsigned long codepoint, - qpdf_offset_t offset, - unsigned long& high_surrogate, - qpdf_offset_t& high_offset, - std::string& result); - enum parser_state_e { ps_top, ps_dict_begin, @@ -662,6 +652,20 @@ namespace std::shared_ptr item; }; + void getToken(); + void handleToken(); + void tokenError(); + static void handle_u_code( + unsigned long codepoint, + qpdf_offset_t offset, + unsigned long& high_surrogate, + qpdf_offset_t& high_offset, + std::string& result); + inline void append(); + inline void append(lex_state_e); + inline void ignore(); + inline void ignore(lex_state_e); + InputSource& is; JSON::Reactor* reactor; lex_state_e lex_state; @@ -788,11 +792,48 @@ JSONParser::tokenError() throw std::logic_error("JSON::tokenError : unhandled error"); } +// Append current character to token and advance to next input character. +inline void +JSONParser::append() +{ + token += *p; + ++p; + ++offset; +} + +// Append current character to token, advance to next input character and +// transition to 'next' lexer state. +inline void +JSONParser::append(lex_state_e next) +{ + lex_state = next; + token += *p; + ++p; + ++offset; +} + +// Advance to next input character without appending the current character to +// token. +inline void +JSONParser::ignore() +{ + ++p; + ++offset; +} + +// Advance to next input character without appending the current character to +// token and transition to 'next' lexer state. 
+inline void +JSONParser::ignore(lex_state_e next) +{ + lex_state = next; + ++p; + ++offset; +} + void JSONParser::getToken() { - enum { append, ignore } action = append; - bool ready = false; token.clear(); // Keep track of UTF-16 surrogate pairs. @@ -815,8 +856,7 @@ JSONParser::getToken() // end the current token (unless we are still before the start // of the token). if (lex_state == ls_top) { - ++p; - ++offset; + ignore(); } else { break; } @@ -828,111 +868,82 @@ JSONParser::getToken() } } else if (*p == ',') { if (lex_state == ls_top) { - ++p; - ++offset; - lex_state = ls_comma; + ignore(ls_comma); return; } else if (lex_state == ls_string) { - token += *p; - ++p; - ++offset; + append(); } else { break; } } else if (*p == ':') { if (lex_state == ls_top) { - ++p; - ++offset; - lex_state = ls_colon; + ignore(ls_colon); return; } else if (lex_state == ls_string) { - token += *p; - ++p; - ++offset; + append(); } else { break; } } else if (*p == ' ') { if (lex_state == ls_top) { - ++p; - ++offset; + ignore(); } else if (lex_state == ls_string) { - token += *p; - ++p; - ++offset; + append(); } else { break; } } else if (*p == '{') { if (lex_state == ls_top) { token_start = offset; - ++p; - ++offset; - lex_state = ls_begin_dict; + ignore(ls_begin_dict); return; } else if (lex_state == ls_string) { - token += *p; - ++p; - ++offset; + append(); } else { break; } } else if (*p == '}') { if (lex_state == ls_top) { - ++p; - ++offset; - lex_state = ls_end_dict; + ignore(ls_end_dict); return; } else if (lex_state == ls_string) { - token += *p; - ++p; - ++offset; + append(); } else { break; } } else if (*p == '[') { if (lex_state == ls_top) { token_start = offset; - ++p; - ++offset; - lex_state = ls_begin_array; + ignore(ls_begin_array); return; } else if (lex_state == ls_string) { - token += *p; - ++p; - ++offset; + append(); } else { break; } } else if (*p == ']') { if (lex_state == ls_top) { - ++p; - ++offset; - lex_state = ls_end_array; + ignore(ls_end_array); return; } 
else if (lex_state == ls_string) { - token += *p; - ++p; - ++offset; + append(); } else { break; } } else { - action = append; switch (lex_state) { case ls_top: token_start = offset; if (*p == '"') { - lex_state = ls_string; - action = ignore; + ignore(ls_string); } else if ((*p >= 'a') && (*p <= 'z')) { - lex_state = ls_alpha; + append(ls_alpha); } else if (*p == '-') { - lex_state = ls_number_minus; + append(ls_number_minus); } else if ((*p >= '1') && (*p <= '9')) { - lex_state = ls_number_before_point; + append(ls_number_before_point); } else if (*p == '0') { - lex_state = ls_number_leading_zero; + append(ls_number_leading_zero); } else { QTC::TC("libtests", "JSON parse bad character"); throw std::runtime_error( @@ -943,9 +954,9 @@ JSONParser::getToken() case ls_number_minus: if ((*p >= '1') && (*p <= '9')) { - lex_state = ls_number_before_point; + append(ls_number_before_point); } else if (*p == '0') { - lex_state = ls_number_leading_zero; + append(ls_number_leading_zero); } else { QTC::TC("libtests", "JSON parse number minus no digits"); throw std::runtime_error( @@ -956,9 +967,9 @@ JSONParser::getToken() case ls_number_leading_zero: if (*p == '.') { - lex_state = ls_number_point; + append(ls_number_point); } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; + append(ls_number_e); } else { QTC::TC("libtests", "JSON parse leading zero"); throw std::runtime_error( @@ -969,11 +980,11 @@ JSONParser::getToken() case ls_number_before_point: if ((*p >= '0') && (*p <= '9')) { - // continue + append(); } else if (*p == '.') { - lex_state = ls_number_point; + append(ls_number_point); } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; + append(ls_number_e); } else { tokenError(); } @@ -981,7 +992,7 @@ JSONParser::getToken() case ls_number_point: if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number_after_point; + append(ls_number_after_point); } else { tokenError(); } @@ -989,9 +1000,9 @@ JSONParser::getToken() case ls_number_after_point: if 
((*p >= '0') && (*p <= '9')) { - // continue + append(); } else if (*p == 'e' || *p == 'E') { - lex_state = ls_number_e; + append(ls_number_e); } else { tokenError(); } @@ -999,9 +1010,9 @@ JSONParser::getToken() case ls_number_e: if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number; + append(ls_number); } else if ((*p == '+') || (*p == '-')) { - lex_state = ls_number_e_sign; + append(ls_number_e_sign); } else { tokenError(); } @@ -1009,7 +1020,7 @@ JSONParser::getToken() case ls_number_e_sign: if ((*p >= '0') && (*p <= '9')) { - lex_state = ls_number; + append(ls_number); } else { tokenError(); } @@ -1018,7 +1029,7 @@ JSONParser::getToken() case ls_number: // We only get here after we have seen an exponent. if ((*p >= '0') && (*p <= '9')) { - // continue + append(); } else { tokenError(); } @@ -1026,7 +1037,7 @@ JSONParser::getToken() case ls_alpha: if ((*p >= 'a') && (*p <= 'z')) { - // okay + append(); } else { tokenError(); } @@ -1041,16 +1052,16 @@ JSONParser::getToken() ": UTF-16 high surrogate not followed by low " "surrogate"); } - action = ignore; - ready = true; + ignore(); + return; } else if (*p == '\\') { - lex_state = ls_backslash; - action = ignore; + ignore(ls_backslash); + } else { + append(); } break; case ls_backslash: - action = ignore; lex_state = ls_string; switch (*p) { case '\\': @@ -1084,11 +1095,11 @@ JSONParser::getToken() lex_state = ls_backslash; tokenError(); } + ignore(); break; case ls_u4: using ui = unsigned int; - action = ignore; if ('0' <= *p && *p <= '9') { u_value = 16 * u_value + (ui(*p) - ui('0')); } else if ('a' <= *p && *p <= 'f') { @@ -1107,24 +1118,13 @@ JSONParser::getToken() token); lex_state = ls_string; } + ignore(); break; default: throw std::logic_error( "JSONParser::getToken : trying to handle delimiter state"); } - switch (action) { - case append: - token.append(1, *p); - // fall through - case ignore: - ++p; - ++offset; - break; - } - if (ready) { - return; - } } } -- cgit v1.2.3-54-g00ecf From 
b6f048546f7ffdd228bd9360c647b3064dfa1bf3 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sun, 29 Jan 2023 15:21:29 +0000 Subject: Eliminate the use of shared pointers in JSONParser --- include/qpdf/JSON.hh | 1 + libqpdf/JSON.cc | 64 +++++++++++++++++++++++++++------------------------- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/include/qpdf/JSON.hh b/include/qpdf/JSON.hh index 64f3792c..4b829017 100644 --- a/include/qpdf/JSON.hh +++ b/include/qpdf/JSON.hh @@ -54,6 +54,7 @@ class JSON { public: static int constexpr LATEST = 2; + JSON() = default; QPDF_DLL std::string unparse() const; diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index c4b3ed00..1dc09013 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -602,7 +602,7 @@ namespace { } - std::shared_ptr parse(); + JSON parse(); private: enum parser_state_e { @@ -642,14 +642,14 @@ namespace struct StackFrame { - StackFrame(parser_state_e state, std::shared_ptr& item) : + StackFrame(parser_state_e state, JSON& item) : state(state), item(item) { } parser_state_e state; - std::shared_ptr item; + JSON item; }; void getToken(); @@ -860,6 +860,7 @@ JSONParser::getToken() } else { break; } + } else { QTC::TC("libtests", "JSON parse null character"); throw std::runtime_error( @@ -1169,18 +1170,19 @@ JSONParser::handleToken() ": material follows end of object: " + token); } - std::shared_ptr item; - auto tos = stack.empty() ? nullptr : stack.back().item; + const static JSON null_item = JSON::makeNull(); + JSON item; + auto tos = stack.empty() ? 
null_item : stack.back().item; auto ls = lex_state; lex_state = ls_top; switch (ls) { case ls_begin_dict: - item = std::make_shared(JSON::makeDictionary()); + item = JSON::makeDictionary(); break; case ls_begin_array: - item = std::make_shared(JSON::makeArray()); + item = JSON::makeArray(); break; case ls_colon: @@ -1220,9 +1222,9 @@ JSONParser::handleToken() ": unexpected array end delimiter"); } parser_state = stack.back().state; - tos->setEnd(offset); + tos.setEnd(offset); if (reactor) { - reactor->containerEnd(*tos); + reactor->containerEnd(tos); } if (parser_state != ps_done) { stack.pop_back(); @@ -1238,9 +1240,9 @@ JSONParser::handleToken() ": unexpected dictionary end delimiter"); } parser_state = stack.back().state; - tos->setEnd(offset); + tos.setEnd(offset); if (reactor) { - reactor->containerEnd(*tos); + reactor->containerEnd(tos); } if (parser_state != ps_done) { stack.pop_back(); @@ -1248,16 +1250,16 @@ JSONParser::handleToken() return; case ls_number: - item = std::make_shared(JSON::makeNumber(token)); + item = JSON::makeNumber(token); break; case ls_alpha: if (token == "true") { - item = std::make_shared(JSON::makeBool(true)); + item = JSON::makeBool(true); } else if (token == "false") { - item = std::make_shared(JSON::makeBool(false)); + item = JSON::makeBool(false); } else if (token == "null") { - item = std::make_shared(JSON::makeNull()); + item = JSON::makeNull(); } else { QTC::TC("libtests", "JSON parse invalid keyword"); throw std::runtime_error( @@ -1274,7 +1276,7 @@ JSONParser::handleToken() parser_state = ps_dict_after_key; return; } else { - item = std::make_shared(JSON::makeString(token)); + item = JSON::makeString(token); } break; @@ -1284,8 +1286,8 @@ JSONParser::handleToken() break; } - item->setStart(token_start); - item->setEnd(offset); + item.setStart(token_start); + item.setEnd(offset); switch (parser_state) { case ps_dict_begin: @@ -1297,28 +1299,28 @@ JSONParser::handleToken() break; case ps_dict_after_colon: - if 
(tos->checkDictionaryKeySeen(dict_key)) { + if (tos.checkDictionaryKeySeen(dict_key)) { QTC::TC("libtests", "JSON parse duplicate key"); throw std::runtime_error( "JSON: offset " + std::to_string(dict_key_offset) + ": duplicated dictionary key"); } - if (!reactor || !reactor->dictionaryItem(dict_key, *item)) { - tos->addDictionaryMember(dict_key, *item); + if (!reactor || !reactor->dictionaryItem(dict_key, item)) { + tos.addDictionaryMember(dict_key, item); } parser_state = ps_dict_after_item; break; case ps_array_begin: case ps_array_after_comma: - if (!reactor || !reactor->arrayItem(*item)) { - tos->addArrayElement(*item); + if (!reactor || !reactor->arrayItem(item)) { + tos.addArrayElement(item); } parser_state = ps_array_after_item; break; case ps_top: - if (!(item->isDictionary() || item->isArray())) { + if (!(item.isDictionary() || item.isArray())) { stack.push_back({ps_done, item}); parser_state = ps_done; return; @@ -1349,18 +1351,18 @@ JSONParser::handleToken() "JSONParser::handleToken: unexpected parser state"); } - if (item->isDictionary() || item->isArray()) { + if (item.isDictionary() || item.isArray()) { stack.push_back({parser_state, item}); // Calling container start method is postponed until after // adding the containers to their parent containers, if any. // This makes it much easier to keep track of the current // nesting level. 
- if (item->isDictionary()) { + if (item.isDictionary()) { if (reactor) { reactor->dictionaryStart(); } parser_state = ps_dict_begin; - } else if (item->isArray()) { + } else if (item.isArray()) { if (reactor) { reactor->arrayStart(); } @@ -1375,7 +1377,7 @@ JSONParser::handleToken() } } -std::shared_ptr<JSON> +JSON JSONParser::parse() { while (!done) { @@ -1387,7 +1389,7 @@ JSONParser::parse() throw std::runtime_error("JSON: premature end of input"); } auto const& tos = stack.back().item; - if (reactor && tos.get() && !(tos->isArray() || tos->isDictionary())) { + if (reactor && !(tos.isArray() || tos.isDictionary())) { reactor->topLevelScalar(); } return tos; @@ -1397,7 +1399,7 @@ JSON JSON::parse(InputSource& is, Reactor* reactor) { JSONParser jp(is, reactor); - return *jp.parse(); + return jp.parse(); } JSON @@ -1405,7 +1407,7 @@ JSON::parse(std::string const& s) { BufferInputSource bis("json input", s); JSONParser jp(bis, nullptr); - return *jp.parse(); + return jp.parse(); } void -- cgit v1.2.3-54-g00ecf