about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jay Berkenbilt <jberkenbilt@users.noreply.github.com> 2023-02-18 23:43:02 +0100
committer GitHub <noreply@github.com> 2023-02-18 23:43:02 +0100
commit e4e03e9ac10d8ea0c1a8ef8c78f0103068928822 (patch)
tree af59791cbe95f318581878ebc7d4b95fd8b20be3
parent 76189c44a2f656e6623a7d36519e406d488a9ee5 (diff)
parent b6f048546f7ffdd228bd9360c647b3064dfa1bf3 (diff)
download qpdf-e4e03e9ac10d8ea0c1a8ef8c78f0103068928822.tar.zst
Merge pull request #890 from m-holger/jpsp
Eliminate the use of shared pointers in JSONParser
-rw-r--r-- include/qpdf/JSON.hh 1
-rw-r--r-- libqpdf/JSON.cc 1103
-rw-r--r-- libtests/libtests.testcov 8
-rw-r--r-- libtests/qtest/json_parse.test 8
-rw-r--r-- libtests/qtest/json_parse/bad-01.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-02.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-03.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-04.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-09.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-18.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-27.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-28.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-30.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-31.json 2
-rw-r--r-- libtests/qtest/json_parse/bad-31.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-32.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-33.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-34.out 2
-rw-r--r-- libtests/qtest/json_parse/bad-41.json 2
-rw-r--r-- libtests/qtest/json_parse/bad-41.out 1
-rw-r--r-- libtests/qtest/json_parse/bad-42.json 1
-rw-r--r-- libtests/qtest/json_parse/bad-42.out 1
-rw-r--r-- libtests/qtest/json_parse/bad-43.json 1
-rw-r--r-- libtests/qtest/json_parse/bad-43.out 1
-rw-r--r-- libtests/qtest/json_parse/bad-44.json 1
-rw-r--r-- libtests/qtest/json_parse/bad-44.out 1
-rw-r--r-- libtests/qtest/json_parse/bad-45.json 1
-rw-r--r-- libtests/qtest/json_parse/bad-45.out 1
-rw-r--r-- libtests/qtest/json_parse/bad-46.json 1
-rw-r--r-- libtests/qtest/json_parse/bad-46.out 1
-rw-r--r-- libtests/qtest/json_parse/bad-47.json 2
-rw-r--r-- libtests/qtest/json_parse/bad-47.out 1
-rw-r--r-- libtests/qtest/json_parse/bad-48.json 1
-rw-r--r-- libtests/qtest/json_parse/bad-48.out 1
-rw-r--r-- libtests/qtest/json_parse/good-01-react.out 4
-rw-r--r-- libtests/qtest/json_parse/good-04-react.out 10
-rw-r--r-- libtests/qtest/json_parse/good-10-react.out 10
-rw-r--r-- libtests/qtest/json_parse/good-11-react.out 4
-rw-r--r-- qpdf/qtest/qpdf/qjson-objects-not-dict.out 2
-rw-r--r-- qpdf/qtest/qpdf/qjson-stream-not-dict.out 2
-rw-r--r-- qpdf/qtest/qpdf/qjson-trailer-not-dict.out 2
41 files changed, 641 insertions, 559 deletions
diff --git a/include/qpdf/JSON.hh b/include/qpdf/JSON.hh
index 28da3f24..2906d85a 100644
--- a/include/qpdf/JSON.hh
+++ b/include/qpdf/JSON.hh
@@ -54,6 +54,7 @@ class JSON
{
public:
static int constexpr LATEST = 2;
+ JSON() = default;
QPDF_DLL
std::string unparse() const;
diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc
index aa9b6e41..76db652b 100644
--- a/libqpdf/JSON.cc
+++ b/libqpdf/JSON.cc
@@ -4,19 +4,11 @@
#include <qpdf/Pl_Base64.hh>
#include <qpdf/Pl_Concatenate.hh>
#include <qpdf/Pl_String.hh>
-#include <qpdf/QIntC.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <cstring>
#include <stdexcept>
-template <typename T>
-static qpdf_offset_t
-toO(T const& i)
-{
- return QIntC::to_offset(i);
-}
-
JSON::Members::Members(std::shared_ptr<JSON_value> value) :
value(value),
start(0),
@@ -622,11 +614,6 @@ namespace
is(is),
reactor(reactor),
lex_state(ls_top),
- number_before_point(0),
- number_after_point(0),
- number_after_e(0),
- number_saw_point(false),
- number_saw_e(false),
bytes(0),
p(buf),
u_count(0),
@@ -637,21 +624,9 @@ namespace
{
}
- std::shared_ptr<JSON> parse();
+ JSON parse();
private:
- void getToken();
- void handleToken();
- static std::string
- decode_string(std::string const& json, qpdf_offset_t offset);
- static void handle_u_code(
- char const* s,
- qpdf_offset_t offset,
- qpdf_offset_t i,
- unsigned long& high_surrogate,
- qpdf_offset_t& high_offset,
- std::string& result);
-
enum parser_state_e {
ps_top,
ps_dict_begin,
@@ -668,30 +643,65 @@ namespace
enum lex_state_e {
ls_top,
ls_number,
+ ls_number_minus,
+ ls_number_leading_zero,
+ ls_number_before_point,
+ ls_number_point,
+ ls_number_after_point,
+ ls_number_e,
+ ls_number_e_sign,
ls_alpha,
ls_string,
ls_backslash,
ls_u4,
+ ls_begin_array,
+ ls_end_array,
+ ls_begin_dict,
+ ls_end_dict,
+ ls_colon,
+ ls_comma,
};
+ struct StackFrame
+ {
+ StackFrame(parser_state_e state, JSON& item) :
+ state(state),
+ item(item)
+ {
+ }
+
+ parser_state_e state;
+ JSON item;
+ };
+
+ void getToken();
+ void handleToken();
+ void tokenError();
+ static void handle_u_code(
+ unsigned long codepoint,
+ qpdf_offset_t offset,
+ unsigned long& high_surrogate,
+ qpdf_offset_t& high_offset,
+ std::string& result);
+ inline void append();
+ inline void append(lex_state_e);
+ inline void ignore();
+ inline void ignore(lex_state_e);
+
InputSource& is;
JSON::Reactor* reactor;
lex_state_e lex_state;
- size_t number_before_point;
- size_t number_after_point;
- size_t number_after_e;
- bool number_saw_point;
- bool number_saw_e;
char buf[16384];
size_t bytes;
char const* p;
qpdf_offset_t u_count;
+ unsigned long u_value{0};
qpdf_offset_t offset;
bool done;
std::string token;
+ qpdf_offset_t token_start{0};
parser_state_e parser_state;
- std::vector<std::shared_ptr<JSON>> stack;
- std::vector<parser_state_e> ps_stack;
+ std::vector<StackFrame> stack;
std::string dict_key;
qpdf_offset_t dict_key_offset;
};
@@ -699,22 +709,15 @@ namespace
void
JSONParser::handle_u_code(
- char const* s,
+ unsigned long codepoint,
qpdf_offset_t offset,
- qpdf_offset_t i,
unsigned long& high_surrogate,
qpdf_offset_t& high_offset,
std::string& result)
{
- std::string hex = QUtil::hex_decode(std::string(s + i + 1, s + i + 5));
- unsigned char high = static_cast<unsigned char>(hex.at(0));
- unsigned char low = static_cast<unsigned char>(hex.at(1));
- unsigned long codepoint = high;
- codepoint <<= 8;
- codepoint += low;
if ((codepoint & 0xFC00) == 0xD800) {
// high surrogate
- qpdf_offset_t new_high_offset = offset + i;
+ qpdf_offset_t new_high_offset = offset;
if (high_offset) {
QTC::TC("libtests", "JSON 16 high high");
throw std::runtime_error(
@@ -727,10 +730,10 @@ JSONParser::handle_u_code(
high_surrogate = codepoint;
} else if ((codepoint & 0xFC00) == 0xDC00) {
// low surrogate
- if (offset + i != (high_offset + 6)) {
+ if (offset != (high_offset + 6)) {
QTC::TC("libtests", "JSON 16 low not after high");
throw std::runtime_error(
- "JSON: offset " + std::to_string(offset + i) +
+ "JSON: offset " + std::to_string(offset) +
": UTF-16 low surrogate found not immediately after high"
" surrogate");
}
@@ -743,88 +746,123 @@ JSONParser::handle_u_code(
}
}
-std::string
-JSONParser::decode_string(std::string const& str, qpdf_offset_t offset)
+void
+JSONParser::tokenError()
{
- // The string has already been validated when this private method
- // is called, so errors are logic errors instead of runtime
- // errors.
- size_t len = str.length();
- if ((len < 2) || (str.at(0) != '"') || (str.at(len - 1) != '"')) {
- throw std::logic_error(
- "JSON Parse: decode_string called with other than \"...\"");
+ if (done) {
+ QTC::TC("libtests", "JSON parse ls premature end of input");
+ throw std::runtime_error("JSON: premature end of input");
}
- char const* s = str.c_str();
- // Move inside the quotation marks
- ++s;
- len -= 2;
- // Keep track of UTF-16 surrogate pairs.
- unsigned long high_surrogate = 0;
- qpdf_offset_t high_offset = 0;
- std::string result;
- qpdf_offset_t olen = toO(len);
- for (qpdf_offset_t i = 0; i < olen; ++i) {
- if (s[i] == '\\') {
- if (i + 1 >= olen) {
- throw std::logic_error("JSON parse: nothing after \\");
- }
- char ch = s[++i];
- switch (ch) {
- case '\\':
- case '\"':
- case '/':
- // \/ is allowed in json input, but so is /, so we
- // don't map / to \/ in output.
- result.append(1, ch);
- break;
- case 'b':
- result.append(1, '\b');
- break;
- case 'f':
- result.append(1, '\f');
- break;
- case 'n':
- result.append(1, '\n');
- break;
- case 'r':
- result.append(1, '\r');
- break;
- case 't':
- result.append(1, '\t');
- break;
- case 'u':
- if (i + 4 >= olen) {
- throw std::logic_error(
- "JSON parse: not enough characters after \\u");
- }
- handle_u_code(
- s, offset, i, high_surrogate, high_offset, result);
- i += 4;
- break;
- default:
- throw std::logic_error("JSON parse: bad character after \\");
- break;
- }
+
+ if (lex_state == ls_u4) {
+ QTC::TC("libtests", "JSON parse bad hex after u");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset - u_count - 1) +
+ ": \\u must be followed by four hex digits");
+ } else if (lex_state == ls_alpha) {
+ QTC::TC("libtests", "JSON parse keyword bad character");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": keyword: unexpected character " + std::string(p, 1));
+ } else if (lex_state == ls_string) {
+ QTC::TC("libtests", "JSON parse control char in string");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": control character in string (missing \"?)");
+ } else if (lex_state == ls_backslash) {
+ QTC::TC("libtests", "JSON parse backslash bad character");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": invalid character after backslash: " + std::string(p, 1));
+ }
+
+ if (*p == '.') {
+ if (lex_state == ls_number || lex_state == ls_number_e ||
+ lex_state == ls_number_e_sign) {
+ QTC::TC("libtests", "JSON parse point after e");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: decimal point after e");
} else {
- result.append(1, s[i]);
+ QTC::TC("libtests", "JSON parse duplicate point");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: decimal point already seen");
}
- }
- if (high_offset) {
- QTC::TC("libtests", "JSON 16 dangling high");
+ } else if (*p == 'e' || *p == 'E') {
+ QTC::TC("libtests", "JSON parse duplicate e");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: e already seen");
+ } else if ((*p == '+') || (*p == '-')) {
+ QTC::TC("libtests", "JSON parse unexpected sign");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: unexpected sign");
+ } else if (QUtil::is_space(*p) || strchr("{}[]:,", *p)) {
+ QTC::TC("libtests", "JSON parse incomplete number");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: incomplete number");
+
+ } else {
+ QTC::TC("libtests", "JSON parse numeric bad character");
throw std::runtime_error(
- "JSON: offset " + std::to_string(high_offset) +
- ": UTF-16 high surrogate not followed by low surrogate");
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: unexpected character " + std::string(p, 1));
}
- return result;
+ throw std::logic_error("JSON::tokenError : unhandled error");
+}
+
+// Append current character to token and advance to next input character.
+inline void
+JSONParser::append()
+{
+ token += *p;
+ ++p;
+ ++offset;
+}
+
+// Append current character to token, advance to next input character and
+// transition to 'next' lexer state.
+inline void
+JSONParser::append(lex_state_e next)
+{
+ lex_state = next;
+ token += *p;
+ ++p;
+ ++offset;
+}
+
+// Advance to next input character without appending the current character to
+// token.
+inline void
+JSONParser::ignore()
+{
+ ++p;
+ ++offset;
+}
+
+// Advance to next input character without appending the current character to
+// token and transition to 'next' lexer state.
+inline void
+JSONParser::ignore(lex_state_e next)
+{
+ lex_state = next;
+ ++p;
+ ++offset;
}
void
JSONParser::getToken()
{
- enum { append, ignore, reread } action = append;
- bool ready = false;
token.clear();
- while (!done) {
+
+ // Keep track of UTF-16 surrogate pairs.
+ unsigned long high_surrogate = 0;
+ qpdf_offset_t high_offset = 0;
+
+ while (true) {
if (p == (buf + bytes)) {
p = buf;
bytes = is.read(buf, sizeof(buf));
@@ -834,210 +872,316 @@ JSONParser::getToken()
}
}
- if (*p == 0) {
- QTC::TC("libtests", "JSON parse null character");
- throw std::runtime_error(
- "JSON: null character at offset " + std::to_string(offset));
- }
- action = append;
- switch (lex_state) {
- case ls_top:
- if (*p == '"') {
- lex_state = ls_string;
- } else if (QUtil::is_space(*p)) {
- action = ignore;
- } else if ((*p >= 'a') && (*p <= 'z')) {
- lex_state = ls_alpha;
- } else if (*p == '-') {
- lex_state = ls_number;
- number_before_point = 0;
- number_after_point = 0;
- number_after_e = 0;
- number_saw_point = false;
- number_saw_e = false;
- } else if ((*p >= '0') && (*p <= '9')) {
- lex_state = ls_number;
- number_before_point = 1;
- number_after_point = 0;
- number_after_e = 0;
- number_saw_point = false;
- number_saw_e = false;
- } else if (*p == '.') {
- lex_state = ls_number;
- number_before_point = 0;
- number_after_point = 0;
- number_after_e = 0;
- number_saw_point = true;
- number_saw_e = false;
- } else if (strchr("{}[]:,", *p)) {
- ready = true;
+ if ((*p < 32 && *p >= 0)) {
+ if (*p == '\t' || *p == '\n' || *p == '\r') {
+ // Legal white space not permitted in strings. This will always
+ // end the current token (unless we are still before the start
+ // of the token).
+ if (lex_state == ls_top) {
+ ignore();
+ } else {
+ break;
+ }
+
} else {
- QTC::TC("libtests", "JSON parse bad character");
+ QTC::TC("libtests", "JSON parse null character");
throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected character " + std::string(p, 1));
+ "JSON: control or null character at offset " +
+ std::to_string(offset));
}
- break;
-
- case ls_number:
- if ((*p >= '0') && (*p <= '9')) {
- if (number_saw_e) {
- ++number_after_e;
- } else if (number_saw_point) {
- ++number_after_point;
+ } else if (*p == ',') {
+ if (lex_state == ls_top) {
+ ignore(ls_comma);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == ':') {
+ if (lex_state == ls_top) {
+ ignore(ls_colon);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == ' ') {
+ if (lex_state == ls_top) {
+ ignore();
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == '{') {
+ if (lex_state == ls_top) {
+ token_start = offset;
+ ignore(ls_begin_dict);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == '}') {
+ if (lex_state == ls_top) {
+ ignore(ls_end_dict);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == '[') {
+ if (lex_state == ls_top) {
+ token_start = offset;
+ ignore(ls_begin_array);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == ']') {
+ if (lex_state == ls_top) {
+ ignore(ls_end_array);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else {
+ switch (lex_state) {
+ case ls_top:
+ token_start = offset;
+ if (*p == '"') {
+ ignore(ls_string);
+ } else if ((*p >= 'a') && (*p <= 'z')) {
+ append(ls_alpha);
+ } else if (*p == '-') {
+ append(ls_number_minus);
+ } else if ((*p >= '1') && (*p <= '9')) {
+ append(ls_number_before_point);
+ } else if (*p == '0') {
+ append(ls_number_leading_zero);
} else {
- ++number_before_point;
- }
- } else if (*p == '.') {
- if (number_saw_e) {
- QTC::TC("libtests", "JSON parse point after e");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": numeric literal: decimal point after e");
- } else if (number_saw_point) {
- QTC::TC("libtests", "JSON parse duplicate point");
+ QTC::TC("libtests", "JSON parse bad character");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": numeric literal: decimal point already seen");
- } else {
- number_saw_point = true;
+ ": unexpected character " + std::string(p, 1));
}
- } else if (*p == 'e') {
- if (number_saw_e) {
- QTC::TC("libtests", "JSON parse duplicate e");
+ break;
+
+ case ls_number_minus:
+ if ((*p >= '1') && (*p <= '9')) {
+ append(ls_number_before_point);
+ } else if (*p == '0') {
+ append(ls_number_leading_zero);
+ } else {
+ QTC::TC("libtests", "JSON parse number minus no digits");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": numeric literal: e already seen");
- } else {
- number_saw_e = true;
+ ": numeric literal: no digit after minus sign");
}
- } else if ((*p == '+') || (*p == '-')) {
- if (number_saw_e && (number_after_e == 0)) {
- // okay
+ break;
+
+ case ls_number_leading_zero:
+ if (*p == '.') {
+ append(ls_number_point);
+ } else if (*p == 'e' || *p == 'E') {
+ append(ls_number_e);
} else {
- QTC::TC("libtests", "JSON parse unexpected sign");
+ QTC::TC("libtests", "JSON parse leading zero");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": numeric literal: unexpected sign");
+ ": number with leading zero");
}
- } else if (QUtil::is_space(*p)) {
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- action = reread;
- ready = true;
- } else {
- QTC::TC("libtests", "JSON parse numeric bad character");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": numeric literal: unexpected character " +
- std::string(p, 1));
- }
- break;
+ break;
- case ls_alpha:
- if ((*p >= 'a') && (*p <= 'z')) {
- // okay
- } else if (QUtil::is_space(*p)) {
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- action = reread;
- ready = true;
- } else {
- QTC::TC("libtests", "JSON parse keyword bad character");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": keyword: unexpected character " + std::string(p, 1));
- }
- break;
+ case ls_number_before_point:
+ if ((*p >= '0') && (*p <= '9')) {
+ append();
+ } else if (*p == '.') {
+ append(ls_number_point);
+ } else if (*p == 'e' || *p == 'E') {
+ append(ls_number_e);
+ } else {
+ tokenError();
+ }
+ break;
- case ls_string:
- if (*p == '"') {
- ready = true;
- } else if (*p == '\\') {
- lex_state = ls_backslash;
- }
- break;
+ case ls_number_point:
+ if ((*p >= '0') && (*p <= '9')) {
+ append(ls_number_after_point);
+ } else {
+ tokenError();
+ }
+ break;
- case ls_backslash:
- /* cSpell: ignore bfnrt */
- if (strchr("\\\"/bfnrt", *p)) {
- lex_state = ls_string;
- } else if (*p == 'u') {
- lex_state = ls_u4;
- u_count = 0;
- } else {
- QTC::TC("libtests", "JSON parse backslash bad character");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": invalid character after backslash: " +
- std::string(p, 1));
- }
- break;
+ case ls_number_after_point:
+ if ((*p >= '0') && (*p <= '9')) {
+ append();
+ } else if (*p == 'e' || *p == 'E') {
+ append(ls_number_e);
+ } else {
+ tokenError();
+ }
+ break;
- case ls_u4:
- if (!QUtil::is_hex_digit(*p)) {
- QTC::TC("libtests", "JSON parse bad hex after u");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset - u_count - 1) +
- ": \\u must be followed by four hex digits");
- }
- if (++u_count == 4) {
- lex_state = ls_string;
- }
- break;
- }
- switch (action) {
- case reread:
- break;
- case append:
- token.append(1, *p);
- // fall through
- case ignore:
- ++p;
- ++offset;
- break;
- }
- if (ready) {
- break;
- }
- }
- if (done) {
- if ((!token.empty()) && (!ready)) {
- switch (lex_state) {
- case ls_top:
- // Can't happen
- throw std::logic_error("tok_start set in ls_top while parsing");
+ case ls_number_e:
+ if ((*p >= '0') && (*p <= '9')) {
+ append(ls_number);
+ } else if ((*p == '+') || (*p == '-')) {
+ append(ls_number_e_sign);
+ } else {
+ tokenError();
+ }
+ break;
+
+ case ls_number_e_sign:
+ if ((*p >= '0') && (*p <= '9')) {
+ append(ls_number);
+ } else {
+ tokenError();
+ }
break;
case ls_number:
- case ls_alpha:
- // okay
+ // We only get here after we have seen an exponent.
+ if ((*p >= '0') && (*p <= '9')) {
+ append();
+ } else {
+ tokenError();
+ }
break;
- case ls_u4:
- QTC::TC("libtests", "JSON parse premature end of u");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset - u_count - 1) +
- ": \\u must be followed by four characters");
+ case ls_alpha:
+ if ((*p >= 'a') && (*p <= 'z')) {
+ append();
+ } else {
+ tokenError();
+ }
+ break;
case ls_string:
+ if (*p == '"') {
+ if (high_offset) {
+ QTC::TC("libtests", "JSON 16 dangling high");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(high_offset) +
+ ": UTF-16 high surrogate not followed by low "
+ "surrogate");
+ }
+ ignore();
+ return;
+ } else if (*p == '\\') {
+ ignore(ls_backslash);
+ } else {
+ append();
+ }
+ break;
+
case ls_backslash:
- QTC::TC("libtests", "JSON parse unterminated string");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unterminated string");
+ lex_state = ls_string;
+ switch (*p) {
+ case '\\':
+ case '\"':
+ case '/':
+ // \/ is allowed in json input, but so is /, so we
+ // don't map / to \/ in output.
+ token += *p;
+ break;
+ case 'b':
+ token += '\b';
+ break;
+ case 'f':
+ token += '\f';
+ break;
+ case 'n':
+ token += '\n';
+ break;
+ case 'r':
+ token += '\r';
+ break;
+ case 't':
+ token += '\t';
+ break;
+ case 'u':
+ lex_state = ls_u4;
+ u_count = 0;
+ u_value = 0;
+ break;
+ default:
+ lex_state = ls_backslash;
+ tokenError();
+ }
+ ignore();
break;
+
+ case ls_u4:
+ using ui = unsigned int;
+ if ('0' <= *p && *p <= '9') {
+ u_value = 16 * u_value + (ui(*p) - ui('0'));
+ } else if ('a' <= *p && *p <= 'f') {
+ u_value = 16 * u_value + (10 + ui(*p) - ui('a'));
+ } else if ('A' <= *p && *p <= 'F') {
+ u_value = 16 * u_value + (10 + ui(*p) - ui('A'));
+ } else {
+ tokenError();
+ }
+ if (++u_count == 4) {
+ handle_u_code(
+ u_value,
+ offset - 5,
+ high_surrogate,
+ high_offset,
+ token);
+ lex_state = ls_string;
+ }
+ ignore();
+ break;
+
+ default:
+ throw std::logic_error(
+ "JSONParser::getToken : trying to handle delimiter state");
}
}
}
+
+ // We only get here if on end of input or if the last character was a
+ // control character or other delimiter.
+
+ if (!token.empty()) {
+ switch (lex_state) {
+ case ls_top:
+ // Can't happen
+ throw std::logic_error("tok_start set in ls_top while parsing");
+ break;
+
+ case ls_number_leading_zero:
+ case ls_number_before_point:
+ case ls_number_after_point:
+ lex_state = ls_number;
+ break;
+
+ case ls_number:
+ case ls_alpha:
+ // terminal state
+ break;
+
+ default:
+ tokenError();
+ }
+ }
}
void
JSONParser::handleToken()
{
- if (token.empty()) {
+ if (lex_state == ls_top) {
return;
}
@@ -1048,73 +1192,96 @@ JSONParser::handleToken()
": material follows end of object: " + token);
}
- // Git string value
- std::string s_value;
- if (lex_state == ls_string) {
- // Token includes the quotation marks
- if (token.length() < 2) {
- throw std::logic_error("JSON string length < 2");
- }
- s_value = decode_string(token, offset - toO(token.length()));
- }
- // Based on the lexical state and value, figure out whether we are
- // looking at an item or a delimiter. It will always be exactly
- // one of those two or an error condition.
-
- std::shared_ptr<JSON> item;
- char delimiter = '\0';
- // Already verified that token is not empty
- char first_char = token.at(0);
- switch (lex_state) {
- case ls_top:
- switch (first_char) {
- case '{':
- item = std::make_shared<JSON>(JSON::makeDictionary());
- item->setStart(offset - toO(token.length()));
- break;
+ const static JSON null_item = JSON::makeNull();
+ JSON item;
+ auto tos = stack.empty() ? null_item : stack.back().item;
+ auto ls = lex_state;
+ lex_state = ls_top;
- case '[':
- item = std::make_shared<JSON>(JSON::makeArray());
- item->setStart(offset - toO(token.length()));
- break;
+ switch (ls) {
+ case ls_begin_dict:
+ item = JSON::makeDictionary();
+ break;
- default:
- delimiter = first_char;
- break;
- }
+ case ls_begin_array:
+ item = JSON::makeArray();
break;
- case ls_number:
- if (number_saw_point && (number_after_point == 0)) {
- QTC::TC("libtests", "JSON parse decimal with no digits");
+ case ls_colon:
+ if (parser_state != ps_dict_after_key) {
+ QTC::TC("libtests", "JSON parse unexpected :");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": decimal point with no digits");
+ ": unexpected colon");
}
- if ((number_before_point > 1) &&
- ((first_char == '0') ||
- ((first_char == '-') && (token.at(1) == '0')))) {
- QTC::TC("libtests", "JSON parse leading zero");
+ parser_state = ps_dict_after_colon;
+ return;
+
+ case ls_comma:
+ if (!((parser_state == ps_dict_after_item) ||
+ (parser_state == ps_array_after_item))) {
+ QTC::TC("libtests", "JSON parse unexpected ,");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": number with leading zero");
+ ": unexpected comma");
}
- if ((number_before_point == 0) && (number_after_point == 0)) {
- QTC::TC("libtests", "JSON parse number no digits");
+ if (parser_state == ps_dict_after_item) {
+ parser_state = ps_dict_after_comma;
+ } else if (parser_state == ps_array_after_item) {
+ parser_state = ps_array_after_comma;
+ } else {
+ throw std::logic_error("JSONParser::handleToken: unexpected parser"
+ " state for comma");
+ }
+ return;
+
+ case ls_end_array:
+ if (!(parser_state == ps_array_begin ||
+ parser_state == ps_array_after_item)) {
+ QTC::TC("libtests", "JSON parse unexpected ]");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": number with no digits");
+ ": unexpected array end delimiter");
}
- item = std::make_shared<JSON>(JSON::makeNumber(token));
+ parser_state = stack.back().state;
+ tos.setEnd(offset);
+ if (reactor) {
+ reactor->containerEnd(tos);
+ }
+ if (parser_state != ps_done) {
+ stack.pop_back();
+ }
+ return;
+
+ case ls_end_dict:
+ if (!((parser_state == ps_dict_begin) ||
+ (parser_state == ps_dict_after_item))) {
+ QTC::TC("libtests", "JSON parse unexpected }");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": unexpected dictionary end delimiter");
+ }
+ parser_state = stack.back().state;
+ tos.setEnd(offset);
+ if (reactor) {
+ reactor->containerEnd(tos);
+ }
+ if (parser_state != ps_done) {
+ stack.pop_back();
+ }
+ return;
+
+ case ls_number:
+ item = JSON::makeNumber(token);
break;
case ls_alpha:
if (token == "true") {
- item = std::make_shared<JSON>(JSON::makeBool(true));
+ item = JSON::makeBool(true);
} else if (token == "false") {
- item = std::make_shared<JSON>(JSON::makeBool(false));
+ item = JSON::makeBool(false);
} else if (token == "null") {
- item = std::make_shared<JSON>(JSON::makeNull());
+ item = JSON::makeNull();
} else {
QTC::TC("libtests", "JSON parse invalid keyword");
throw std::runtime_error(
@@ -1124,227 +1291,115 @@ JSONParser::handleToken()
break;
case ls_string:
- item = std::make_shared<JSON>(JSON::makeString(s_value));
+ if (parser_state == ps_dict_begin ||
+ parser_state == ps_dict_after_comma) {
+ dict_key = token;
+ dict_key_offset = token_start;
+ parser_state = ps_dict_after_key;
+ return;
+ } else {
+ item = JSON::makeString(token);
+ }
break;
- case ls_backslash:
- case ls_u4:
+ default:
throw std::logic_error(
- "tok_end is set while state = ls_backslash or ls_u4");
+ "JSONParser::handleToken : non-terminal lexer state encountered");
break;
}
- if ((item == nullptr) == (delimiter == '\0')) {
- throw std::logic_error(
- "JSONParser::handleToken: logic error: exactly one of item"
- " or delimiter must be set");
- }
-
- // See whether what we have is allowed at this point.
-
- if (item.get()) {
- switch (parser_state) {
- case ps_done:
- throw std::logic_error("can't happen; ps_done already handled");
- break;
-
- case ps_dict_after_key:
- QTC::TC("libtests", "JSON parse expected colon");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) + ": expected ':'");
- break;
-
- case ps_dict_after_item:
- QTC::TC("libtests", "JSON parse expected , or }");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": expected ',' or '}'");
- break;
-
- case ps_array_after_item:
- QTC::TC("libtests", "JSON parse expected, or ]");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": expected ',' or ']'");
- break;
-
- case ps_dict_begin:
- case ps_dict_after_comma:
- if (lex_state != ls_string) {
- QTC::TC("libtests", "JSON parse string as dict key");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": expect string as dictionary key");
- }
- break;
-
- case ps_top:
- case ps_dict_after_colon:
- case ps_array_begin:
- case ps_array_after_comma:
- break;
- // okay
- }
- } else if (delimiter == '}') {
- if (!((parser_state == ps_dict_begin) ||
- (parser_state == ps_dict_after_item)))
+ item.setStart(token_start);
+ item.setEnd(offset);
- {
- QTC::TC("libtests", "JSON parse unexpected }");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected dictionary end delimiter");
- }
- } else if (delimiter == ']') {
- if (!((parser_state == ps_array_begin) ||
- (parser_state == ps_array_after_item)))
+ switch (parser_state) {
+ case ps_dict_begin:
+ case ps_dict_after_comma:
+ QTC::TC("libtests", "JSON parse string as dict key");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": expect string as dictionary key");
+ break;
- {
- QTC::TC("libtests", "JSON parse unexpected ]");
+ case ps_dict_after_colon:
+ if (tos.checkDictionaryKeySeen(dict_key)) {
+ QTC::TC("libtests", "JSON parse duplicate key");
throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected array end delimiter");
+ "JSON: offset " + std::to_string(dict_key_offset) +
+ ": duplicated dictionary key");
}
- } else if (delimiter == ':') {
- if (parser_state != ps_dict_after_key) {
- QTC::TC("libtests", "JSON parse unexpected :");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected colon");
- }
- } else if (delimiter == ',') {
- if (!((parser_state == ps_dict_after_item) ||
- (parser_state == ps_array_after_item))) {
- QTC::TC("libtests", "JSON parse unexpected ,");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected comma");
+ if (!reactor || !reactor->dictionaryItem(dict_key, item)) {
+ tos.addDictionaryMember(dict_key, item);
}
- } else if (delimiter != '\0') {
- throw std::logic_error("JSONParser::handleToken: bad delimiter");
- }
-
- // Now we know we have a delimiter or item that is allowed. Do
- // whatever we need to do with it.
+ parser_state = ps_dict_after_item;
+ break;
- parser_state_e next_state = ps_top;
- if (delimiter == ':') {
- next_state = ps_dict_after_colon;
- } else if (delimiter == ',') {
- if (parser_state == ps_dict_after_item) {
- next_state = ps_dict_after_comma;
- } else if (parser_state == ps_array_after_item) {
- next_state = ps_array_after_comma;
- } else {
- throw std::logic_error("JSONParser::handleToken: unexpected parser"
- " state for comma");
- }
- } else if ((delimiter == '}') || (delimiter == ']')) {
- next_state = ps_stack.back();
- ps_stack.pop_back();
- auto tos = stack.back();
- tos->setEnd(offset);
- if (reactor) {
- reactor->containerEnd(*tos);
- }
- if (next_state != ps_done) {
- stack.pop_back();
- }
- } else if (delimiter != '\0') {
- throw std::logic_error(
- "JSONParser::handleToken: unexpected delimiter in transition");
- } else if (item.get()) {
- if (!(item->isArray() || item->isDictionary())) {
- item->setStart(offset - toO(token.length()));
- item->setEnd(offset);
+ case ps_array_begin:
+ case ps_array_after_comma:
+ if (!reactor || !reactor->arrayItem(item)) {
+ tos.addArrayElement(item);
}
+ parser_state = ps_array_after_item;
+ break;
- std::shared_ptr<JSON> tos;
- if (!stack.empty()) {
- tos = stack.back();
+ case ps_top:
+ if (!(item.isDictionary() || item.isArray())) {
+ stack.push_back({ps_done, item});
+ parser_state = ps_done;
+ return;
}
- switch (parser_state) {
- case ps_dict_begin:
- case ps_dict_after_comma:
- this->dict_key = s_value;
- this->dict_key_offset = item->getStart();
- item = nullptr;
- next_state = ps_dict_after_key;
- break;
+ parser_state = ps_done;
+ break;
- case ps_dict_after_colon:
- if (tos->checkDictionaryKeySeen(dict_key)) {
- QTC::TC("libtests", "JSON parse duplicate key");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(dict_key_offset) +
- ": duplicated dictionary key");
- }
- if (!reactor || !reactor->dictionaryItem(dict_key, *item)) {
- tos->addDictionaryMember(dict_key, *item);
- }
- next_state = ps_dict_after_item;
- break;
+ case ps_dict_after_key:
+ QTC::TC("libtests", "JSON parse expected colon");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) + ": expected ':'");
+ break;
- case ps_array_begin:
- case ps_array_after_comma:
- if (!reactor || !reactor->arrayItem(*item)) {
- tos->addArrayElement(*item);
- }
- next_state = ps_array_after_item;
- break;
+ case ps_dict_after_item:
+ QTC::TC("libtests", "JSON parse expected , or }");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) + ": expected ',' or '}'");
+ break;
- case ps_top:
- next_state = ps_done;
- break;
+ case ps_array_after_item:
+ QTC::TC("libtests", "JSON parse expected, or ]");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) + ": expected ',' or ']'");
+ break;
- case ps_dict_after_key:
- case ps_dict_after_item:
- case ps_array_after_item:
- case ps_done:
- throw std::logic_error(
- "JSONParser::handleToken: unexpected parser state");
- }
- } else {
+ case ps_done:
throw std::logic_error(
- "JSONParser::handleToken: unexpected null item in transition");
+ "JSONParser::handleToken: unexpected parser state");
}
- if (reactor && item.get()) {
+ if (item.isDictionary() || item.isArray()) {
+ stack.push_back({parser_state, item});
// Calling container start method is postponed until after
// adding the containers to their parent containers, if any.
// This makes it much easier to keep track of the current
// nesting level.
- if (item->isDictionary()) {
- reactor->dictionaryStart();
- } else if (item->isArray()) {
- reactor->arrayStart();
+ if (item.isDictionary()) {
+ if (reactor) {
+ reactor->dictionaryStart();
+ }
+ parser_state = ps_dict_begin;
+ } else if (item.isArray()) {
+ if (reactor) {
+ reactor->arrayStart();
+ }
+ parser_state = ps_array_begin;
}
- }
- // Prepare for next token
- if (item.get()) {
- if (item->isDictionary()) {
- stack.push_back(item);
- ps_stack.push_back(next_state);
- next_state = ps_dict_begin;
- } else if (item->isArray()) {
- stack.push_back(item);
- ps_stack.push_back(next_state);
- next_state = ps_array_begin;
- } else if (parser_state == ps_top) {
- stack.push_back(item);
+ if (stack.size() > 500) {
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": maximum object depth exceeded");
}
}
- if (ps_stack.size() > 500) {
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": maximum object depth exceeded");
- }
- parser_state = next_state;
- lex_state = ls_top;
}
-std::shared_ptr<JSON>
+JSON
JSONParser::parse()
{
while (!done) {
@@ -1355,8 +1410,8 @@ JSONParser::parse()
QTC::TC("libtests", "JSON parse premature EOF");
throw std::runtime_error("JSON: premature end of input");
}
- auto const& tos = stack.back();
- if (reactor && tos.get() && !(tos->isArray() || tos->isDictionary())) {
+ auto const& tos = stack.back().item;
+ if (reactor && !(tos.isArray() || tos.isDictionary())) {
reactor->topLevelScalar();
}
return tos;
@@ -1366,7 +1421,7 @@ JSON
JSON::parse(InputSource& is, Reactor* reactor)
{
JSONParser jp(is, reactor);
- return *jp.parse();
+ return jp.parse();
}
JSON
@@ -1374,7 +1429,7 @@ JSON::parse(std::string const& s)
{
BufferInputSource bis("json input", s);
JSONParser jp(bis, nullptr);
- return *jp.parse();
+ return jp.parse();
}
void
diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov
index 2ceef541..5e5c2e00 100644
--- a/libtests/libtests.testcov
+++ b/libtests/libtests.testcov
@@ -58,7 +58,6 @@ QPDFArgParser bad option for help 0
QPDFArgParser bad topic for help 0
QPDFArgParser invalid choice handler to unknown 0
JSON parse junk after object 0
-JSON parse decimal with no digits 0
JSON parse invalid keyword 0
JSON parse expected colon 0
JSON parse expected , or } 0
@@ -76,12 +75,13 @@ JSON parse duplicate point 0
JSON parse duplicate e 0
JSON parse unexpected sign 0
JSON parse numeric bad character 0
+JSON parse number minus no digits 0
+JSON parse incomplete number 0
JSON parse keyword bad character 0
JSON parse backslash bad character 0
-JSON parse unterminated string 0
+JSON parse control char in string 0
JSON parse leading zero 0
-JSON parse number no digits 0
-JSON parse premature end of u 0
+JSON parse ls premature end of input 0
JSON parse bad hex after u 0
JSONHandler unhandled value 0
JSONHandler unexpected key 0
diff --git a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test
index 112da0a9..699544f6 100644
--- a/libtests/qtest/json_parse.test
+++ b/libtests/qtest/json_parse.test
@@ -121,6 +121,14 @@ my @bad = (
"high high surrogate", # 38
"dangling high surrogate", # 39
"duplicate dictionary key", # 40
+ "decimal point after minus",# 41
+ "e after minus", # 42
+ "missing digit after e", # 43
+ "missing digit after e+/-", # 44
+ "tab char in string", # 45
+ "cr char in string", # 46
+ "lf char in string", # 47
+ "bs char in string", # 48
);
my $i = 0;
diff --git a/libtests/qtest/json_parse/bad-01.out b/libtests/qtest/json_parse/bad-01.out
index a4254cff..8ae96c30 100644
--- a/libtests/qtest/json_parse/bad-01.out
+++ b/libtests/qtest/json_parse/bad-01.out
@@ -1 +1 @@
-exception: bad-01.json: JSON: offset 9: material follows end of object: junk
+exception: bad-01.json: JSON: offset 8: material follows end of object: junk
diff --git a/libtests/qtest/json_parse/bad-02.out b/libtests/qtest/json_parse/bad-02.out
index 485c9658..212b2f4f 100644
--- a/libtests/qtest/json_parse/bad-02.out
+++ b/libtests/qtest/json_parse/bad-02.out
@@ -1 +1 @@
-exception: bad-02.json: JSON: offset 11: material follows end of object: junk
+exception: bad-02.json: JSON: offset 10: material follows end of object: junk
diff --git a/libtests/qtest/json_parse/bad-03.out b/libtests/qtest/json_parse/bad-03.out
index 38f35119..a1411e0e 100644
--- a/libtests/qtest/json_parse/bad-03.out
+++ b/libtests/qtest/json_parse/bad-03.out
@@ -1 +1 @@
-exception: bad-03.json: JSON: offset 16: material follows end of object: junk
+exception: bad-03.json: JSON: offset 15: material follows end of object: junk
diff --git a/libtests/qtest/json_parse/bad-04.out b/libtests/qtest/json_parse/bad-04.out
index 7fe71693..27d252f2 100644
--- a/libtests/qtest/json_parse/bad-04.out
+++ b/libtests/qtest/json_parse/bad-04.out
@@ -1 +1 @@
-exception: bad-04.json: JSON: offset 5: decimal point with no digits
+exception: bad-04.json: JSON: offset 4: unexpected character .
diff --git a/libtests/qtest/json_parse/bad-09.out b/libtests/qtest/json_parse/bad-09.out
index 21d2f1c1..979d53d0 100644
--- a/libtests/qtest/json_parse/bad-09.out
+++ b/libtests/qtest/json_parse/bad-09.out
@@ -1 +1 @@
-exception: bad-09.json: JSON: offset 3: expect string as dictionary key
+exception: bad-09.json: JSON: offset 2: expect string as dictionary key
diff --git a/libtests/qtest/json_parse/bad-18.out b/libtests/qtest/json_parse/bad-18.out
index 0428b64f..1e779e41 100644
--- a/libtests/qtest/json_parse/bad-18.out
+++ b/libtests/qtest/json_parse/bad-18.out
@@ -1 +1 @@
-exception: bad-18.json: JSON: null character at offset 5
+exception: bad-18.json: JSON: control or null character at offset 5
diff --git a/libtests/qtest/json_parse/bad-27.out b/libtests/qtest/json_parse/bad-27.out
index 2c2df076..4c1ecfeb 100644
--- a/libtests/qtest/json_parse/bad-27.out
+++ b/libtests/qtest/json_parse/bad-27.out
@@ -1 +1 @@
-exception: bad-27.json: JSON: offset 6: unterminated string
+exception: bad-27.json: JSON: offset 5: control character in string (missing "?)
diff --git a/libtests/qtest/json_parse/bad-28.out b/libtests/qtest/json_parse/bad-28.out
index d7db2aea..005a68d2 100644
--- a/libtests/qtest/json_parse/bad-28.out
+++ b/libtests/qtest/json_parse/bad-28.out
@@ -1 +1 @@
-exception: bad-28.json: JSON: offset 16: unterminated string
+exception: bad-28.json: JSON: premature end of input
diff --git a/libtests/qtest/json_parse/bad-30.out b/libtests/qtest/json_parse/bad-30.out
index bff961af..ec63bb09 100644
--- a/libtests/qtest/json_parse/bad-30.out
+++ b/libtests/qtest/json_parse/bad-30.out
@@ -1 +1 @@
-exception: bad-30.json: JSON: offset 5: decimal point with no digits
+exception: bad-30.json: JSON: offset 4: numeric literal: incomplete number
diff --git a/libtests/qtest/json_parse/bad-31.json b/libtests/qtest/json_parse/bad-31.json
index 39cdd0de..277cc02f 100644
--- a/libtests/qtest/json_parse/bad-31.json
+++ b/libtests/qtest/json_parse/bad-31.json
@@ -1 +1 @@
--
+-
diff --git a/libtests/qtest/json_parse/bad-31.out b/libtests/qtest/json_parse/bad-31.out
index 344f42e8..af177726 100644
--- a/libtests/qtest/json_parse/bad-31.out
+++ b/libtests/qtest/json_parse/bad-31.out
@@ -1 +1 @@
-exception: bad-31.json: JSON: offset 2: number with no digits
+exception: bad-31.json: JSON: offset 1: numeric literal: incomplete number
diff --git a/libtests/qtest/json_parse/bad-32.out b/libtests/qtest/json_parse/bad-32.out
index 4372e0cf..41a681c0 100644
--- a/libtests/qtest/json_parse/bad-32.out
+++ b/libtests/qtest/json_parse/bad-32.out
@@ -1 +1 @@
-exception: bad-32.json: JSON: offset 5: number with leading zero
+exception: bad-32.json: JSON: offset 1: number with leading zero
diff --git a/libtests/qtest/json_parse/bad-33.out b/libtests/qtest/json_parse/bad-33.out
index ae41e48b..98a72942 100644
--- a/libtests/qtest/json_parse/bad-33.out
+++ b/libtests/qtest/json_parse/bad-33.out
@@ -1 +1 @@
-exception: bad-33.json: JSON: offset 6: number with leading zero
+exception: bad-33.json: JSON: offset 2: number with leading zero
diff --git a/libtests/qtest/json_parse/bad-34.out b/libtests/qtest/json_parse/bad-34.out
index f9db587a..c21838c4 100644
--- a/libtests/qtest/json_parse/bad-34.out
+++ b/libtests/qtest/json_parse/bad-34.out
@@ -1 +1 @@
-exception: bad-34.json: JSON: offset 3: \u must be followed by four characters
+exception: bad-34.json: JSON: premature end of input
diff --git a/libtests/qtest/json_parse/bad-41.json b/libtests/qtest/json_parse/bad-41.json
new file mode 100644
index 00000000..dad59049
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-41.json
@@ -0,0 +1,2 @@
+-.123
+
diff --git a/libtests/qtest/json_parse/bad-41.out b/libtests/qtest/json_parse/bad-41.out
new file mode 100644
index 00000000..bebcfdb9
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-41.out
@@ -0,0 +1 @@
+exception: bad-41.json: JSON: offset 1: numeric literal: no digit after minus sign
diff --git a/libtests/qtest/json_parse/bad-42.json b/libtests/qtest/json_parse/bad-42.json
new file mode 100644
index 00000000..2f9148b0
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-42.json
@@ -0,0 +1 @@
+-e123
diff --git a/libtests/qtest/json_parse/bad-42.out b/libtests/qtest/json_parse/bad-42.out
new file mode 100644
index 00000000..96e9a0a3
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-42.out
@@ -0,0 +1 @@
+exception: bad-42.json: JSON: offset 1: numeric literal: no digit after minus sign
diff --git a/libtests/qtest/json_parse/bad-43.json b/libtests/qtest/json_parse/bad-43.json
new file mode 100644
index 00000000..896a676a
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-43.json
@@ -0,0 +1 @@
+123e
diff --git a/libtests/qtest/json_parse/bad-43.out b/libtests/qtest/json_parse/bad-43.out
new file mode 100644
index 00000000..84070aa9
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-43.out
@@ -0,0 +1 @@
+exception: bad-43.json: JSON: offset 4: numeric literal: incomplete number
diff --git a/libtests/qtest/json_parse/bad-44.json b/libtests/qtest/json_parse/bad-44.json
new file mode 100644
index 00000000..3a5d7dff
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-44.json
@@ -0,0 +1 @@
+123e+
diff --git a/libtests/qtest/json_parse/bad-44.out b/libtests/qtest/json_parse/bad-44.out
new file mode 100644
index 00000000..f72120c4
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-44.out
@@ -0,0 +1 @@
+exception: bad-44.json: JSON: offset 5: numeric literal: incomplete number
diff --git a/libtests/qtest/json_parse/bad-45.json b/libtests/qtest/json_parse/bad-45.json
new file mode 100644
index 00000000..16107dc0
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-45.json
@@ -0,0 +1 @@
+"Tab in str ing"
diff --git a/libtests/qtest/json_parse/bad-45.out b/libtests/qtest/json_parse/bad-45.out
new file mode 100644
index 00000000..d4320b0a
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-45.out
@@ -0,0 +1 @@
+exception: bad-45.json: JSON: offset 11: control character in string (missing "?)
diff --git a/libtests/qtest/json_parse/bad-46.json b/libtests/qtest/json_parse/bad-46.json
new file mode 100644
index 00000000..60873bf4
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-46.json
@@ -0,0 +1 @@
+"cr in str ing"
diff --git a/libtests/qtest/json_parse/bad-46.out b/libtests/qtest/json_parse/bad-46.out
new file mode 100644
index 00000000..50aa5ffb
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-46.out
@@ -0,0 +1 @@
+exception: bad-46.json: JSON: offset 10: control character in string (missing "?)
diff --git a/libtests/qtest/json_parse/bad-47.json b/libtests/qtest/json_parse/bad-47.json
new file mode 100644
index 00000000..3c75427a
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-47.json
@@ -0,0 +1,2 @@
+"lf in str
+ing"
diff --git a/libtests/qtest/json_parse/bad-47.out b/libtests/qtest/json_parse/bad-47.out
new file mode 100644
index 00000000..39f9d3d5
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-47.out
@@ -0,0 +1 @@
+exception: bad-47.json: JSON: offset 10: control character in string (missing "?)
diff --git a/libtests/qtest/json_parse/bad-48.json b/libtests/qtest/json_parse/bad-48.json
new file mode 100644
index 00000000..1e605808
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-48.json
@@ -0,0 +1 @@
+"bs in string" \ No newline at end of file
diff --git a/libtests/qtest/json_parse/bad-48.out b/libtests/qtest/json_parse/bad-48.out
new file mode 100644
index 00000000..0b20fc7a
--- /dev/null
+++ b/libtests/qtest/json_parse/bad-48.out
@@ -0,0 +1 @@
+exception: bad-48.json: JSON: control or null character at offset 10
diff --git a/libtests/qtest/json_parse/good-01-react.out b/libtests/qtest/json_parse/good-01-react.out
index e3813bcc..3951272d 100644
--- a/libtests/qtest/json_parse/good-01-react.out
+++ b/libtests/qtest/json_parse/good-01-react.out
@@ -1,13 +1,13 @@
dictionary start
dictionary item: a -> [6, 11): "bcd"
-dictionary item: e -> [18, 0): []
+dictionary item: e -> [18, 19): []
array start
array item: [19, 20): 1
array item: [41, 42): 2
array item: [44, 45): 3
array item: [46, 47): 4
array item: [48, 54): "five"
-array item: [56, 0): {}
+array item: [56, 57): {}
dictionary start
dictionary item: six -> [64, 65): 7
dictionary item: 8 -> [72, 73): 9
diff --git a/libtests/qtest/json_parse/good-04-react.out b/libtests/qtest/json_parse/good-04-react.out
index ded004b2..8d931535 100644
--- a/libtests/qtest/json_parse/good-04-react.out
+++ b/libtests/qtest/json_parse/good-04-react.out
@@ -1,15 +1,15 @@
array start
-array item: [1, 0): []
+array item: [1, 2): []
array start
-array item: [2, 0): []
+array item: [2, 3): []
array start
-array item: [3, 0): {}
+array item: [3, 4): {}
dictionary start
container end: [3, 5): {}
container end: [2, 6): []
-array item: [8, 0): {}
+array item: [8, 9): {}
dictionary start
-dictionary item: -> [13, 0): {}
+dictionary item: -> [13, 14): {}
dictionary start
container end: [13, 15): {}
container end: [8, 16): {}
diff --git a/libtests/qtest/json_parse/good-10-react.out b/libtests/qtest/json_parse/good-10-react.out
index 3cceeb2f..8c31f915 100644
--- a/libtests/qtest/json_parse/good-10-react.out
+++ b/libtests/qtest/json_parse/good-10-react.out
@@ -1,21 +1,21 @@
dictionary start
-dictionary item: a -> [9, 0): []
+dictionary item: a -> [9, 10): []
array start
array item: [10, 11): 1
array item: [13, 14): 2
-array item: [16, 0): {}
+array item: [16, 17): {}
dictionary start
dictionary item: x -> [22, 25): "y"
container end: [16, 26): {}
array item: [28, 29): 3
-array item: [31, 0): {}
+array item: [31, 32): {}
dictionary start
dictionary item: keep -> [40, 61): "not in final output"
container end: [31, 62): {
"keep": "not in final output"
}
container end: [9, 63): []
-dictionary item: keep -> [75, 0): []
+dictionary item: keep -> [75, 76): []
array start
array item: [76, 77): 1
array item: [79, 83): null
@@ -23,7 +23,7 @@ array item: [85, 86): 2
array item: [88, 93): false
array item: [95, 101): "keep"
array item: [103, 104): 3
-array item: [106, 0): []
+array item: [106, 107): []
array start
array item: [107, 113): "this"
array item: [115, 121): "keep"
diff --git a/libtests/qtest/json_parse/good-11-react.out b/libtests/qtest/json_parse/good-11-react.out
index 6cf3345e..6d7d4275 100644
--- a/libtests/qtest/json_parse/good-11-react.out
+++ b/libtests/qtest/json_parse/good-11-react.out
@@ -1,12 +1,12 @@
array start
-array item: [4, 0): []
+array item: [4, 5): []
array start
array item: [5, 11): "u:π"
array item: [13, 23): "u:π"
array item: [25, 39): "b:EFBBBFCF80"
array item: [41, 53): "b:feff03c0"
container end: [4, 54): []
-array item: [58, 0): []
+array item: [58, 59): []
array start
array item: [59, 67): "u:🥔"
array item: [69, 85): "u:🥔"
diff --git a/qpdf/qtest/qpdf/qjson-objects-not-dict.out b/qpdf/qtest/qpdf/qjson-objects-not-dict.out
index 219b00e2..817ab4c4 100644
--- a/qpdf/qtest/qpdf/qjson-objects-not-dict.out
+++ b/qpdf/qtest/qpdf/qjson-objects-not-dict.out
@@ -1,3 +1,3 @@
-WARNING: qjson-objects-not-dict.json (offset 82): "qpdf[1]" must be a dictionary
+WARNING: qjson-objects-not-dict.json (offset 81): "qpdf[1]" must be a dictionary
WARNING: qjson-objects-not-dict.json: "qpdf[1].trailer" was not seen
qpdf: qjson-objects-not-dict.json: errors found in JSON
diff --git a/qpdf/qtest/qpdf/qjson-stream-not-dict.out b/qpdf/qtest/qpdf/qjson-stream-not-dict.out
index 6a462ff6..fbd953c6 100644
--- a/qpdf/qtest/qpdf/qjson-stream-not-dict.out
+++ b/qpdf/qtest/qpdf/qjson-stream-not-dict.out
@@ -1,3 +1,3 @@
-WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 123): "stream" must be a dictionary
+WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 122): "stream" must be a dictionary
WARNING: qjson-stream-not-dict.json: "qpdf[1].trailer" was not seen
qpdf: qjson-stream-not-dict.json: errors found in JSON
diff --git a/qpdf/qtest/qpdf/qjson-trailer-not-dict.out b/qpdf/qtest/qpdf/qjson-trailer-not-dict.out
index 3b9d482d..b8fe65e1 100644
--- a/qpdf/qtest/qpdf/qjson-trailer-not-dict.out
+++ b/qpdf/qtest/qpdf/qjson-trailer-not-dict.out
@@ -1,2 +1,2 @@
-WARNING: qjson-trailer-not-dict.json (trailer, offset 1269): "trailer.value" must be a dictionary
+WARNING: qjson-trailer-not-dict.json (trailer, offset 1268): "trailer.value" must be a dictionary
qpdf: qjson-trailer-not-dict.json: errors found in JSON