aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf/JSON.cc
diff options
context:
space:
mode:
Diffstat (limited to 'libqpdf/JSON.cc')
-rw-r--r--libqpdf/JSON.cc1103
1 files changed, 579 insertions, 524 deletions
diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc
index aa9b6e41..76db652b 100644
--- a/libqpdf/JSON.cc
+++ b/libqpdf/JSON.cc
@@ -4,19 +4,11 @@
#include <qpdf/Pl_Base64.hh>
#include <qpdf/Pl_Concatenate.hh>
#include <qpdf/Pl_String.hh>
-#include <qpdf/QIntC.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <cstring>
#include <stdexcept>
-template <typename T>
-static qpdf_offset_t
-toO(T const& i)
-{
- return QIntC::to_offset(i);
-}
-
JSON::Members::Members(std::shared_ptr<JSON_value> value) :
value(value),
start(0),
@@ -622,11 +614,6 @@ namespace
is(is),
reactor(reactor),
lex_state(ls_top),
- number_before_point(0),
- number_after_point(0),
- number_after_e(0),
- number_saw_point(false),
- number_saw_e(false),
bytes(0),
p(buf),
u_count(0),
@@ -637,21 +624,9 @@ namespace
{
}
- std::shared_ptr<JSON> parse();
+ JSON parse();
private:
- void getToken();
- void handleToken();
- static std::string
- decode_string(std::string const& json, qpdf_offset_t offset);
- static void handle_u_code(
- char const* s,
- qpdf_offset_t offset,
- qpdf_offset_t i,
- unsigned long& high_surrogate,
- qpdf_offset_t& high_offset,
- std::string& result);
-
enum parser_state_e {
ps_top,
ps_dict_begin,
@@ -668,30 +643,65 @@ namespace
enum lex_state_e {
ls_top,
ls_number,
+ ls_number_minus,
+ ls_number_leading_zero,
+ ls_number_before_point,
+ ls_number_point,
+ ls_number_after_point,
+ ls_number_e,
+ ls_number_e_sign,
ls_alpha,
ls_string,
ls_backslash,
ls_u4,
+ ls_begin_array,
+ ls_end_array,
+ ls_begin_dict,
+ ls_end_dict,
+ ls_colon,
+ ls_comma,
};
+ struct StackFrame
+ {
+ StackFrame(parser_state_e state, JSON& item) :
+ state(state),
+ item(item)
+ {
+ }
+
+ parser_state_e state;
+ JSON item;
+ };
+
+ void getToken();
+ void handleToken();
+ void tokenError();
+ static void handle_u_code(
+ unsigned long codepoint,
+ qpdf_offset_t offset,
+ unsigned long& high_surrogate,
+ qpdf_offset_t& high_offset,
+ std::string& result);
+ inline void append();
+ inline void append(lex_state_e);
+ inline void ignore();
+ inline void ignore(lex_state_e);
+
InputSource& is;
JSON::Reactor* reactor;
lex_state_e lex_state;
- size_t number_before_point;
- size_t number_after_point;
- size_t number_after_e;
- bool number_saw_point;
- bool number_saw_e;
char buf[16384];
size_t bytes;
char const* p;
qpdf_offset_t u_count;
+ unsigned long u_value{0};
qpdf_offset_t offset;
bool done;
std::string token;
+ qpdf_offset_t token_start{0};
parser_state_e parser_state;
- std::vector<std::shared_ptr<JSON>> stack;
- std::vector<parser_state_e> ps_stack;
+ std::vector<StackFrame> stack;
std::string dict_key;
qpdf_offset_t dict_key_offset;
};
@@ -699,22 +709,15 @@ namespace
void
JSONParser::handle_u_code(
- char const* s,
+ unsigned long codepoint,
qpdf_offset_t offset,
- qpdf_offset_t i,
unsigned long& high_surrogate,
qpdf_offset_t& high_offset,
std::string& result)
{
- std::string hex = QUtil::hex_decode(std::string(s + i + 1, s + i + 5));
- unsigned char high = static_cast<unsigned char>(hex.at(0));
- unsigned char low = static_cast<unsigned char>(hex.at(1));
- unsigned long codepoint = high;
- codepoint <<= 8;
- codepoint += low;
if ((codepoint & 0xFC00) == 0xD800) {
// high surrogate
- qpdf_offset_t new_high_offset = offset + i;
+ qpdf_offset_t new_high_offset = offset;
if (high_offset) {
QTC::TC("libtests", "JSON 16 high high");
throw std::runtime_error(
@@ -727,10 +730,10 @@ JSONParser::handle_u_code(
high_surrogate = codepoint;
} else if ((codepoint & 0xFC00) == 0xDC00) {
// low surrogate
- if (offset + i != (high_offset + 6)) {
+ if (offset != (high_offset + 6)) {
QTC::TC("libtests", "JSON 16 low not after high");
throw std::runtime_error(
- "JSON: offset " + std::to_string(offset + i) +
+ "JSON: offset " + std::to_string(offset) +
": UTF-16 low surrogate found not immediately after high"
" surrogate");
}
@@ -743,88 +746,123 @@ JSONParser::handle_u_code(
}
}
-std::string
-JSONParser::decode_string(std::string const& str, qpdf_offset_t offset)
+void
+JSONParser::tokenError()
{
- // The string has already been validated when this private method
- // is called, so errors are logic errors instead of runtime
- // errors.
- size_t len = str.length();
- if ((len < 2) || (str.at(0) != '"') || (str.at(len - 1) != '"')) {
- throw std::logic_error(
- "JSON Parse: decode_string called with other than \"...\"");
+ if (done) {
+ QTC::TC("libtests", "JSON parse ls premature end of input");
+ throw std::runtime_error("JSON: premature end of input");
}
- char const* s = str.c_str();
- // Move inside the quotation marks
- ++s;
- len -= 2;
- // Keep track of UTF-16 surrogate pairs.
- unsigned long high_surrogate = 0;
- qpdf_offset_t high_offset = 0;
- std::string result;
- qpdf_offset_t olen = toO(len);
- for (qpdf_offset_t i = 0; i < olen; ++i) {
- if (s[i] == '\\') {
- if (i + 1 >= olen) {
- throw std::logic_error("JSON parse: nothing after \\");
- }
- char ch = s[++i];
- switch (ch) {
- case '\\':
- case '\"':
- case '/':
- // \/ is allowed in json input, but so is /, so we
- // don't map / to \/ in output.
- result.append(1, ch);
- break;
- case 'b':
- result.append(1, '\b');
- break;
- case 'f':
- result.append(1, '\f');
- break;
- case 'n':
- result.append(1, '\n');
- break;
- case 'r':
- result.append(1, '\r');
- break;
- case 't':
- result.append(1, '\t');
- break;
- case 'u':
- if (i + 4 >= olen) {
- throw std::logic_error(
- "JSON parse: not enough characters after \\u");
- }
- handle_u_code(
- s, offset, i, high_surrogate, high_offset, result);
- i += 4;
- break;
- default:
- throw std::logic_error("JSON parse: bad character after \\");
- break;
- }
+
+ if (lex_state == ls_u4) {
+ QTC::TC("libtests", "JSON parse bad hex after u");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset - u_count - 1) +
+ ": \\u must be followed by four hex digits");
+ } else if (lex_state == ls_alpha) {
+ QTC::TC("libtests", "JSON parse keyword bad character");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": keyword: unexpected character " + std::string(p, 1));
+ } else if (lex_state == ls_string) {
+ QTC::TC("libtests", "JSON parse control char in string");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": control character in string (missing \"?)");
+ } else if (lex_state == ls_backslash) {
+ QTC::TC("libtests", "JSON parse backslash bad character");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": invalid character after backslash: " + std::string(p, 1));
+ }
+
+ if (*p == '.') {
+ if (lex_state == ls_number || lex_state == ls_number_e ||
+ lex_state == ls_number_e_sign) {
+ QTC::TC("libtests", "JSON parse point after e");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: decimal point after e");
} else {
- result.append(1, s[i]);
+ QTC::TC("libtests", "JSON parse duplicate point");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: decimal point already seen");
}
- }
- if (high_offset) {
- QTC::TC("libtests", "JSON 16 dangling high");
+ } else if (*p == 'e' || *p == 'E') {
+ QTC::TC("libtests", "JSON parse duplicate e");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: e already seen");
+ } else if ((*p == '+') || (*p == '-')) {
+ QTC::TC("libtests", "JSON parse unexpected sign");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: unexpected sign");
+ } else if (QUtil::is_space(*p) || strchr("{}[]:,", *p)) {
+ QTC::TC("libtests", "JSON parse incomplete number");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: incomplete number");
+
+ } else {
+ QTC::TC("libtests", "JSON parse numeric bad character");
throw std::runtime_error(
- "JSON: offset " + std::to_string(high_offset) +
- ": UTF-16 high surrogate not followed by low surrogate");
+ "JSON: offset " + std::to_string(offset) +
+ ": numeric literal: unexpected character " + std::string(p, 1));
}
- return result;
+ throw std::logic_error("JSON::tokenError : unhandled error");
+}
+
+// Append current character to token and advance to next input character.
+inline void
+JSONParser::append()
+{
+ token += *p;
+ ++p;
+ ++offset;
+}
+
+// Append current character to token, advance to next input character and
+// transition to 'next' lexer state.
+inline void
+JSONParser::append(lex_state_e next)
+{
+ lex_state = next;
+ token += *p;
+ ++p;
+ ++offset;
+}
+
+// Advance to next input character without appending the current character to
+// token.
+inline void
+JSONParser::ignore()
+{
+ ++p;
+ ++offset;
+}
+
+// Advance to next input character without appending the current character to
+// token and transition to 'next' lexer state.
+inline void
+JSONParser::ignore(lex_state_e next)
+{
+ lex_state = next;
+ ++p;
+ ++offset;
}
void
JSONParser::getToken()
{
- enum { append, ignore, reread } action = append;
- bool ready = false;
token.clear();
- while (!done) {
+
+ // Keep track of UTF-16 surrogate pairs.
+ unsigned long high_surrogate = 0;
+ qpdf_offset_t high_offset = 0;
+
+ while (true) {
if (p == (buf + bytes)) {
p = buf;
bytes = is.read(buf, sizeof(buf));
@@ -834,210 +872,316 @@ JSONParser::getToken()
}
}
- if (*p == 0) {
- QTC::TC("libtests", "JSON parse null character");
- throw std::runtime_error(
- "JSON: null character at offset " + std::to_string(offset));
- }
- action = append;
- switch (lex_state) {
- case ls_top:
- if (*p == '"') {
- lex_state = ls_string;
- } else if (QUtil::is_space(*p)) {
- action = ignore;
- } else if ((*p >= 'a') && (*p <= 'z')) {
- lex_state = ls_alpha;
- } else if (*p == '-') {
- lex_state = ls_number;
- number_before_point = 0;
- number_after_point = 0;
- number_after_e = 0;
- number_saw_point = false;
- number_saw_e = false;
- } else if ((*p >= '0') && (*p <= '9')) {
- lex_state = ls_number;
- number_before_point = 1;
- number_after_point = 0;
- number_after_e = 0;
- number_saw_point = false;
- number_saw_e = false;
- } else if (*p == '.') {
- lex_state = ls_number;
- number_before_point = 0;
- number_after_point = 0;
- number_after_e = 0;
- number_saw_point = true;
- number_saw_e = false;
- } else if (strchr("{}[]:,", *p)) {
- ready = true;
+ if ((*p < 32 && *p >= 0)) {
+ if (*p == '\t' || *p == '\n' || *p == '\r') {
+ // Legal white space not permitted in strings. This will always
+ // end the current token (unless we are still before the start
+ // of the token).
+ if (lex_state == ls_top) {
+ ignore();
+ } else {
+ break;
+ }
+
} else {
- QTC::TC("libtests", "JSON parse bad character");
+ QTC::TC("libtests", "JSON parse null character");
throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected character " + std::string(p, 1));
+ "JSON: control or null character at offset " +
+ std::to_string(offset));
}
- break;
-
- case ls_number:
- if ((*p >= '0') && (*p <= '9')) {
- if (number_saw_e) {
- ++number_after_e;
- } else if (number_saw_point) {
- ++number_after_point;
+ } else if (*p == ',') {
+ if (lex_state == ls_top) {
+ ignore(ls_comma);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == ':') {
+ if (lex_state == ls_top) {
+ ignore(ls_colon);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == ' ') {
+ if (lex_state == ls_top) {
+ ignore();
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == '{') {
+ if (lex_state == ls_top) {
+ token_start = offset;
+ ignore(ls_begin_dict);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == '}') {
+ if (lex_state == ls_top) {
+ ignore(ls_end_dict);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == '[') {
+ if (lex_state == ls_top) {
+ token_start = offset;
+ ignore(ls_begin_array);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else if (*p == ']') {
+ if (lex_state == ls_top) {
+ ignore(ls_end_array);
+ return;
+ } else if (lex_state == ls_string) {
+ append();
+ } else {
+ break;
+ }
+ } else {
+ switch (lex_state) {
+ case ls_top:
+ token_start = offset;
+ if (*p == '"') {
+ ignore(ls_string);
+ } else if ((*p >= 'a') && (*p <= 'z')) {
+ append(ls_alpha);
+ } else if (*p == '-') {
+ append(ls_number_minus);
+ } else if ((*p >= '1') && (*p <= '9')) {
+ append(ls_number_before_point);
+ } else if (*p == '0') {
+ append(ls_number_leading_zero);
} else {
- ++number_before_point;
- }
- } else if (*p == '.') {
- if (number_saw_e) {
- QTC::TC("libtests", "JSON parse point after e");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": numeric literal: decimal point after e");
- } else if (number_saw_point) {
- QTC::TC("libtests", "JSON parse duplicate point");
+ QTC::TC("libtests", "JSON parse bad character");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": numeric literal: decimal point already seen");
- } else {
- number_saw_point = true;
+ ": unexpected character " + std::string(p, 1));
}
- } else if (*p == 'e') {
- if (number_saw_e) {
- QTC::TC("libtests", "JSON parse duplicate e");
+ break;
+
+ case ls_number_minus:
+ if ((*p >= '1') && (*p <= '9')) {
+ append(ls_number_before_point);
+ } else if (*p == '0') {
+ append(ls_number_leading_zero);
+ } else {
+ QTC::TC("libtests", "JSON parse number minus no digits");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": numeric literal: e already seen");
- } else {
- number_saw_e = true;
+ ": numeric literal: no digit after minus sign");
}
- } else if ((*p == '+') || (*p == '-')) {
- if (number_saw_e && (number_after_e == 0)) {
- // okay
+ break;
+
+ case ls_number_leading_zero:
+ if (*p == '.') {
+ append(ls_number_point);
+ } else if (*p == 'e' || *p == 'E') {
+ append(ls_number_e);
} else {
- QTC::TC("libtests", "JSON parse unexpected sign");
+ QTC::TC("libtests", "JSON parse leading zero");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": numeric literal: unexpected sign");
+ ": number with leading zero");
}
- } else if (QUtil::is_space(*p)) {
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- action = reread;
- ready = true;
- } else {
- QTC::TC("libtests", "JSON parse numeric bad character");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": numeric literal: unexpected character " +
- std::string(p, 1));
- }
- break;
+ break;
- case ls_alpha:
- if ((*p >= 'a') && (*p <= 'z')) {
- // okay
- } else if (QUtil::is_space(*p)) {
- action = ignore;
- ready = true;
- } else if (strchr("{}[]:,", *p)) {
- action = reread;
- ready = true;
- } else {
- QTC::TC("libtests", "JSON parse keyword bad character");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": keyword: unexpected character " + std::string(p, 1));
- }
- break;
+ case ls_number_before_point:
+ if ((*p >= '0') && (*p <= '9')) {
+ append();
+ } else if (*p == '.') {
+ append(ls_number_point);
+ } else if (*p == 'e' || *p == 'E') {
+ append(ls_number_e);
+ } else {
+ tokenError();
+ }
+ break;
- case ls_string:
- if (*p == '"') {
- ready = true;
- } else if (*p == '\\') {
- lex_state = ls_backslash;
- }
- break;
+ case ls_number_point:
+ if ((*p >= '0') && (*p <= '9')) {
+ append(ls_number_after_point);
+ } else {
+ tokenError();
+ }
+ break;
- case ls_backslash:
- /* cSpell: ignore bfnrt */
- if (strchr("\\\"/bfnrt", *p)) {
- lex_state = ls_string;
- } else if (*p == 'u') {
- lex_state = ls_u4;
- u_count = 0;
- } else {
- QTC::TC("libtests", "JSON parse backslash bad character");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": invalid character after backslash: " +
- std::string(p, 1));
- }
- break;
+ case ls_number_after_point:
+ if ((*p >= '0') && (*p <= '9')) {
+ append();
+ } else if (*p == 'e' || *p == 'E') {
+ append(ls_number_e);
+ } else {
+ tokenError();
+ }
+ break;
- case ls_u4:
- if (!QUtil::is_hex_digit(*p)) {
- QTC::TC("libtests", "JSON parse bad hex after u");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset - u_count - 1) +
- ": \\u must be followed by four hex digits");
- }
- if (++u_count == 4) {
- lex_state = ls_string;
- }
- break;
- }
- switch (action) {
- case reread:
- break;
- case append:
- token.append(1, *p);
- // fall through
- case ignore:
- ++p;
- ++offset;
- break;
- }
- if (ready) {
- break;
- }
- }
- if (done) {
- if ((!token.empty()) && (!ready)) {
- switch (lex_state) {
- case ls_top:
- // Can't happen
- throw std::logic_error("tok_start set in ls_top while parsing");
+ case ls_number_e:
+ if ((*p >= '0') && (*p <= '9')) {
+ append(ls_number);
+ } else if ((*p == '+') || (*p == '-')) {
+ append(ls_number_e_sign);
+ } else {
+ tokenError();
+ }
+ break;
+
+ case ls_number_e_sign:
+ if ((*p >= '0') && (*p <= '9')) {
+ append(ls_number);
+ } else {
+ tokenError();
+ }
break;
case ls_number:
- case ls_alpha:
- // okay
+ // We only get here after we have seen an exponent.
+ if ((*p >= '0') && (*p <= '9')) {
+ append();
+ } else {
+ tokenError();
+ }
break;
- case ls_u4:
- QTC::TC("libtests", "JSON parse premature end of u");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset - u_count - 1) +
- ": \\u must be followed by four characters");
+ case ls_alpha:
+ if ((*p >= 'a') && (*p <= 'z')) {
+ append();
+ } else {
+ tokenError();
+ }
+ break;
case ls_string:
+ if (*p == '"') {
+ if (high_offset) {
+ QTC::TC("libtests", "JSON 16 dangling high");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(high_offset) +
+ ": UTF-16 high surrogate not followed by low "
+ "surrogate");
+ }
+ ignore();
+ return;
+ } else if (*p == '\\') {
+ ignore(ls_backslash);
+ } else {
+ append();
+ }
+ break;
+
case ls_backslash:
- QTC::TC("libtests", "JSON parse unterminated string");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unterminated string");
+ lex_state = ls_string;
+ switch (*p) {
+ case '\\':
+ case '\"':
+ case '/':
+ // \/ is allowed in json input, but so is /, so we
+ // don't map / to \/ in output.
+ token += *p;
+ break;
+ case 'b':
+ token += '\b';
+ break;
+ case 'f':
+ token += '\f';
+ break;
+ case 'n':
+ token += '\n';
+ break;
+ case 'r':
+ token += '\r';
+ break;
+ case 't':
+ token += '\t';
+ break;
+ case 'u':
+ lex_state = ls_u4;
+ u_count = 0;
+ u_value = 0;
+ break;
+ default:
+ lex_state = ls_backslash;
+ tokenError();
+ }
+ ignore();
break;
+
+ case ls_u4:
+ using ui = unsigned int;
+ if ('0' <= *p && *p <= '9') {
+ u_value = 16 * u_value + (ui(*p) - ui('0'));
+ } else if ('a' <= *p && *p <= 'f') {
+ u_value = 16 * u_value + (10 + ui(*p) - ui('a'));
+ } else if ('A' <= *p && *p <= 'F') {
+ u_value = 16 * u_value + (10 + ui(*p) - ui('A'));
+ } else {
+ tokenError();
+ }
+ if (++u_count == 4) {
+ handle_u_code(
+ u_value,
+ offset - 5,
+ high_surrogate,
+ high_offset,
+ token);
+ lex_state = ls_string;
+ }
+ ignore();
+ break;
+
+ default:
+ throw std::logic_error(
+ "JSONParser::getToken : trying to handle delimiter state");
}
}
}
+
+ // We only get here if on end of input or if the last character was a
+ // control character or other delimiter.
+
+ if (!token.empty()) {
+ switch (lex_state) {
+ case ls_top:
+ // Can't happen
+ throw std::logic_error("tok_start set in ls_top while parsing");
+ break;
+
+ case ls_number_leading_zero:
+ case ls_number_before_point:
+ case ls_number_after_point:
+ lex_state = ls_number;
+ break;
+
+ case ls_number:
+ case ls_alpha:
+ // terminal state
+ break;
+
+ default:
+ tokenError();
+ }
+ }
}
void
JSONParser::handleToken()
{
- if (token.empty()) {
+ if (lex_state == ls_top) {
return;
}
@@ -1048,73 +1192,96 @@ JSONParser::handleToken()
": material follows end of object: " + token);
}
- // Git string value
- std::string s_value;
- if (lex_state == ls_string) {
- // Token includes the quotation marks
- if (token.length() < 2) {
- throw std::logic_error("JSON string length < 2");
- }
- s_value = decode_string(token, offset - toO(token.length()));
- }
- // Based on the lexical state and value, figure out whether we are
- // looking at an item or a delimiter. It will always be exactly
- // one of those two or an error condition.
-
- std::shared_ptr<JSON> item;
- char delimiter = '\0';
- // Already verified that token is not empty
- char first_char = token.at(0);
- switch (lex_state) {
- case ls_top:
- switch (first_char) {
- case '{':
- item = std::make_shared<JSON>(JSON::makeDictionary());
- item->setStart(offset - toO(token.length()));
- break;
+ const static JSON null_item = JSON::makeNull();
+ JSON item;
+ auto tos = stack.empty() ? null_item : stack.back().item;
+ auto ls = lex_state;
+ lex_state = ls_top;
- case '[':
- item = std::make_shared<JSON>(JSON::makeArray());
- item->setStart(offset - toO(token.length()));
- break;
+ switch (ls) {
+ case ls_begin_dict:
+ item = JSON::makeDictionary();
+ break;
- default:
- delimiter = first_char;
- break;
- }
+ case ls_begin_array:
+ item = JSON::makeArray();
break;
- case ls_number:
- if (number_saw_point && (number_after_point == 0)) {
- QTC::TC("libtests", "JSON parse decimal with no digits");
+ case ls_colon:
+ if (parser_state != ps_dict_after_key) {
+ QTC::TC("libtests", "JSON parse unexpected :");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": decimal point with no digits");
+ ": unexpected colon");
}
- if ((number_before_point > 1) &&
- ((first_char == '0') ||
- ((first_char == '-') && (token.at(1) == '0')))) {
- QTC::TC("libtests", "JSON parse leading zero");
+ parser_state = ps_dict_after_colon;
+ return;
+
+ case ls_comma:
+ if (!((parser_state == ps_dict_after_item) ||
+ (parser_state == ps_array_after_item))) {
+ QTC::TC("libtests", "JSON parse unexpected ,");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": number with leading zero");
+ ": unexpected comma");
}
- if ((number_before_point == 0) && (number_after_point == 0)) {
- QTC::TC("libtests", "JSON parse number no digits");
+ if (parser_state == ps_dict_after_item) {
+ parser_state = ps_dict_after_comma;
+ } else if (parser_state == ps_array_after_item) {
+ parser_state = ps_array_after_comma;
+ } else {
+ throw std::logic_error("JSONParser::handleToken: unexpected parser"
+ " state for comma");
+ }
+ return;
+
+ case ls_end_array:
+ if (!(parser_state == ps_array_begin ||
+ parser_state == ps_array_after_item)) {
+ QTC::TC("libtests", "JSON parse unexpected ]");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": number with no digits");
+ ": unexpected array end delimiter");
}
- item = std::make_shared<JSON>(JSON::makeNumber(token));
+ parser_state = stack.back().state;
+ tos.setEnd(offset);
+ if (reactor) {
+ reactor->containerEnd(tos);
+ }
+ if (parser_state != ps_done) {
+ stack.pop_back();
+ }
+ return;
+
+ case ls_end_dict:
+ if (!((parser_state == ps_dict_begin) ||
+ (parser_state == ps_dict_after_item))) {
+ QTC::TC("libtests", "JSON parse unexpected }");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": unexpected dictionary end delimiter");
+ }
+ parser_state = stack.back().state;
+ tos.setEnd(offset);
+ if (reactor) {
+ reactor->containerEnd(tos);
+ }
+ if (parser_state != ps_done) {
+ stack.pop_back();
+ }
+ return;
+
+ case ls_number:
+ item = JSON::makeNumber(token);
break;
case ls_alpha:
if (token == "true") {
- item = std::make_shared<JSON>(JSON::makeBool(true));
+ item = JSON::makeBool(true);
} else if (token == "false") {
- item = std::make_shared<JSON>(JSON::makeBool(false));
+ item = JSON::makeBool(false);
} else if (token == "null") {
- item = std::make_shared<JSON>(JSON::makeNull());
+ item = JSON::makeNull();
} else {
QTC::TC("libtests", "JSON parse invalid keyword");
throw std::runtime_error(
@@ -1124,227 +1291,115 @@ JSONParser::handleToken()
break;
case ls_string:
- item = std::make_shared<JSON>(JSON::makeString(s_value));
+ if (parser_state == ps_dict_begin ||
+ parser_state == ps_dict_after_comma) {
+ dict_key = token;
+ dict_key_offset = token_start;
+ parser_state = ps_dict_after_key;
+ return;
+ } else {
+ item = JSON::makeString(token);
+ }
break;
- case ls_backslash:
- case ls_u4:
+ default:
throw std::logic_error(
- "tok_end is set while state = ls_backslash or ls_u4");
+ "JSONParser::handleToken : non-terminal lexer state encountered");
break;
}
- if ((item == nullptr) == (delimiter == '\0')) {
- throw std::logic_error(
- "JSONParser::handleToken: logic error: exactly one of item"
- " or delimiter must be set");
- }
-
- // See whether what we have is allowed at this point.
-
- if (item.get()) {
- switch (parser_state) {
- case ps_done:
- throw std::logic_error("can't happen; ps_done already handled");
- break;
-
- case ps_dict_after_key:
- QTC::TC("libtests", "JSON parse expected colon");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) + ": expected ':'");
- break;
-
- case ps_dict_after_item:
- QTC::TC("libtests", "JSON parse expected , or }");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": expected ',' or '}'");
- break;
-
- case ps_array_after_item:
- QTC::TC("libtests", "JSON parse expected, or ]");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": expected ',' or ']'");
- break;
-
- case ps_dict_begin:
- case ps_dict_after_comma:
- if (lex_state != ls_string) {
- QTC::TC("libtests", "JSON parse string as dict key");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": expect string as dictionary key");
- }
- break;
-
- case ps_top:
- case ps_dict_after_colon:
- case ps_array_begin:
- case ps_array_after_comma:
- break;
- // okay
- }
- } else if (delimiter == '}') {
- if (!((parser_state == ps_dict_begin) ||
- (parser_state == ps_dict_after_item)))
+ item.setStart(token_start);
+ item.setEnd(offset);
- {
- QTC::TC("libtests", "JSON parse unexpected }");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected dictionary end delimiter");
- }
- } else if (delimiter == ']') {
- if (!((parser_state == ps_array_begin) ||
- (parser_state == ps_array_after_item)))
+ switch (parser_state) {
+ case ps_dict_begin:
+ case ps_dict_after_comma:
+ QTC::TC("libtests", "JSON parse string as dict key");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": expect string as dictionary key");
+ break;
- {
- QTC::TC("libtests", "JSON parse unexpected ]");
+ case ps_dict_after_colon:
+ if (tos.checkDictionaryKeySeen(dict_key)) {
+ QTC::TC("libtests", "JSON parse duplicate key");
throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected array end delimiter");
+ "JSON: offset " + std::to_string(dict_key_offset) +
+ ": duplicated dictionary key");
}
- } else if (delimiter == ':') {
- if (parser_state != ps_dict_after_key) {
- QTC::TC("libtests", "JSON parse unexpected :");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected colon");
- }
- } else if (delimiter == ',') {
- if (!((parser_state == ps_dict_after_item) ||
- (parser_state == ps_array_after_item))) {
- QTC::TC("libtests", "JSON parse unexpected ,");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": unexpected comma");
+ if (!reactor || !reactor->dictionaryItem(dict_key, item)) {
+ tos.addDictionaryMember(dict_key, item);
}
- } else if (delimiter != '\0') {
- throw std::logic_error("JSONParser::handleToken: bad delimiter");
- }
-
- // Now we know we have a delimiter or item that is allowed. Do
- // whatever we need to do with it.
+ parser_state = ps_dict_after_item;
+ break;
- parser_state_e next_state = ps_top;
- if (delimiter == ':') {
- next_state = ps_dict_after_colon;
- } else if (delimiter == ',') {
- if (parser_state == ps_dict_after_item) {
- next_state = ps_dict_after_comma;
- } else if (parser_state == ps_array_after_item) {
- next_state = ps_array_after_comma;
- } else {
- throw std::logic_error("JSONParser::handleToken: unexpected parser"
- " state for comma");
- }
- } else if ((delimiter == '}') || (delimiter == ']')) {
- next_state = ps_stack.back();
- ps_stack.pop_back();
- auto tos = stack.back();
- tos->setEnd(offset);
- if (reactor) {
- reactor->containerEnd(*tos);
- }
- if (next_state != ps_done) {
- stack.pop_back();
- }
- } else if (delimiter != '\0') {
- throw std::logic_error(
- "JSONParser::handleToken: unexpected delimiter in transition");
- } else if (item.get()) {
- if (!(item->isArray() || item->isDictionary())) {
- item->setStart(offset - toO(token.length()));
- item->setEnd(offset);
+ case ps_array_begin:
+ case ps_array_after_comma:
+ if (!reactor || !reactor->arrayItem(item)) {
+ tos.addArrayElement(item);
}
+ parser_state = ps_array_after_item;
+ break;
- std::shared_ptr<JSON> tos;
- if (!stack.empty()) {
- tos = stack.back();
+ case ps_top:
+ if (!(item.isDictionary() || item.isArray())) {
+ stack.push_back({ps_done, item});
+ parser_state = ps_done;
+ return;
}
- switch (parser_state) {
- case ps_dict_begin:
- case ps_dict_after_comma:
- this->dict_key = s_value;
- this->dict_key_offset = item->getStart();
- item = nullptr;
- next_state = ps_dict_after_key;
- break;
+ parser_state = ps_done;
+ break;
- case ps_dict_after_colon:
- if (tos->checkDictionaryKeySeen(dict_key)) {
- QTC::TC("libtests", "JSON parse duplicate key");
- throw std::runtime_error(
- "JSON: offset " + std::to_string(dict_key_offset) +
- ": duplicated dictionary key");
- }
- if (!reactor || !reactor->dictionaryItem(dict_key, *item)) {
- tos->addDictionaryMember(dict_key, *item);
- }
- next_state = ps_dict_after_item;
- break;
+ case ps_dict_after_key:
+ QTC::TC("libtests", "JSON parse expected colon");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) + ": expected ':'");
+ break;
- case ps_array_begin:
- case ps_array_after_comma:
- if (!reactor || !reactor->arrayItem(*item)) {
- tos->addArrayElement(*item);
- }
- next_state = ps_array_after_item;
- break;
+ case ps_dict_after_item:
+ QTC::TC("libtests", "JSON parse expected , or }");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) + ": expected ',' or '}'");
+ break;
- case ps_top:
- next_state = ps_done;
- break;
+ case ps_array_after_item:
+ QTC::TC("libtests", "JSON parse expected, or ]");
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) + ": expected ',' or ']'");
+ break;
- case ps_dict_after_key:
- case ps_dict_after_item:
- case ps_array_after_item:
- case ps_done:
- throw std::logic_error(
- "JSONParser::handleToken: unexpected parser state");
- }
- } else {
+ case ps_done:
throw std::logic_error(
- "JSONParser::handleToken: unexpected null item in transition");
+ "JSONParser::handleToken: unexpected parser state");
}
- if (reactor && item.get()) {
+ if (item.isDictionary() || item.isArray()) {
+ stack.push_back({parser_state, item});
// Calling container start method is postponed until after
// adding the containers to their parent containers, if any.
// This makes it much easier to keep track of the current
// nesting level.
- if (item->isDictionary()) {
- reactor->dictionaryStart();
- } else if (item->isArray()) {
- reactor->arrayStart();
+ if (item.isDictionary()) {
+ if (reactor) {
+ reactor->dictionaryStart();
+ }
+ parser_state = ps_dict_begin;
+ } else if (item.isArray()) {
+ if (reactor) {
+ reactor->arrayStart();
+ }
+ parser_state = ps_array_begin;
}
- }
- // Prepare for next token
- if (item.get()) {
- if (item->isDictionary()) {
- stack.push_back(item);
- ps_stack.push_back(next_state);
- next_state = ps_dict_begin;
- } else if (item->isArray()) {
- stack.push_back(item);
- ps_stack.push_back(next_state);
- next_state = ps_array_begin;
- } else if (parser_state == ps_top) {
- stack.push_back(item);
+ if (stack.size() > 500) {
+ throw std::runtime_error(
+ "JSON: offset " + std::to_string(offset) +
+ ": maximum object depth exceeded");
}
}
- if (ps_stack.size() > 500) {
- throw std::runtime_error(
- "JSON: offset " + std::to_string(offset) +
- ": maximum object depth exceeded");
- }
- parser_state = next_state;
- lex_state = ls_top;
}
-std::shared_ptr<JSON>
+JSON
JSONParser::parse()
{
while (!done) {
@@ -1355,8 +1410,8 @@ JSONParser::parse()
QTC::TC("libtests", "JSON parse premature EOF");
throw std::runtime_error("JSON: premature end of input");
}
- auto const& tos = stack.back();
- if (reactor && tos.get() && !(tos->isArray() || tos->isDictionary())) {
+ auto const& tos = stack.back().item;
+ if (reactor && !(tos.isArray() || tos.isDictionary())) {
reactor->topLevelScalar();
}
return tos;
@@ -1366,7 +1421,7 @@ JSON
JSON::parse(InputSource& is, Reactor* reactor)
{
JSONParser jp(is, reactor);
- return *jp.parse();
+ return jp.parse();
}
JSON
@@ -1374,7 +1429,7 @@ JSON::parse(std::string const& s)
{
BufferInputSource bis("json input", s);
JSONParser jp(bis, nullptr);
- return *jp.parse();
+ return jp.parse();
}
void