From 3eb77a700434ed6d9b51e326fa4d49c530fcd473 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Fri, 20 May 2022 09:57:30 -0400 Subject: JSON: detect duplicate dictionary keys while parsing --- include/qpdf/JSON.hh | 10 ++++++++++ libqpdf/JSON.cc | 26 +++++++++++++++++++++++++- libtests/libtests.testcov | 1 + libtests/qtest/json_parse.test | 1 + libtests/qtest/json_parse/bad-40.json | 6 ++++++ libtests/qtest/json_parse/bad-40.out | 1 + 6 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 libtests/qtest/json_parse/bad-40.json create mode 100644 libtests/qtest/json_parse/bad-40.out diff --git a/include/qpdf/JSON.hh b/include/qpdf/JSON.hh index e711a2df..e6857ca6 100644 --- a/include/qpdf/JSON.hh +++ b/include/qpdf/JSON.hh @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -149,6 +150,14 @@ class JSON QPDF_DLL bool isDictionary() const; + // If the key is already in the dictionary, return true. + // Otherwise, mark it has seen and return false. This is primarily + // intended to used by the parser to detect duplicate keys when + // the reactor blocks them from being added to the final + // dictionary. + QPDF_DLL + bool checkDictionaryKeySeen(std::string const& key); + // Accessors. Accessor behavior: // // - If argument is wrong type, including null, return false @@ -314,6 +323,7 @@ class JSON virtual ~JSON_dictionary() = default; virtual void write(Pipeline*, size_t depth) const; std::map> members; + std::set parsed_keys; }; struct JSON_array: public JSON_value { diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 3072a58b..3d0870af 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -274,6 +274,21 @@ JSON::addDictionaryMember(std::string const& key, JSON const& val) return obj->members[encode_string(key)]; } +bool +JSON::checkDictionaryKeySeen(std::string const& key) +{ + JSON_dictionary* obj = dynamic_cast(this->m->value.get()); + if (0 == obj) { + throw std::logic_error( + "JSON::checkDictionaryKey called on non-dictionary"); + } + if (obj->parsed_keys.count(key)) { + return true; + } + obj->parsed_keys.insert(key); + return false; +} + JSON JSON::makeArray() { @@ -565,7 +580,8 @@ namespace u_count(0), offset(0), done(false), - parser_state(ps_top) + parser_state(ps_top), + dict_key_offset(0) { } @@ -625,6 +641,7 @@ namespace std::vector> stack; std::vector ps_stack; std::string dict_key; + size_t dict_key_offset; }; } // namespace @@ -1201,11 +1218,18 @@ JSONParser::handleToken() case ps_dict_begin: case ps_dict_after_comma: this->dict_key = s_value; + this->dict_key_offset = item->getStart(); item = nullptr; next_state = ps_dict_after_key; break; case ps_dict_after_colon: + if (tos->checkDictionaryKeySeen(dict_key)) { + QTC::TC("libtests", "JSON parse duplicate key"); + throw std::runtime_error( + "JSON: offset " + QUtil::uint_to_string(dict_key_offset) + + ": duplicated dictionary key"); + } if (!reactor || !reactor->dictionaryItem(dict_key, *item)) { tos->addDictionaryMember(dict_key, *item); } diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 1f006e81..21def9f3 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -92,3 +92,4 @@ JSON optional key 0 JSON 16 high high 0 JSON 16 low not after high 0 JSON 16 dangling high 0 +JSON parse duplicate key 0 diff --git a/libtests/qtest/json_parse.test b/libtests/qtest/json_parse.test index 6d57e92c..112da0a9 100644 --- a/libtests/qtest/json_parse.test +++ b/libtests/qtest/json_parse.test @@ -120,6 +120,7 @@ my @bad = ( "stray low surrogate", # 37 "high high surrogate", # 38 "dangling high surrogate", # 39 + "duplicate dictionary key", # 40 ); my $i = 0; diff --git a/libtests/qtest/json_parse/bad-40.json b/libtests/qtest/json_parse/bad-40.json new file mode 100644 index 00000000..f60bcc11 --- /dev/null +++ b/libtests/qtest/json_parse/bad-40.json @@ -0,0 +1,6 @@ +{ + "one": 1, + "two": 2, + "one": 3, + "four": 4 +} diff --git a/libtests/qtest/json_parse/bad-40.out b/libtests/qtest/json_parse/bad-40.out new file mode 100644 index 00000000..28a78bfb --- /dev/null +++ b/libtests/qtest/json_parse/bad-40.out @@ -0,0 +1 @@ +exception: bad-40.json: JSON: offset 28: duplicated dictionary key -- cgit v1.2.3-54-g00ecf