about | summary | refs | log | tree | commit | diff | stats
path: root/libqpdf/QPDFParser.cc
diff options
context:
space:
mode:
author: m-holger <m-holger@kubitscheck.org> 2023-10-27 18:41:36 +0200
committer: m-holger <m-holger@kubitscheck.org> 2023-11-01 16:27:52 +0100
commit: 90829228b814c6fe3ea3192da34db90dc1e36843 (patch)
tree: c014810bfaaed5a7082831331bfe94a4cc21e59c /libqpdf/QPDFParser.cc
parent: 12837f14b6c793313778ca13ca3f8e615d41117b (diff)
download: qpdf-90829228b814c6fe3ea3192da34db90dc1e36843.tar.zst
In QPDFParser::parse refactor handling of dict_close tokens
Diffstat (limited to 'libqpdf/QPDFParser.cc')
-rw-r--r-- libqpdf/QPDFParser.cc 159
1 files changed, 75 insertions, 84 deletions
diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc
index 2a77c731..5bff6af3 100644
--- a/libqpdf/QPDFParser.cc
+++ b/libqpdf/QPDFParser.cc
@@ -138,7 +138,80 @@ QPDFParser::parse(bool& empty, bool content_stream)
case QPDFTokenizer::tt_dict_close:
if (state == st_dictionary) {
- state = st_stop;
+ if ((state_stack.size() < 2) || (stack.size() < 2)) {
+ throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
+ "insufficient elements in stack");
+ }
+
+ // Convert list to map. Alternating elements are keys. Attempt to recover more or
+ // less gracefully from invalid dictionaries.
+ std::set<std::string> names;
+ for (auto& obj: olist) {
+ if (obj) {
+ if (obj->getTypeCode() == ::ot_name) {
+ names.insert(obj->getStringValue());
+ }
+ }
+ }
+
+ std::map<std::string, QPDFObjectHandle> dict;
+ int next_fake_key = 1;
+ for (auto iter = olist.begin(); iter != olist.end();) {
+ // Calculate key.
+ std::string key;
+ if (*iter && (*iter)->getTypeCode() == ::ot_name) {
+ key = (*iter)->getStringValue();
+ ++iter;
+ } else {
+ for (bool found_fake = false; !found_fake;) {
+ key = "/QPDFFake" + std::to_string(next_fake_key++);
+ found_fake = (names.count(key) == 0);
+ QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
+ }
+ warn(
+ offset,
+ "expected dictionary key but found non-name object; inserting key " +
+ key);
+ }
+ if (dict.count(key) > 0) {
+ QTC::TC("qpdf", "QPDFParser duplicate dict key");
+ warn(
+ offset,
+ "dictionary has duplicated key " + key +
+ "; last occurrence overrides earlier ones");
+ }
+
+ // Calculate value.
+ std::shared_ptr<QPDFObject> val;
+ if (iter != olist.end()) {
+ val = *iter;
+ ++iter;
+ } else {
+ QTC::TC("qpdf", "QPDFParser no val for last key");
+ warn(
+ offset,
+ "dictionary ended prematurely; using null as value for last key");
+ val = QPDF_Null::create();
+ }
+
+ dict[std::move(key)] = std::move(val);
+ }
+ if (!frame.contents_string.empty() && dict.count("/Type") &&
+ dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
+ dict.count("/Contents") && dict["/Contents"].isString()) {
+ dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string);
+ dict["/Contents"].setParsedOffset(frame.contents_offset);
+ }
+ object = QPDF_Dictionary::create(std::move(dict));
+ setDescription(object, offset - 2);
+ // The `offset` points to the next of "<<". Set the rewind offset to point to the
+ // beginning of "<<". This has been explicitly tested with whitespace surrounding
+ // the dictionary start delimiter. getLastOffset points to the dictionary end token
+ // and therefore can't be used here.
+ set_offset = true;
+ state_stack.pop_back();
+ state = state_stack.back();
+ stack.pop_back();
} else {
QTC::TC("qpdf", "QPDFParser bad dictionary close");
warn("unexpected dictionary close token");
@@ -276,7 +349,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
break;
}
- if (object == nullptr && !is_null && (!(state == st_start || state == st_stop))) {
+ if (object == nullptr && !is_null && state != st_start) {
throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
}
@@ -299,88 +372,6 @@ QPDFParser::parse(bool& empty, bool content_stream)
case st_start:
break;
-
- case st_stop:
- if ((state_stack.size() < 2) || (stack.size() < 2)) {
- throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
- "insufficient elements in stack");
- }
- parser_state_e old_state = state_stack.back();
- state_stack.pop_back();
- if (old_state == st_dictionary) {
- // Convert list to map. Alternating elements are keys. Attempt to recover more or
- // less gracefully from invalid dictionaries.
- std::set<std::string> names;
- for (auto& obj: olist) {
- if (obj) {
- if (obj->getTypeCode() == ::ot_name) {
- names.insert(obj->getStringValue());
- }
- }
- }
-
- std::map<std::string, QPDFObjectHandle> dict;
- int next_fake_key = 1;
- for (auto iter = olist.begin(); iter != olist.end();) {
- // Calculate key.
- std::string key;
- if (*iter && (*iter)->getTypeCode() == ::ot_name) {
- key = (*iter)->getStringValue();
- ++iter;
- } else {
- for (bool found_fake = false; !found_fake;) {
- key = "/QPDFFake" + std::to_string(next_fake_key++);
- found_fake = (names.count(key) == 0);
- QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
- }
- warn(
- offset,
- "expected dictionary key but found non-name object; inserting key " +
- key);
- }
- if (dict.count(key) > 0) {
- QTC::TC("qpdf", "QPDFParser duplicate dict key");
- warn(
- offset,
- "dictionary has duplicated key " + key +
- "; last occurrence overrides earlier ones");
- }
-
- // Calculate value.
- std::shared_ptr<QPDFObject> val;
- if (iter != olist.end()) {
- val = *iter;
- ++iter;
- } else {
- QTC::TC("qpdf", "QPDFParser no val for last key");
- warn(
- offset,
- "dictionary ended prematurely; using null as value for last key");
- val = QPDF_Null::create();
- }
-
- dict[std::move(key)] = std::move(val);
- }
- if (!frame.contents_string.empty() && dict.count("/Type") &&
- dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
- dict.count("/Contents") && dict["/Contents"].isString()) {
- dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string);
- dict["/Contents"].setParsedOffset(frame.contents_offset);
- }
- object = QPDF_Dictionary::create(std::move(dict));
- setDescription(object, offset - 2);
- // The `offset` points to the next of "<<". Set the rewind offset to point to the
- // beginning of "<<". This has been explicitly tested with whitespace surrounding
- // the dictionary start delimiter. getLastOffset points to the dictionary end token
- // and therefore can't be used here.
- set_offset = true;
- }
- stack.pop_back();
- if (state_stack.back() == st_top) {
- done = true;
- } else {
- stack.back().olist.push_back(object);
- }
}
}