about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author m-holger <m-holger@kubitscheck.org> 2023-11-01 10:10:56 +0100
committer m-holger <m-holger@kubitscheck.org> 2023-11-03 12:09:58 +0100
commit 605b1429e8b58d7fada225acaf530cfe8e9954ac (patch)
tree f0c935ce4f3fd8ae84f1d22f4d3b2a43252e6839
parent 0328d8723793fa8c7f3cb4d243bfc7ed051e85bb (diff)
download qpdf-605b1429e8b58d7fada225acaf530cfe8e9954ac.tar.zst
In QPDFParser::parse create dictionaries on the fly
Also, don't search for /Contents name unless the result is used.
-rw-r--r-- libqpdf/QPDFParser.cc 128
-rw-r--r-- libqpdf/qpdf/QPDFParser.hh 9
-rw-r--r-- qpdf/qtest/qpdf/bad36-recover.out 2
-rw-r--r-- qpdf/qtest/qpdf/bad36.out 2
-rw-r--r-- qpdf/qtest/qpdf/issue-335a.out 4
5 files changed, 80 insertions, 65 deletions
diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc
index fd57c6f3..d2b2af6a 100644
--- a/libqpdf/QPDFParser.cc
+++ b/libqpdf/QPDFParser.cc
@@ -74,7 +74,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
stack.clear();
stack.emplace_back(
input,
- (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary);
+ (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
frame = &stack.back();
return parseRemainder(content_stream);
@@ -242,60 +242,44 @@ QPDFParser::parseRemainder(bool content_stream)
continue;
case QPDFTokenizer::tt_dict_close:
- if (frame->state == st_dictionary) {
- // Convert list to map. Alternating elements are keys. Attempt to recover more or
- // less gracefully from invalid dictionaries.
- std::set<std::string> names;
- for (auto& obj: frame->olist) {
- if (obj) {
+ if (frame->state <= st_dictionary_value) {
+ // Attempt to recover more or less gracefully from invalid dictionaries.
+
+ auto& dict = frame->dict;
+ if (frame->state == st_dictionary_value) {
+ QTC::TC("qpdf", "QPDFParser no val for last key");
+ warn(
+ frame->offset,
+ "dictionary ended prematurely; using null as value for last key");
+ dict[frame->key] = QPDF_Null::create();
+ }
+
+ if (!frame->olist.empty()) {
+ std::set<std::string> names;
+ for (auto& obj: frame->olist) {
if (obj->getTypeCode() == ::ot_name) {
names.insert(obj->getStringValue());
}
}
- }
-
- std::map<std::string, QPDFObjectHandle> dict;
- int next_fake_key = 1;
- for (auto iter = frame->olist.begin(); iter != frame->olist.end();) {
- // Calculate key.
- std::string key;
- if (*iter && (*iter)->getTypeCode() == ::ot_name) {
- key = (*iter)->getStringValue();
- ++iter;
- } else {
- for (bool found_fake = false; !found_fake;) {
- key = "/QPDFFake" + std::to_string(next_fake_key++);
- found_fake = (names.count(key) == 0);
+ int next_fake_key = 1;
+ for (auto const& item: frame->olist) {
+ while (true) {
+ const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
+ const bool found_fake = (dict.count(key) == 0 && names.count(key) == 0);
QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
+ if (found_fake) {
+ warn(
+ frame->offset,
+ "expected dictionary key but found non-name object; inserting "
+ "key " +
+ key);
+ dict[key] = item;
+ break;
+ }
}
- warn(
- frame->offset,
- "expected dictionary key but found non-name object; inserting key " +
- key);
- }
- if (dict.count(key) > 0) {
- QTC::TC("qpdf", "QPDFParser duplicate dict key");
- warn(
- frame->offset,
- "dictionary has duplicated key " + key +
- "; last occurrence overrides earlier ones");
- }
-
- // Calculate value.
- ObjectPtr val;
- if (iter != frame->olist.end()) {
- val = *iter;
- ++iter;
- } else {
- QTC::TC("qpdf", "QPDFParser no val for last key");
- warn(
- frame->offset,
- "dictionary ended prematurely; using null as value for last key");
- val = QPDF_Null::create();
}
-
- dict[std::move(key)] = val;
}
+
if (!frame->contents_string.empty() && dict.count("/Type") &&
dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
dict.count("/Contents") && dict["/Contents"].isString()) {
@@ -335,7 +319,7 @@ QPDFParser::parseRemainder(bool content_stream)
stack.emplace_back(
input,
(tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
- : st_dictionary);
+ : st_dictionary_key);
frame = &stack.back();
continue;
}
@@ -364,15 +348,13 @@ QPDFParser::parseRemainder(bool content_stream)
continue;
case QPDFTokenizer::tt_name:
- {
- auto const& name = tokenizer.getValue();
- addScalar<QPDF_Name>(name);
-
- if (name == "/Contents") {
- b_contents = true;
- } else {
- b_contents = false;
- }
+ if (frame->state == st_dictionary_key) {
+ frame->key = tokenizer.getValue();
+ frame->state = st_dictionary_value;
+ b_contents = decrypter && frame->key == "/Contents";
+ continue;
+ } else {
+ addScalar<QPDF_Name>(tokenizer.getValue());
}
continue;
@@ -415,13 +397,21 @@ QPDFParser::parseRemainder(bool content_stream)
addNull();
}
}
- return {}; // unreachable
}
void
QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
{
- frame->olist.emplace_back(std::move(obj));
+ if (frame->state != st_dictionary_value) {
+ // If state is st_dictionary_key then there is a missing key. Push onto olist for
+ // processing once the tt_dict_close token has been found.
+ frame->olist.emplace_back(std::move(obj));
+ } else {
+ if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
+ warnDuplicateKey();
+ }
+ frame->state = st_dictionary_key;
+ }
}
void
@@ -429,7 +419,16 @@ QPDFParser::addNull()
{
const static ObjectPtr null_obj = QPDF_Null::create();
- frame->olist.emplace_back(null_obj);
+ if (frame->state != st_dictionary_value) {
+ // If state is st_dictionary_key then there is a missing key. Push onto olist for
+ // processing once the tt_dict_close token has been found.
+ frame->olist.emplace_back(null_obj);
+ } else {
+ if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
+ warnDuplicateKey();
+ }
+ frame->state = st_dictionary_key;
+ }
++frame->null_count;
}
@@ -496,6 +495,15 @@ QPDFParser::warn(QPDFExc const& e) const
}
void
+QPDFParser::warnDuplicateKey()
+{
+ QTC::TC("qpdf", "QPDFParser duplicate dict key");
+ warn(
+ frame->offset,
+ "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
+}
+
+void
QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
{
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg));
diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh
index ef5be98e..3abe6c92 100644
--- a/libqpdf/qpdf/QPDFParser.hh
+++ b/libqpdf/qpdf/QPDFParser.hh
@@ -31,8 +31,9 @@ class QPDFParser
QPDFObjectHandle parse(bool& empty, bool content_stream);
private:
- struct StackFrame;
- enum parser_state_e { st_dictionary, st_array };
+ // Parser state. Note:
+ // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value)
+ enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };
struct StackFrame
{
@@ -43,7 +44,9 @@ class QPDFParser
}
std::vector<std::shared_ptr<QPDFObject>> olist;
+ std::map<std::string, QPDFObjectHandle> dict;
parser_state_e state;
+ std::string key;
qpdf_offset_t offset;
std::string contents_string;
qpdf_offset_t contents_offset{-1};
@@ -57,6 +60,7 @@ class QPDFParser
template <typename T, typename... Args>
void addScalar(Args&&... args);
bool tooManyBadTokens();
+ void warnDuplicateKey();
void warn(qpdf_offset_t offset, std::string const& msg) const;
void warn(std::string const& msg) const;
void warn(QPDFExc const&) const;
@@ -83,7 +87,6 @@ class QPDFParser
int int_count = 0;
long long int_buffer[2]{0, 0};
qpdf_offset_t last_offset_buffer[2]{0, 0};
-
};
#endif // QPDFPARSER_HH
diff --git a/qpdf/qtest/qpdf/bad36-recover.out b/qpdf/qtest/qpdf/bad36-recover.out
index ac05acd9..9aacd729 100644
--- a/qpdf/qtest/qpdf/bad36-recover.out
+++ b/qpdf/qtest/qpdf/bad36-recover.out
@@ -1,6 +1,6 @@
WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
-WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
+WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
/QTest is implicit
/QTest is direct and has type null (2)
/QTest is null
diff --git a/qpdf/qtest/qpdf/bad36.out b/qpdf/qtest/qpdf/bad36.out
index cee3c286..e60d8685 100644
--- a/qpdf/qtest/qpdf/bad36.out
+++ b/qpdf/qtest/qpdf/bad36.out
@@ -1,6 +1,6 @@
WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
-WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
+WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
/QTest is implicit
/QTest is direct and has type null (2)
/QTest is null
diff --git a/qpdf/qtest/qpdf/issue-335a.out b/qpdf/qtest/qpdf/issue-335a.out
index 456bc475..c5b64465 100644
--- a/qpdf/qtest/qpdf/issue-335a.out
+++ b/qpdf/qtest/qpdf/issue-335a.out
@@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
+WARNING: issue-335a.pdf (trailer, offset 134): dictionary has duplicated key /L
WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
+WARNING: issue-335a.pdf (trailer, offset 164): dictionary has duplicated key /L
WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
+WARNING: issue-335a.pdf (trailer, offset 231): dictionary has duplicated key /L
WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected )
WARNING: issue-335a.pdf (trailer, offset 1328): unexpected )
WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2
WARNING: issue-335a.pdf (trailer, offset 1332): unexpected )
+WARNING: issue-335a.pdf (trailer, offset 1033): dictionary has duplicated key /L
WARNING: issue-335a.pdf (trailer, offset 1333): unexpected )
WARNING: issue-335a.pdf (trailer, offset 1344): unexpected )
WARNING: issue-335a.pdf (trailer, offset 1428): unexpected )