diff options
Diffstat (limited to 'libqpdf')
-rw-r--r-- | libqpdf/QPDFParser.cc | 674 | ||||
-rw-r--r-- | libqpdf/qpdf/QPDFParser.hh | 47 |
2 files changed, 423 insertions, 298 deletions
diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index 6dcbddb5..32c4f8e9 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -21,22 +21,7 @@ #include <memory> -namespace -{ - struct StackFrame - { - StackFrame(std::shared_ptr<InputSource> input) : - offset(input->tell()) - { - } - - std::vector<std::shared_ptr<QPDFObject>> olist; - qpdf_offset_t offset; - std::string contents_string{""}; - qpdf_offset_t contents_offset{-1}; - int null_count{0}; - }; -} // namespace +using ObjectPtr = std::shared_ptr<QPDFObject>; QPDFObjectHandle QPDFParser::parse(bool& empty, bool content_stream) @@ -46,371 +31,457 @@ QPDFParser::parse(bool& empty, bool content_stream) // effect of reading the object and changing the file pointer. If you do this, it will cause a // logic error to be thrown from QPDF::inParse(). - const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); QPDF::ParseGuard pg(context); - empty = false; + start = input->tell(); - std::shared_ptr<QPDFObject> object; - bool set_offset = false; - - std::vector<StackFrame> stack; - stack.emplace_back(input); - std::vector<parser_state_e> state_stack; - state_stack.push_back(st_top); - qpdf_offset_t offset; - bool done = false; - int bad_count = 0; - int good_count = 0; - bool b_contents = false; - bool is_null = false; + if (!tokenizer.nextToken(*input, object_description)) { + warn(tokenizer.getErrorMessage()); + } + + switch (tokenizer.getType()) { + case QPDFTokenizer::tt_eof: + if (content_stream) { + // In content stream mode, leave object uninitialized to indicate EOF + return {}; + } + QTC::TC("qpdf", "QPDFParser eof in parse"); + warn("unexpected EOF"); + return {QPDF_Null::create()}; + + case QPDFTokenizer::tt_bad: + QTC::TC("qpdf", "QPDFParser bad token in parse"); + return {QPDF_Null::create()}; + + case QPDFTokenizer::tt_brace_open: + case QPDFTokenizer::tt_brace_close: + QTC::TC("qpdf", "QPDFParser bad brace"); + warn("treating unexpected brace token as null"); + return 
{QPDF_Null::create()}; + + case QPDFTokenizer::tt_array_close: + QTC::TC("qpdf", "QPDFParser bad array close"); + warn("treating unexpected array close token as null"); + return {QPDF_Null::create()}; + + case QPDFTokenizer::tt_dict_close: + QTC::TC("qpdf", "QPDFParser bad dictionary close"); + warn("unexpected dictionary close token"); + return {QPDF_Null::create()}; + + case QPDFTokenizer::tt_array_open: + case QPDFTokenizer::tt_dict_open: + stack.clear(); + stack.emplace_back( + input, + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); + frame = &stack.back(); + return parseRemainder(content_stream); + + case QPDFTokenizer::tt_bool: + return withDescription<QPDF_Bool>(tokenizer.getValue() == "true"); + + case QPDFTokenizer::tt_null: + return {QPDF_Null::create()}; + + case QPDFTokenizer::tt_integer: + return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); + + case QPDFTokenizer::tt_real: + return withDescription<QPDF_Real>(tokenizer.getValue()); + + case QPDFTokenizer::tt_name: + return withDescription<QPDF_Name>(tokenizer.getValue()); + + case QPDFTokenizer::tt_word: + { + auto const& value = tokenizer.getValue(); + if (content_stream) { + return withDescription<QPDF_Operator>(value); + } else if (value == "endobj") { + // We just saw endobj without having read anything. Treat this as a null and do + // not move the input source's offset. 
+ input->seek(input->getLastOffset(), SEEK_SET); + empty = true; + return {QPDF_Null::create()}; + } else { + QTC::TC("qpdf", "QPDFParser treat word as string"); + warn("unknown token while reading object; treating as string"); + return withDescription<QPDF_String>(value); + } + } + + case QPDFTokenizer::tt_string: + if (decrypter) { + std::string s{tokenizer.getValue()}; + decrypter->decryptString(s); + return withDescription<QPDF_String>(s); + } else { + return withDescription<QPDF_String>(tokenizer.getValue()); + } + + default: + warn("treating unknown token type as null while reading object"); + return {QPDF_Null::create()}; + } +} - while (!done) { - bool bad = false; - bool indirect_ref = false; - is_null = false; - auto& frame = stack.back(); - auto& olist = frame.olist; - parser_state_e state = state_stack.back(); - offset = frame.offset; +QPDFObjectHandle +QPDFParser::parseRemainder(bool content_stream) +{ + // This method must take care not to resolve any objects. Don't check the type of any object + // without first ensuring that it is a direct object. Otherwise, doing so may have the side + // effect of reading the object and changing the file pointer. If you do this, it will cause a + // logic error to be thrown from QPDF::inParse(). - object = nullptr; - set_offset = false; + bad_count = 0; + bool b_contents = false; + while (true) { if (!tokenizer.nextToken(*input, object_description)) { warn(tokenizer.getErrorMessage()); } + ++good_count; // optimistically + + if (int_count != 0) { + // Special handling of indirect references. Treat integer tokens as part of an indirect + // reference until proven otherwise. + if (tokenizer.getType() == QPDFTokenizer::tt_integer) { + if (++int_count > 2) { + // Process the oldest buffered integer. 
+ addInt(int_count); + } + last_offset_buffer[int_count % 2] = input->getLastOffset(); + int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); + continue; + + } else if ( + int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && + tokenizer.getValue() == "R") { + if (context == nullptr) { + QTC::TC("qpdf", "QPDFParser indirect without context"); + throw std::logic_error("QPDFParser::parse called without context on an object " + "with indirect references"); + } + auto ref_og = QPDFObjGen( + QIntC::to_int(int_buffer[(int_count - 1) % 2]), + QIntC::to_int(int_buffer[(int_count) % 2])); + if (ref_og.isIndirect()) { + // This action has the desirable side effect of causing dangling references + // (references to indirect objects that don't appear in the PDF) in any parsed + // object to appear in the object cache. + add(std::move(context->getObject(ref_og).obj)); + } else { + QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); + addNull(); + } + int_count = 0; + continue; + + } else if (int_count > 0) { + // Process the buffered integers before processing the current token. 
+ if (int_count > 1) { + addInt(int_count - 1); + } + addInt(int_count); + int_count = 0; + } + } switch (tokenizer.getType()) { case QPDFTokenizer::tt_eof: - if (!content_stream) { - QTC::TC("qpdf", "QPDFParser eof in parse"); - warn("unexpected EOF"); + warn("parse error while reading object"); + if (content_stream) { + // In content stream mode, leave object uninitialized to indicate EOF + return {}; } - bad = true; - state = st_eof; - break; + QTC::TC("qpdf", "QPDFParser eof in parseRemainder"); + warn("unexpected EOF"); + return {QPDF_Null::create()}; case QPDFTokenizer::tt_bad: - QTC::TC("qpdf", "QPDFParser bad token in parse"); - bad = true; - is_null = true; - break; + QTC::TC("qpdf", "QPDFParser bad token in parseRemainder"); + if (tooManyBadTokens()) { + return {QPDF_Null::create()}; + } + addNull(); + continue; case QPDFTokenizer::tt_brace_open: case QPDFTokenizer::tt_brace_close: - QTC::TC("qpdf", "QPDFParser bad brace"); + QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder"); warn("treating unexpected brace token as null"); - bad = true; - is_null = true; - break; + if (tooManyBadTokens()) { + return {QPDF_Null::create()}; + } + addNull(); + continue; case QPDFTokenizer::tt_array_close: - if (state == st_array) { - state = st_stop; + if (frame->state == st_array) { + auto object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100); + setDescription(object, frame->offset - 1); + // The `offset` points to the next of "[". Set the rewind offset to point to the + // beginning of "[". This has been explicitly tested with whitespace surrounding the + // array start delimiter. getLastOffset points to the array end token and therefore + // can't be used here. 
+ if (stack.size() <= 1) { + return object; + } + stack.pop_back(); + frame = &stack.back(); + add(std::move(object)); } else { - QTC::TC("qpdf", "QPDFParser bad array close"); + QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder"); warn("treating unexpected array close token as null"); - bad = true; - is_null = true; + if (tooManyBadTokens()) { + return {QPDF_Null::create()}; + } + addNull(); } - break; + continue; case QPDFTokenizer::tt_dict_close: - if (state == st_dictionary) { - state = st_stop; + if (frame->state <= st_dictionary_value) { + // Attempt to recover more or less gracefully from invalid dictionaries. + auto& dict = frame->dict; + + if (frame->state == st_dictionary_value) { + QTC::TC("qpdf", "QPDFParser no val for last key"); + warn( + frame->offset, + "dictionary ended prematurely; using null as value for last key"); + dict[frame->key] = QPDF_Null::create(); + } + + if (!frame->olist.empty()) + fixMissingKeys(); + + if (!frame->contents_string.empty() && dict.count("/Type") && + dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && + dict.count("/Contents") && dict["/Contents"].isString()) { + dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); + dict["/Contents"].setParsedOffset(frame->contents_offset); + } + auto object = QPDF_Dictionary::create(std::move(dict)); + setDescription(object, frame->offset - 2); + // The `offset` points to the next of "<<". Set the rewind offset to point to the + // beginning of "<<". This has been explicitly tested with whitespace surrounding + // the dictionary start delimiter. getLastOffset points to the dictionary end token + // and therefore can't be used here. 
+ if (stack.size() <= 1) { + return object; + } + stack.pop_back(); + frame = &stack.back(); + add(std::move(object)); } else { - QTC::TC("qpdf", "QPDFParser bad dictionary close"); + QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder"); warn("unexpected dictionary close token"); - bad = true; - is_null = true; + if (tooManyBadTokens()) { + return {QPDF_Null::create()}; + } + addNull(); } - break; + continue; case QPDFTokenizer::tt_array_open: case QPDFTokenizer::tt_dict_open: - if (stack.size() > 500) { + if (stack.size() > 499) { QTC::TC("qpdf", "QPDFParser too deep"); warn("ignoring excessively deeply nested data structure"); - bad = true; - is_null = true; - state = st_top; + return {QPDF_Null::create()}; } else { - state = st_start; - state_stack.push_back( - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array - : st_dictionary); b_contents = false; - stack.emplace_back(input); + stack.emplace_back( + input, + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array + : st_dictionary_key); + frame = &stack.back(); + continue; } - break; case QPDFTokenizer::tt_bool: - object = QPDF_Bool::create((tokenizer.getValue() == "true")); - break; + addScalar<QPDF_Bool>(tokenizer.getValue() == "true"); + continue; case QPDFTokenizer::tt_null: - is_null = true; - ++frame.null_count; - - break; + addNull(); + continue; case QPDFTokenizer::tt_integer: - object = QPDF_Integer::create( - QUtil::string_to_ll(std::string(tokenizer.getValue()).c_str())); - break; + if (!content_stream) { + // Buffer token in case it is part of an indirect reference. 
+ last_offset_buffer[1] = input->getLastOffset(); + int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); + int_count = 1; + } else { + addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); + } + continue; case QPDFTokenizer::tt_real: - object = QPDF_Real::create(tokenizer.getValue()); - break; + addScalar<QPDF_Real>(tokenizer.getValue()); + continue; case QPDFTokenizer::tt_name: - { - auto name = tokenizer.getValue(); - object = QPDF_Name::create(name); - - if (name == "/Contents") { - b_contents = true; - } else { - b_contents = false; - } + if (frame->state == st_dictionary_key) { + frame->key = tokenizer.getValue(); + frame->state = st_dictionary_value; + b_contents = decrypter && frame->key == "/Contents"; + continue; + } else { + addScalar<QPDF_Name>(tokenizer.getValue()); } - break; + continue; case QPDFTokenizer::tt_word: - { - auto value = tokenizer.getValue(); - auto size = olist.size(); - if (content_stream) { - object = QPDF_Operator::create(value); - } else if ( - value == "R" && state != st_top && size >= 2 && olist.back() && - olist.back()->getTypeCode() == ::ot_integer && - !olist.back()->getObjGen().isIndirect() && olist.at(size - 2) && - olist.at(size - 2)->getTypeCode() == ::ot_integer && - !olist.at(size - 2)->getObjGen().isIndirect()) { - if (context == nullptr) { - QTC::TC("qpdf", "QPDFParser indirect without context"); - throw std::logic_error("QPDFObjectHandle::parse called without context on " - "an object with indirect references"); - } - auto ref_og = QPDFObjGen( - QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), - QPDFObjectHandle(olist.back()).getIntValueAsInt()); - if (ref_og.isIndirect()) { - // This action has the desirable side effect of causing dangling references - // (references to indirect objects that don't appear in the PDF) in any - // parsed object to appear in the object cache. 
- object = context->getObject(ref_og).obj; - indirect_ref = true; - } else { - QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); - is_null = true; - } - olist.pop_back(); - olist.pop_back(); - } else if ((value == "endobj") && (state == st_top)) { - // We just saw endobj without having read anything. Treat this as a null and do - // not move the input source's offset. - is_null = true; - input->seek(input->getLastOffset(), SEEK_SET); - empty = true; - } else { - QTC::TC("qpdf", "QPDFParser treat word as string"); - warn("unknown token while reading object; treating as string"); - bad = true; - object = QPDF_String::create(value); + if (content_stream) { + addScalar<QPDF_Operator>(tokenizer.getValue()); + } else { + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); + warn("unknown token while reading object; treating as string"); + if (tooManyBadTokens()) { + return {QPDF_Null::create()}; } + addScalar<QPDF_String>(tokenizer.getValue()); } - break; + continue; case QPDFTokenizer::tt_string: { - auto val = tokenizer.getValue(); + auto const& val = tokenizer.getValue(); if (decrypter) { if (b_contents) { - frame.contents_string = val; - frame.contents_offset = input->getLastOffset(); + frame->contents_string = val; + frame->contents_offset = input->getLastOffset(); b_contents = false; } std::string s{val}; decrypter->decryptString(s); - object = QPDF_String::create(s); + addScalar<QPDF_String>(s); } else { - object = QPDF_String::create(val); + addScalar<QPDF_String>(val); } } - - break; + continue; default: warn("treating unknown token type as null while reading object"); - bad = true; - is_null = true; - break; - } - - if (object == nullptr && !is_null && - (!((state == st_start) || (state == st_stop) || (state == st_eof)))) { - throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); - is_null = true; - } - - if (bad) { - ++bad_count; - good_count = 0; - } else { - ++good_count; - if (good_count > 3) { - bad_count 
= 0; + if (tooManyBadTokens()) { + return {QPDF_Null::create()}; } + addNull(); } - if (bad_count > 5) { - // We had too many consecutive errors without enough intervening successful objects. - // Give up. - warn("too many errors; giving up on reading object"); - state = st_top; - is_null = true; - } + } +} - switch (state) { - case st_eof: - if (state_stack.size() > 1) { - warn("parse error while reading object"); - } - done = true; - // In content stream mode, leave object uninitialized to indicate EOF - if (!content_stream) { - is_null = true; - } - break; - - case st_dictionary: - case st_array: - if (is_null) { - object = null_oh; - // No need to set description for direct nulls - they probably will become implicit. - } else if (!indirect_ref) { - setDescription(object, input->getLastOffset()); - } - set_offset = true; - olist.push_back(object); - break; +void +QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) +{ + if (frame->state != st_dictionary_value) { + // If state is st_dictionary_key then there is a missing key. Push onto olist for + // processing once the tt_dict_close token has been found. + frame->olist.emplace_back(std::move(obj)); + } else { + if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) { + warnDuplicateKey(); + } + frame->state = st_dictionary_key; + } +} - case st_top: - done = true; - break; +void +QPDFParser::addNull() +{ + const static ObjectPtr null_obj = QPDF_Null::create(); - case st_start: - break; + if (frame->state != st_dictionary_value) { + // If state is st_dictionary_key then there is a missing key. Push onto olist for + // processing once the tt_dict_close token has been found. 
+ frame->olist.emplace_back(null_obj); + } else { + if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) { + warnDuplicateKey(); + } + frame->state = st_dictionary_key; + } + ++frame->null_count; +} - case st_stop: - if ((state_stack.size() < 2) || (stack.size() < 2)) { - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " - "insufficient elements in stack"); - } - parser_state_e old_state = state_stack.back(); - state_stack.pop_back(); - if (old_state == st_array) { - object = QPDF_Array::create(std::move(olist), frame.null_count > 100); - setDescription(object, offset - 1); - // The `offset` points to the next of "[". Set the rewind offset to point to the - // beginning of "[". This has been explicitly tested with whitespace surrounding the - // array start delimiter. getLastOffset points to the array end token and therefore - // can't be used here. - set_offset = true; - } else if (old_state == st_dictionary) { - // Convert list to map. Alternating elements are keys. Attempt to recover more or - // less gracefully from invalid dictionaries. - std::set<std::string> names; - for (auto& obj: olist) { - if (obj) { - if (obj->getTypeCode() == ::ot_name) { - names.insert(obj->getStringValue()); - } - } - } +void +QPDFParser::addInt(int count) +{ + auto obj = QPDF_Integer::create(int_buffer[count % 2]); + obj->setDescription(context, description, last_offset_buffer[count % 2]); + add(std::move(obj)); +} - std::map<std::string, QPDFObjectHandle> dict; - int next_fake_key = 1; - for (auto iter = olist.begin(); iter != olist.end();) { - // Calculate key. - std::string key; - if (*iter && (*iter)->getTypeCode() == ::ot_name) { - key = (*iter)->getStringValue(); - ++iter; - } else { - for (bool found_fake = false; !found_fake;) { - key = "/QPDFFake" + std::to_string(next_fake_key++); - found_fake = (names.count(key) == 0); - QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 
0 : 1)); - } - warn( - offset, - "expected dictionary key but found non-name object; inserting key " + - key); - } - if (dict.count(key) > 0) { - QTC::TC("qpdf", "QPDFParser duplicate dict key"); - warn( - offset, - "dictionary has duplicated key " + key + - "; last occurrence overrides earlier ones"); - } +template <typename T, typename... Args> +void +QPDFParser::addScalar(Args&&... args) +{ + auto obj = T::create(args...); + obj->setDescription(context, description, input->getLastOffset()); + add(std::move(obj)); +} - // Calculate value. - std::shared_ptr<QPDFObject> val; - if (iter != olist.end()) { - val = *iter; - ++iter; - } else { - QTC::TC("qpdf", "QPDFParser no val for last key"); - warn( - offset, - "dictionary ended prematurely; using null as value for last key"); - val = QPDF_Null::create(); - } +template <typename T, typename... Args> +QPDFObjectHandle +QPDFParser::withDescription(Args&&... args) +{ + auto obj = T::create(args...); + obj->setDescription(context, description, start); + return {obj}; +} - dict[std::move(key)] = std::move(val); - } - if (!frame.contents_string.empty() && dict.count("/Type") && - dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && - dict.count("/Contents") && dict["/Contents"].isString()) { - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string); - dict["/Contents"].setParsedOffset(frame.contents_offset); - } - object = QPDF_Dictionary::create(std::move(dict)); - setDescription(object, offset - 2); - // The `offset` points to the next of "<<". Set the rewind offset to point to the - // beginning of "<<". This has been explicitly tested with whitespace surrounding - // the dictionary start delimiter. getLastOffset points to the dictionary end token - // and therefore can't be used here. 
- set_offset = true; - } - stack.pop_back(); - if (state_stack.back() == st_top) { - done = true; - } else { - stack.back().olist.push_back(object); - } - } +void +QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset) +{ + if (obj) { + obj->setDescription(context, description, parsed_offset); } +} - if (is_null) { - object = QPDF_Null::create(); +void +QPDFParser::fixMissingKeys() +{ + std::set<std::string> names; + for (auto& obj: frame->olist) { + if (obj->getTypeCode() == ::ot_name) { + names.insert(obj->getStringValue()); + } } - if (!set_offset) { - setDescription(object, offset); + int next_fake_key = 1; + for (auto const& item: frame->olist) { + while (true) { + const std::string key = "/QPDFFake" + std::to_string(next_fake_key++); + const bool found_fake = frame->dict.count(key) == 0 && names.count(key) == 0; + QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); + if (found_fake) { + warn( + frame->offset, + "expected dictionary key but found non-name object; inserting key " + key); + frame->dict[key] = item; + break; + } + } } - return object; } -void -QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset) +bool +QPDFParser::tooManyBadTokens() { - if (obj) { - obj->setDescription(context, description, parsed_offset); + if (good_count <= 4) { + if (++bad_count > 5) { + warn("too many errors; giving up on reading object"); + return true; + } + } else { + bad_count = 1; } + good_count = 0; + return false; } void @@ -427,6 +498,15 @@ QPDFParser::warn(QPDFExc const& e) const } void +QPDFParser::warnDuplicateKey() +{ + QTC::TC("qpdf", "QPDFParser duplicate dict key"); + warn( + frame->offset, + "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); +} + +void QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const { warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg)); diff --git a/libqpdf/qpdf/QPDFParser.hh 
b/libqpdf/qpdf/QPDFParser.hh index 35f9f603..7f5f7804 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -31,11 +31,44 @@ class QPDFParser QPDFObjectHandle parse(bool& empty, bool content_stream); private: - enum parser_state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array }; + // Parser state. Note: + // state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value) + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; + struct StackFrame + { + StackFrame(std::shared_ptr<InputSource> const& input, parser_state_e state) : + state(state), + offset(input->tell()) + { + } + + std::vector<std::shared_ptr<QPDFObject>> olist; + std::map<std::string, QPDFObjectHandle> dict; + parser_state_e state; + std::string key; + qpdf_offset_t offset; + std::string contents_string; + qpdf_offset_t contents_offset{-1}; + int null_count{0}; + }; + + QPDFObjectHandle parseRemainder(bool content_stream); + void add(std::shared_ptr<QPDFObject>&& obj); + void addNull(); + void addInt(int count); + template <typename T, typename... Args> + void addScalar(Args&&... args); + bool tooManyBadTokens(); + void warnDuplicateKey(); + void fixMissingKeys(); void warn(qpdf_offset_t offset, std::string const& msg) const; void warn(std::string const& msg) const; void warn(QPDFExc const&) const; + template <typename T, typename... Args> + // Create a new scalar object complete with parsed offset and description. + // NB the offset includes any leading whitespace. + QPDFObjectHandle withDescription(Args&&... args); void setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset); std::shared_ptr<InputSource> input; std::string const& object_description; @@ -43,6 +76,18 @@ class QPDFParser QPDFObjectHandle::StringDecrypter* decrypter; QPDF* context; std::shared_ptr<QPDFValue::Description> description; + std::vector<StackFrame> stack; + StackFrame* frame; + // Number of recent bad tokens. 
+ int bad_count = 0; + // Number of good tokens since last bad token. Irrelevant if bad_count == 0. + int good_count = 0; + // Start offset including any leading whitespace. + qpdf_offset_t start; + // Number of successive integer tokens. + int int_count = 0; + long long int_buffer[2]{0, 0}; + qpdf_offset_t last_offset_buffer[2]{0, 0}; }; #endif // QPDFPARSER_HH |