From 8fdc3f09648ad2c79455363255b9f8fdac9e65f3 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sun, 26 Mar 2023 20:02:49 +0100 Subject: Optimize QPDFParser for non-sparse QPDF_Arrays Stop using nullptr to represent null objects. Count null array elements and trigger creation of sparse arrays if null count is greater than 100. --- libqpdf/QPDFParser.cc | 26 +++++++++++++++----------- libqpdf/QPDF_Array.cc | 18 ++++++++++++------ libqpdf/qpdf/QPDF_Array.hh | 4 ++-- 3 files changed, 29 insertions(+), 19 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index 09bf1601..4c43e487 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -27,16 +27,15 @@ namespace struct StackFrame { StackFrame(std::shared_ptr<InputSource> input) : - offset(input->tell()), - contents_string(""), - contents_offset(-1) + offset(input->tell()) { } std::vector<std::shared_ptr<QPDFObject>> olist; qpdf_offset_t offset; - std::string contents_string; - qpdf_offset_t contents_offset; + std::string contents_string{""}; + qpdf_offset_t contents_offset{-1}; + int null_count{0}; }; } // namespace @@ -50,6 +49,7 @@ QPDFParser::parse(bool& empty, bool content_stream) // this, it will cause a logic error to be thrown from // QPDF::inParse(). + const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); QPDF::ParseGuard pg(context); empty = false; @@ -67,7 +67,6 @@ QPDFParser::parse(bool& empty, bool content_stream) int good_count = 0; bool b_contents = false; bool is_null = false; - auto null_oh = QPDF_Null::create(); while (!done) { bool bad = false; @@ -156,6 +155,8 @@ QPDFParser::parse(bool& empty, bool content_stream) case QPDFTokenizer::tt_null: is_null = true; + ++frame.null_count; + break; case QPDFTokenizer::tt_integer: @@ -301,9 +302,11 @@ QPDFParser::parse(bool& empty, bool content_stream) case st_dictionary: case st_array: - if (!indirect_ref && !is_null) { - // No need to set description for direct nulls - they will - // become implicit.
+ if (is_null) { + object = null_oh; + // No need to set description for direct nulls - they probably + // will become implicit. + } else if (!indirect_ref) { setDescription(object, input->getLastOffset()); } set_offset = true; @@ -326,7 +329,8 @@ QPDFParser::parse(bool& empty, bool content_stream) parser_state_e old_state = state_stack.back(); state_stack.pop_back(); if (old_state == st_array) { - object = QPDF_Array::create(std::move(olist)); + object = QPDF_Array::create( + std::move(olist), frame.null_count > 100); setDescription(object, offset - 1); // The `offset` points to the next of "[". Set the rewind // offset to point to the beginning of "[". This has been @@ -381,7 +385,7 @@ QPDFParser::parse(bool& empty, bool content_stream) // Calculate value. std::shared_ptr<QPDFObject> val; if (iter != olist.end()) { - val = *iter ? *iter : QPDF_Null::create(); + val = *iter; ++iter; } else { QTC::TC("qpdf", "QPDFParser no val for last key"); diff --git a/libqpdf/QPDF_Array.cc b/libqpdf/QPDF_Array.cc index 7a3a29ca..31abd8eb 100644 --- a/libqpdf/QPDF_Array.cc +++ b/libqpdf/QPDF_Array.cc @@ -11,15 +11,19 @@ QPDF_Array::QPDF_Array(std::vector<QPDFObjectHandle> const& v) : setFromVector(v); } -QPDF_Array::QPDF_Array(std::vector<std::shared_ptr<QPDFObject>>&& v) : - QPDFValue(::ot_array, "array") +QPDF_Array::QPDF_Array( + std::vector<std::shared_ptr<QPDFObject>>&& v, bool sparse) : + QPDFValue(::ot_array, "array"), + sparse(sparse) { setFromVector(std::move(v)); } QPDF_Array::QPDF_Array(SparseOHArray const& items) : QPDFValue(::ot_array, "array"), + sparse(true), sp_elements(items) + { } @@ -37,9 +41,10 @@ QPDF_Array::create(std::vector<QPDFObjectHandle> const& items) } std::shared_ptr<QPDFObject> -QPDF_Array::create(std::vector<std::shared_ptr<QPDFObject>>&& items) +QPDF_Array::create( + std::vector<std::shared_ptr<QPDFObject>>&& items, bool sparse) { - return do_create(new QPDF_Array(std::move(items))); + return do_create(new QPDF_Array(std::move(items), sparse)); } std::shared_ptr<QPDFObject> @@ -196,8 +201,9 @@ QPDF_Array::setFromVector(std::vector<std::shared_ptr<QPDFObject>>&& v) if (sparse) { sp_elements = SparseOHArray(); for (auto&& item: v) { - if (item) { -
sp_elements.append(item); + if (item->getTypeCode() != ::ot_null || + item->getObjGen().isIndirect()) { + sp_elements.append(std::move(item)); } else { ++sp_elements.n_elements; } diff --git a/libqpdf/qpdf/QPDF_Array.hh b/libqpdf/qpdf/QPDF_Array.hh index 1c4227ba..00c7f59d 100644 --- a/libqpdf/qpdf/QPDF_Array.hh +++ b/libqpdf/qpdf/QPDF_Array.hh @@ -15,7 +15,7 @@ class QPDF_Array: public QPDFValue static std::shared_ptr<QPDFObject> create(std::vector<QPDFObjectHandle> const& items); static std::shared_ptr<QPDFObject> - create(std::vector<std::shared_ptr<QPDFObject>>&& items); + create(std::vector<std::shared_ptr<QPDFObject>>&& items, bool sparse); static std::shared_ptr<QPDFObject> create(SparseOHArray const& items); static std::shared_ptr<QPDFObject> create(OHArray const& items); virtual std::shared_ptr<QPDFObject> copy(bool shallow = false); @@ -36,7 +36,7 @@ class QPDF_Array: public QPDFValue private: QPDF_Array(std::vector<QPDFObjectHandle> const& items); - QPDF_Array(std::vector<std::shared_ptr<QPDFObject>>&& items); + QPDF_Array(std::vector<std::shared_ptr<QPDFObject>>&& items, bool sparse); QPDF_Array(SparseOHArray const& items); QPDF_Array(OHArray const& items); bool sparse{false}; -- cgit v1.2.3-54-g00ecf