From 4a1cce0a470e6deed6dbeb6093e4e5c16f53439d Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 16 Jan 2021 08:31:56 -0500 Subject: Reimplement name and number tree object helpers Create a computationally and memory efficient implementation of name and number trees that does binary searches as intended by the data structure rather than loading into a map, which can use a great deal of memory and can be very slow. --- include/qpdf/QPDFNameTreeObjectHelper.hh | 25 +++++++++++----------- include/qpdf/QPDFNumberTreeObjectHelper.hh | 34 +++++++++++------------------- 2 files changed, 24 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/qpdf/QPDFNameTreeObjectHelper.hh b/include/qpdf/QPDFNameTreeObjectHelper.hh index b5968e44..255ff24e 100644 --- a/include/qpdf/QPDFNameTreeObjectHelper.hh +++ b/include/qpdf/QPDFNameTreeObjectHelper.hh @@ -25,17 +25,16 @@ #include #include #include +#include #include // This is an object helper for name trees. See section 7.9.6 in the -// PDF spec (ISO 32000) for a description of name trees. This -// implementation disregards stated limits and sequencing and simply -// builds a map from string object. If the array of values does not -// contain a string where expected, this implementation silently skips -// forward until it finds a string. When looking up items in the name -// tree, use UTF-8 strings. All names are normalized for lookup -// purposes. +// PDF spec (ISO 32000) for a description of name trees. When looking +// up items in the name tree, use UTF-8 strings. All names are +// normalized for lookup purposes. + +class NNTreeImpl; class QPDFNameTreeObjectHelper: public QPDFObjectHelper { @@ -55,6 +54,9 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper QPDF_DLL bool findObject(std::string const& utf8, QPDFObjectHandle& oh); + // Return the contents of the name tree as a map. Note that name + // trees may be very large, so this may use a lot of RAM. It is + // more efficient to use QPDFNameTreeObjectHelper's iterator. QPDF_DLL std::map getAsMap() const; @@ -68,15 +70,12 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper ~Members(); private: - Members(); - Members(Members const&); + Members(QPDFObjectHandle& oh); + Members(Members const&) = delete; - std::map entries; - std::set seen; + std::shared_ptr impl; }; - void updateMap(QPDFObjectHandle oh); - PointerHolder m; }; diff --git a/include/qpdf/QPDFNumberTreeObjectHelper.hh b/include/qpdf/QPDFNumberTreeObjectHelper.hh index 699fc687..7fb9195c 100644 --- a/include/qpdf/QPDFNumberTreeObjectHelper.hh +++ b/include/qpdf/QPDFNumberTreeObjectHelper.hh @@ -24,17 +24,15 @@ #include #include -#include #include +#include #include // This is an object helper for number trees. See section 7.9.7 in the -// PDF spec (ISO 32000) for a description of number trees. This -// implementation disregards stated limits and sequencing and simply -// builds a map from numerical index to object. If the array of -// numbers does not contain a numerical value where expected, this -// implementation silently skips forward until it finds a number. +// PDF spec (ISO 32000) for a description of number trees. + +class NNTreeImpl; class QPDFNumberTreeObjectHelper: public QPDFObjectHelper { @@ -75,6 +73,10 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper bool findObjectAtOrBelow(numtree_number idx, QPDFObjectHandle& oh, numtree_number& offset); + // Return the contents of the number tree as a map. Note that + // number trees may be very large, so this may use a lot of RAM. + // It is more efficient to use QPDFNumberTreeObjectHelper's + // iterator. typedef std::map idx_map; QPDF_DLL idx_map getAsMap() const; @@ -90,23 +92,11 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper ~Members(); private: - Members(); - Members(Members const&); - - // Use a reverse sorted map so we can use the lower_bound - // method for searching. lower_bound returns smallest entry - // not before the searched entry, meaning that the searched - // entry is the lower bound. There's also an upper_bound - // method, but it does not do what you'd think it should. - // lower_bound implements >=, and upper_bound implements >. - typedef std::map > idx_map; - idx_map entries; - std::set seen; - }; + Members(QPDFObjectHandle& oh); + Members(Members const&) = delete; - void updateMap(QPDFObjectHandle oh); + std::shared_ptr impl; + }; PointerHolder m; }; -- cgit v1.2.3-54-g00ecf