diff options
Diffstat (limited to 'libqpdf')
-rw-r--r-- | libqpdf/NNTree.cc | 414 | ||||
-rw-r--r-- | libqpdf/QPDFNameTreeObjectHelper.cc | 88 | ||||
-rw-r--r-- | libqpdf/QPDFNumberTreeObjectHelper.cc | 104 | ||||
-rw-r--r-- | libqpdf/build.mk | 1 | ||||
-rw-r--r-- | libqpdf/qpdf/NNTree.hh | 105 |
5 files changed, 613 insertions, 99 deletions
diff --git a/libqpdf/NNTree.cc b/libqpdf/NNTree.cc new file mode 100644 index 00000000..abc0043b --- /dev/null +++ b/libqpdf/NNTree.cc @@ -0,0 +1,414 @@ +#include <qpdf/NNTree.hh> +#include <qpdf/QUtil.hh> + +#include <exception> + +NNTreeIterator::PathElement::PathElement( + QPDFObjectHandle const& node, int kid_number) : + node(node), + kid_number(kid_number) +{ +} + +QPDFObjectHandle +NNTreeIterator::PathElement::getNextKid(bool backward) +{ + kid_number += backward ? -1 : 1; + auto kids = node.getKey("/Kids"); + QPDFObjectHandle result; + if ((kid_number >= 0) && (kid_number < kids.getArrayNItems())) + { + result = kids.getArrayItem(kid_number); + } + else + { + result = QPDFObjectHandle::newNull(); + } + return result; +} + +bool +NNTreeIterator::valid() const +{ + return this->item_number >= 0; +} + +void +NNTreeIterator::increment(bool backward) +{ + if (this->item_number < 0) + { + throw std::logic_error( + "attempt made to increment or decrement an invalid" + " name/number tree iterator"); + } + this->item_number += backward ? -2 : 2; + auto items = this->node.getKey(details.itemsKey()); + if ((this->item_number < 0) || + (this->item_number >= items.getArrayNItems())) + { + bool found = false; + setItemNumber(QPDFObjectHandle(), -1); + while (! (found || this->path.empty())) + { + auto& element = this->path.back(); + auto node = element.getNextKid(backward); + if (node.isNull()) + { + this->path.pop_back(); + } + else + { + deepen(node, ! backward); + found = true; + } + } + } +} + +NNTreeIterator& +NNTreeIterator::operator++() +{ + increment(false); + return *this; +} + +NNTreeIterator& +NNTreeIterator::operator--() +{ + increment(true); + return *this; +} + +NNTreeIterator::reference +NNTreeIterator::operator*() +{ + if (this->item_number < 0) + { + throw std::logic_error( + "attempt made to dereference an invalid" + " name/number tree iterator"); + } + auto items = this->node.getKey(details.itemsKey()); + return std::make_pair(items.getArrayItem(this->item_number), + items.getArrayItem(1+this->item_number)); +} + +bool +NNTreeIterator::operator==(NNTreeIterator const& other) const +{ + if ((this->item_number == -1) && (other.item_number == -1)) + { + return true; + } + if (this->path.size() != other.path.size()) + { + return false; + } + auto tpi = this->path.begin(); + auto opi = other.path.begin(); + while (tpi != this->path.end()) + { + if ((*tpi).kid_number != (*opi).kid_number) + { + return false; + } + ++tpi; + ++opi; + } + if (this->item_number != other.item_number) + { + return false; + } + return true; +} + +void +NNTreeIterator::setItemNumber(QPDFObjectHandle const& node, int n) +{ + this->node = node; + this->item_number = n; +} + +void +NNTreeIterator::addPathElement(QPDFObjectHandle const& node, + int kid_number) +{ + this->path.push_back(PathElement(node, kid_number)); +} + +void +NNTreeIterator::deepen(QPDFObjectHandle node, bool first) +{ + std::set<QPDFObjGen> seen; + while (true) + { + if (node.isIndirect()) + { + auto og = node.getObjGen(); + if (seen.count(og)) + { + throw std::runtime_error("loop detected"); + } + seen.insert(og); + } + auto kids = node.getKey("/Kids"); + int nkids = kids.isArray() ? kids.getArrayNItems() : 0; + auto items = node.getKey(details.itemsKey()); + int nitems = items.isArray() ? items.getArrayNItems() : 0; + if (nitems > 0) + { + setItemNumber(node, first ? 0 : nitems - 2); + break; + } + else if (nkids > 0) + { + int kid_number = first ? 0 : nkids - 1; + addPathElement(node, kid_number); + node = kids.getArrayItem(kid_number); + } + else + { + throw std::runtime_error("node has neither /Kids nor /Names"); + } + } +} + +NNTreeImpl::NNTreeImpl(NNTreeDetails const& details, + QPDF* qpdf, + QPDFObjectHandle& oh, + bool auto_repair) : + details(details), + oh(oh) +{ +} + +NNTreeImpl::iterator +NNTreeImpl::begin() +{ + iterator result(details); + result.deepen(this->oh, true); + return result; +} + +NNTreeImpl::iterator +NNTreeImpl::end() +{ + return iterator(details); +} + +NNTreeImpl::iterator +NNTreeImpl::last() +{ + iterator result(details); + result.deepen(this->oh, false); + return result; +} + +int +NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node) +{ + int result = 0; + auto limits = node.getKey("/Limits"); + if (limits.isArray() && (limits.getArrayNItems() >= 2) && + details.keyValid(limits.getArrayItem(0)) && + details.keyValid(limits.getArrayItem(1))) + { + if (details.compareKeys(key, limits.getArrayItem(0)) < 0) + { + result = -1; + } + else if (details.compareKeys(key, limits.getArrayItem(1)) > 0) + { + result = 1; + } + } + else + { + // The root node has no limits, so consider the item to be in + // here if there are no limits. This will cause checking lower + // items. + } + return result; +} + +int +NNTreeImpl::binarySearch( + QPDFObjectHandle key, QPDFObjectHandle items, + int num_items, bool return_prev_if_not_found, + int (NNTreeImpl::*compare)(QPDFObjectHandle& key, + QPDFObjectHandle& node, + int item)) +{ + int max_idx = 1; + while (max_idx < num_items) + { + max_idx <<= 1; + } + + int step = max_idx / 2; + int checks = max_idx; + int idx = step; + int found_idx = -1; + bool found = false; + bool found_leq = false; + int status = 0; + + while ((! found) && (checks > 0)) + { + if (idx < num_items) + { + status = (this->*compare)(key, items, idx); + if (status >= 0) + { + found_leq = true; + found_idx = idx; + } + } + else + { + // consider item to be below anything after the top + status = -1; + } + + if (status == 0) + { + found = true; + } + else + { + checks >>= 1; + if (checks > 0) + { + step >>= 1; + if (step == 0) + { + step = 1; + } + + if (status < 0) + { + idx -= step; + } + else + { + idx += step; + } + } + } + } + + if (found || (found_leq && return_prev_if_not_found)) + { + return found_idx; + } + else + { + return -1; + } +} + +int +NNTreeImpl::compareKeyItem( + QPDFObjectHandle& key, QPDFObjectHandle& items, int idx) +{ + if (! ((items.isArray() && (items.getArrayNItems() > (2 * idx)) && + details.keyValid(items.getArrayItem(2 * idx))))) + { + throw std::runtime_error("item at index " + + QUtil::int_to_string(2 * idx) + + " is not the right type"); + } + return details.compareKeys(key, items.getArrayItem(2 * idx)); +} + +int +NNTreeImpl::compareKeyKid(QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx) +{ + if (! (kids.isArray() && (idx < kids.getArrayNItems()) && + kids.getArrayItem(idx).isDictionary())) + { + throw std::runtime_error("invalid kid at index " + + QUtil::int_to_string(idx)); + } + return withinLimits(key, kids.getArrayItem(idx)); +} + + +NNTreeImpl::iterator +NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) +{ + auto first_item = begin(); + auto last_item = end(); + if (first_item.valid() && + details.keyValid((*first_item).first) && + details.compareKeys(key, (*first_item).first) < 0) + { + // Before the first key + return end(); + } + else if (last_item.valid() && + details.keyValid((*last_item).first) && + details.compareKeys(key, (*last_item).first) > 0) + { + // After the last key + if (return_prev_if_not_found) + { + return last_item; + } + else + { + return end(); + } + } + + std::set<QPDFObjGen> seen; + auto node = this->oh; + iterator result(details); + + while (true) + { + auto og = node.getObjGen(); + if (seen.count(og)) + { + throw std::runtime_error("loop detected in find"); + } + seen.insert(og); + + auto kids = node.getKey("/Kids"); + int nkids = kids.isArray() ? kids.getArrayNItems() : 0; + auto items = node.getKey(details.itemsKey()); + int nitems = items.isArray() ? items.getArrayNItems() : 0; + if (nitems > 0) + { + int idx = binarySearch( + key, items, nitems / 2, return_prev_if_not_found, + &NNTreeImpl::compareKeyItem); + if (idx >= 0) + { + result.setItemNumber(node, 2 * idx); + } + break; + } + else if (nkids > 0) + { + int idx = binarySearch( + key, kids, nkids, true, + &NNTreeImpl::compareKeyKid); + if (idx == -1) + { + throw std::runtime_error( + "unexpected -1 from binary search of kids;" + " tree may not be sorted"); + } + result.addPathElement(node, idx); + node = kids.getArrayItem(idx); + } + else + { + throw std::runtime_error("bad node during find"); + } + } + + return result; +} diff --git a/libqpdf/QPDFNameTreeObjectHelper.cc b/libqpdf/QPDFNameTreeObjectHelper.cc index a7f0a00d..07a8ad02 100644 --- a/libqpdf/QPDFNameTreeObjectHelper.cc +++ b/libqpdf/QPDFNameTreeObjectHelper.cc @@ -1,73 +1,66 @@ #include <qpdf/QPDFNameTreeObjectHelper.hh> +#include <qpdf/NNTree.hh> + +class NameTreeDetails: public NNTreeDetails +{ + public: + virtual std::string const& itemsKey() const override + { + static std::string k("/Names"); + return k; + } + virtual bool keyValid(QPDFObjectHandle oh) const override + { + return oh.isString(); + } + virtual int compareKeys( + QPDFObjectHandle a, QPDFObjectHandle b) const override + { + if (! (keyValid(a) && keyValid(b))) + { + // We don't call this without calling keyValid first + throw std::logic_error("comparing invalid keys"); + } + auto as = a.getUTF8Value(); + auto bs = b.getUTF8Value(); + return ((as < bs) ? -1 : (as > bs) ? 1 : 0); + } +}; + +static NameTreeDetails name_tree_details; QPDFNameTreeObjectHelper::Members::~Members() { } -QPDFNameTreeObjectHelper::Members::Members() +QPDFNameTreeObjectHelper::Members::Members(QPDFObjectHandle& oh) : + impl(std::make_shared<NNTreeImpl>(name_tree_details, nullptr, oh, false)) { } QPDFNameTreeObjectHelper::QPDFNameTreeObjectHelper(QPDFObjectHandle oh) : QPDFObjectHelper(oh), - m(new Members()) + m(new Members(oh)) { - updateMap(oh); } QPDFNameTreeObjectHelper::~QPDFNameTreeObjectHelper() { } -void -QPDFNameTreeObjectHelper::updateMap(QPDFObjectHandle oh) -{ - if (this->m->seen.count(oh.getObjGen())) - { - return; - } - this->m->seen.insert(oh.getObjGen()); - QPDFObjectHandle names = oh.getKey("/Names"); - if (names.isArray()) - { - int nitems = names.getArrayNItems(); - int i = 0; - while (i < nitems - 1) - { - QPDFObjectHandle name = names.getArrayItem(i); - if (name.isString()) - { - ++i; - QPDFObjectHandle obj = names.getArrayItem(i); - this->m->entries[name.getUTF8Value()] = obj; - } - ++i; - } - } - QPDFObjectHandle kids = oh.getKey("/Kids"); - if (kids.isArray()) - { - int nitems = kids.getArrayNItems(); - for (int i = 0; i < nitems; ++i) - { - updateMap(kids.getArrayItem(i)); - } - } -} - bool QPDFNameTreeObjectHelper::hasName(std::string const& name) { - return this->m->entries.count(name) != 0; + auto i = this->m->impl->find(QPDFObjectHandle::newUnicodeString(name)); + return (i != this->m->impl->end()); } bool QPDFNameTreeObjectHelper::findObject( std::string const& name, QPDFObjectHandle& oh) { - std::map<std::string, QPDFObjectHandle>::iterator i = - this->m->entries.find(name); - if (i == this->m->entries.end()) + auto i = this->m->impl->find(QPDFObjectHandle::newUnicodeString(name)); + if (i == this->m->impl->end()) { return false; } @@ -78,5 +71,12 @@ QPDFNameTreeObjectHelper::findObject( std::map<std::string, QPDFObjectHandle> QPDFNameTreeObjectHelper::getAsMap() const { - return this->m->entries; + std::map<std::string, QPDFObjectHandle> result; + for (auto i: *(this->m->impl)) + { + result.insert( + std::make_pair(i.first.getUTF8Value(), + i.second)); + } + return result; } diff --git a/libqpdf/QPDFNumberTreeObjectHelper.cc b/libqpdf/QPDFNumberTreeObjectHelper.cc index 15930fcd..fa9a0c71 100644 --- a/libqpdf/QPDFNumberTreeObjectHelper.cc +++ b/libqpdf/QPDFNumberTreeObjectHelper.cc @@ -1,91 +1,84 @@ #include <qpdf/QPDFNumberTreeObjectHelper.hh> +#include <qpdf/NNTree.hh> -QPDFNumberTreeObjectHelper::Members::~Members() -{ -} - -QPDFNumberTreeObjectHelper::Members::Members() +class NumberTreeDetails: public NNTreeDetails { -} - -QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) : - QPDFObjectHelper(oh), - m(new Members()) -{ - updateMap(oh); -} - -void -QPDFNumberTreeObjectHelper::updateMap(QPDFObjectHandle oh) -{ - if (this->m->seen.count(oh.getObjGen())) + public: + virtual std::string const& itemsKey() const override { - return; + static std::string k("/Nums"); + return k; } - this->m->seen.insert(oh.getObjGen()); - QPDFObjectHandle nums = oh.getKey("/Nums"); - if (nums.isArray()) + virtual bool keyValid(QPDFObjectHandle oh) const override { - int nitems = nums.getArrayNItems(); - int i = 0; - while (i < nitems - 1) - { - QPDFObjectHandle num = nums.getArrayItem(i); - if (num.isInteger()) - { - ++i; - QPDFObjectHandle obj = nums.getArrayItem(i); - this->m->entries[num.getIntValue()] = obj; - } - ++i; - } + return oh.isInteger(); } - QPDFObjectHandle kids = oh.getKey("/Kids"); - if (kids.isArray()) + virtual int compareKeys( + QPDFObjectHandle a, QPDFObjectHandle b) const override { - int nitems = kids.getArrayNItems(); - for (int i = 0; i < nitems; ++i) + if (! (keyValid(a) && keyValid(b))) { - updateMap(kids.getArrayItem(i)); + // We don't call this without calling keyValid first + throw std::logic_error("comparing invalid keys"); } + auto as = a.getIntValue(); + auto bs = b.getIntValue(); + return ((as < bs) ? -1 : (as > bs) ? 1 : 0); } +}; + +static NumberTreeDetails number_tree_details; + +QPDFNumberTreeObjectHelper::Members::~Members() +{ +} + +QPDFNumberTreeObjectHelper::Members::Members(QPDFObjectHandle& oh) : + impl(std::make_shared<NNTreeImpl>(number_tree_details, nullptr, oh, false)) +{ } +QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) : + QPDFObjectHelper(oh), + m(new Members(oh)) +{ +} QPDFNumberTreeObjectHelper::numtree_number QPDFNumberTreeObjectHelper::getMin() { - if (this->m->entries.empty()) + auto i = this->m->impl->begin(); + if (i == this->m->impl->end()) { return 0; } - // Our map is sorted in reverse. - return this->m->entries.rbegin()->first; + return (*i).first.getIntValue(); } QPDFNumberTreeObjectHelper::numtree_number QPDFNumberTreeObjectHelper::getMax() { - if (this->m->entries.empty()) + auto i = this->m->impl->last(); + if (i == this->m->impl->end()) { return 0; } - // Our map is sorted in reverse. - return this->m->entries.begin()->first; + return (*i).first.getIntValue(); } bool QPDFNumberTreeObjectHelper::hasIndex(numtree_number idx) { - return this->m->entries.count(idx) != 0; + auto i = this->m->impl->find(QPDFObjectHandle::newInteger(idx)); + return (i != this->m->impl->end()); } bool QPDFNumberTreeObjectHelper::findObject( numtree_number idx, QPDFObjectHandle& oh) { - Members::idx_map::iterator i = this->m->entries.find(idx); - if (i == this->m->entries.end()) + auto i = this->m->impl->find(QPDFObjectHandle::newInteger(idx)); + if (i == this->m->impl->end()) { return false; } @@ -98,13 +91,13 @@ QPDFNumberTreeObjectHelper::findObjectAtOrBelow( numtree_number idx, QPDFObjectHandle& oh, numtree_number& offset) { - Members::idx_map::iterator i = this->m->entries.lower_bound(idx); - if (i == this->m->entries.end()) + auto i = this->m->impl->find(QPDFObjectHandle::newInteger(idx), true); + if (i == this->m->impl->end()) { return false; } oh = (*i).second; - offset = idx - (*i).first; + offset = idx - (*i).first.getIntValue(); return true; } @@ -112,10 +105,11 @@ std::map<QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle> QPDFNumberTreeObjectHelper::getAsMap() const { std::map<numtree_number, QPDFObjectHandle> result; - for (Members::idx_map::const_iterator iter = this->m->entries.begin(); - iter != this->m->entries.end(); ++iter) + for (auto i: *(this->m->impl)) { - result[(*iter).first] = (*iter).second; + result.insert( + std::make_pair(i.first.getIntValue(), + i.second)); } return result; } diff --git a/libqpdf/build.mk b/libqpdf/build.mk index 40b022d6..ca15611a 100644 --- a/libqpdf/build.mk +++ b/libqpdf/build.mk @@ -33,6 +33,7 @@ SRCS_libqpdf = \ libqpdf/InsecureRandomDataProvider.cc \ libqpdf/JSON.cc \ libqpdf/MD5.cc \ + libqpdf/NNTree.cc \ libqpdf/OffsetInputSource.cc \ libqpdf/Pipeline.cc \ libqpdf/Pl_AES_PDF.cc \ diff --git a/libqpdf/qpdf/NNTree.hh b/libqpdf/qpdf/NNTree.hh new file mode 100644 index 00000000..910fb225 --- /dev/null +++ b/libqpdf/qpdf/NNTree.hh @@ -0,0 +1,105 @@ +#ifndef NNTREE_HH +#define NNTREE_HH + +#include <qpdf/QPDF.hh> +#include <qpdf/QPDFObjectHandle.hh> + +#include <iterator> +#include <list> + +class NNTreeDetails +{ + public: + virtual std::string const& itemsKey() const = 0; + virtual bool keyValid(QPDFObjectHandle) const = 0; + virtual int compareKeys(QPDFObjectHandle, QPDFObjectHandle) const = 0; +}; + +class NNTreeIterator: public std::iterator< + std::bidirectional_iterator_tag, + std::pair<QPDFObjectHandle, QPDFObjectHandle>, + void, + std::pair<QPDFObjectHandle, QPDFObjectHandle>*, + std::pair<QPDFObjectHandle, QPDFObjectHandle>> +{ + friend class NNTreeImpl; + public: + bool valid() const; + NNTreeIterator& operator++(); + NNTreeIterator operator++(int) + { + NNTreeIterator t = *this; + ++(*this); + return t; + } + NNTreeIterator& operator--(); + NNTreeIterator operator--(int) + { + NNTreeIterator t = *this; + --(*this); + return t; + } + reference operator*(); + bool operator==(NNTreeIterator const& other) const; + bool operator!=(NNTreeIterator const& other) const + { + return ! operator==(other); + } + + private: + class PathElement + { + public: + PathElement(QPDFObjectHandle const& node, int kid_number); + QPDFObjectHandle getNextKid(bool backward); + + QPDFObjectHandle node; + int kid_number; + }; + + NNTreeIterator(NNTreeDetails const& details) : + details(details), + item_number(-1) + { + } + void deepen(QPDFObjectHandle node, bool first); + void setItemNumber(QPDFObjectHandle const& node, int); + void addPathElement(QPDFObjectHandle const& node, int kid_number); + void increment(bool backward); + + NNTreeDetails const& details; + std::list<PathElement> path; + QPDFObjectHandle node; + int item_number; +}; + +class NNTreeImpl +{ + public: + typedef NNTreeIterator iterator; + + NNTreeImpl(NNTreeDetails const&, QPDF*, QPDFObjectHandle&, + bool auto_repair = true); + iterator begin(); + iterator end(); + iterator last(); + iterator find(QPDFObjectHandle key, bool return_prev_if_not_found = false); + + private: + int withinLimits(QPDFObjectHandle key, QPDFObjectHandle node); + int binarySearch( + QPDFObjectHandle key, QPDFObjectHandle items, + int num_items, bool return_prev_if_not_found, + int (NNTreeImpl::*compare)(QPDFObjectHandle& key, + QPDFObjectHandle& node, + int item)); + int compareKeyItem( + QPDFObjectHandle& key, QPDFObjectHandle& items, int idx); + int compareKeyKid( + QPDFObjectHandle& key, QPDFObjectHandle& items, int idx); + + NNTreeDetails const& details; + QPDFObjectHandle oh; +}; + +#endif // NNTREE_HH |