aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2018-12-18 19:08:55 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2018-12-18 22:46:10 +0100
commit: f38df27aa3eae905e3ee90365099335e317173d8 (patch)
tree: 3a0c8ac95e389889cbcee3a92924d2652e2d42a0
parent: 077d3d451204393d17b9a14c2145487c35fce572 (diff)
download: qpdf-f38df27aa3eae905e3ee90365099335e317173d8.tar.zst
Add QPDFNumberTreeObjectHelper
-rw-r--r-- ChangeLog 5
-rw-r--r-- include/qpdf/QPDFNumberTreeObjectHelper.hh 110
-rw-r--r-- libqpdf/QPDFNumberTreeObjectHelper.cc 122
-rw-r--r-- libqpdf/build.mk 1
-rw-r--r-- qpdf/qtest/qpdf.test 10
-rw-r--r-- qpdf/qtest/qpdf/number-tree.out 15
-rw-r--r-- qpdf/qtest/qpdf/number-tree.pdf 171
-rw-r--r-- qpdf/test_driver.cc 30
8 files changed, 464 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 578d77ed..27f790f2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2018-12-18 Jay Berkenbilt <ejb@ql.org>
+ * Add QPDFNumberTreeObjectHelper class. This class provides useful
+ methods for dealing with number trees, which are discussed in
+ section 7.9.7 of the PDF spec (ISO-32000). Page label dictionaries
+ are represented as number trees.
+
* New method QPDFObjectHandle::wrapInArray returns the object
itself if it is an array. Otherwise, it returns an array
containing the object. This is useful for dealing with PDF data
diff --git a/include/qpdf/QPDFNumberTreeObjectHelper.hh b/include/qpdf/QPDFNumberTreeObjectHelper.hh
new file mode 100644
index 00000000..be67c887
--- /dev/null
+++ b/include/qpdf/QPDFNumberTreeObjectHelper.hh
@@ -0,0 +1,110 @@
+// Copyright (c) 2005-2018 Jay Berkenbilt
+//
+// This file is part of qpdf.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Versions of qpdf prior to version 7 were released under the terms
+// of version 2.0 of the Artistic License. At your option, you may
+// continue to consider qpdf to be licensed under those terms. Please
+// see the manual for additional information.
+
+#ifndef QPDFNUMBERTREEOBJECTHELPER_HH
+#define QPDFNUMBERTREEOBJECTHELPER_HH
+
+#include <qpdf/QPDFObjectHelper.hh>
+#include <qpdf/QPDFObjGen.hh>
+#include <functional>
+#include <map>
+
+#include <qpdf/DLL.h>
+
+// This is an object helper for number trees. See section 7.9.7 in the
+// PDF spec (ISO 32000) for a description of number trees. This
+// implementation disregards stated limits and sequencing and simply
+// builds a map from numerical index to object. If the array of
+// numbers does not contain a numerical value where expected, this
+// implementation silently skips forward until it finds a number.
+
+class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
+{
+ public:
+ // Construct a helper from the root node of a number tree. The
+ // whole tree is walked once, at construction time, and flattened
+ // into an in-memory index; the query methods below only consult
+ // that index.
+ QPDF_DLL
+ QPDFNumberTreeObjectHelper(QPDFObjectHandle);
+
+ // Type used for number tree keys.
+ typedef long long int numtree_number;
+
+ // Return overall minimum and maximum indices. Both return 0 when
+ // the tree has no entries, so 0 alone does not prove that index 0
+ // is present; use hasIndex to check.
+ QPDF_DLL
+ numtree_number getMin();
+ QPDF_DLL
+ numtree_number getMax();
+
+ // Return whether the number tree has an explicit entry for this
+ // number.
+ QPDF_DLL
+ bool hasIndex(numtree_number idx);
+
+ // Find an object with a specific index. If found, returns true
+ // and initializes oh. If not found, returns false and leaves oh
+ // untouched.
+ QPDF_DLL
+ bool findObject(numtree_number idx, QPDFObjectHandle& oh);
+ // Find the object at the index or, if not found, the object whose
+ // index is the highest index less than the requested index. If
+ // the requested index is less than the minimum, return false
+ // without modifying oh or offset.
+ // Otherwise, return true, initialize oh to the object, and set
+ // offset to the difference between the requested index and the
+ // actual index. For example, if a number tree has values for 3
+ // and 6 and idx is 5, this method would return true, initialize
+ // oh to the value with index 3, and set offset to 2 (5 - 3).
+ QPDF_DLL
+ bool findObjectAtOrBelow(numtree_number idx, QPDFObjectHandle& oh,
+ numtree_number& offset);
+
+ // Return a copy of every entry, keyed by index in ascending
+ // order.
+ typedef std::map<numtree_number, QPDFObjectHandle> idx_map;
+ QPDF_DLL
+ idx_map getAsMap() const;
+
+ private:
+ // Private state, held through a PointerHolder so that the data
+ // members are not part of this class's own layout.
+ class Members
+ {
+ friend class QPDFNumberTreeObjectHelper;
+ typedef QPDFNumberTreeObjectHelper::numtree_number numtree_number;
+
+ public:
+ QPDF_DLL
+ ~Members();
+
+ private:
+ Members();
+ // NOTE(review): no definition of the copy constructor is visible
+ // alongside this declaration -- presumably it is left undefined
+ // on purpose to forbid copying; confirm.
+ Members(Members const&);
+
+ // The map is ordered from the largest index to the smallest
+ // (std::greater). With that ordering, lower_bound(idx) returns
+ // the first entry whose index is <= idx, i.e. the entry at idx
+ // or, failing that, the nearest one below it -- exactly what
+ // findObjectAtOrBelow needs. (upper_bound would instead return
+ // the first entry whose index is strictly < idx.) As a
+ // consequence, begin() is the maximum and rbegin() the minimum.
+ typedef std::map<numtree_number,
+ QPDFObjectHandle,
+ std::greater<numtree_number> > idx_map;
+ idx_map entries;
+ // Object/generation pairs of nodes already visited by updateMap,
+ // so that a tree whose /Kids lead back to an earlier node does
+ // not recurse forever.
+ // NOTE(review): std::set is used here but <set> is not among the
+ // headers this file includes directly -- presumably it arrives
+ // transitively; confirm.
+ std::set<QPDFObjGen> seen;
+ };
+
+ // Recursively add the /Nums pairs of oh and of all its /Kids to
+ // m->entries.
+ void updateMap(QPDFObjectHandle oh);
+
+ PointerHolder<Members> m;
+};
+
+#endif // QPDFNUMBERTREEOBJECTHELPER_HH
diff --git a/libqpdf/QPDFNumberTreeObjectHelper.cc b/libqpdf/QPDFNumberTreeObjectHelper.cc
new file mode 100644
index 00000000..bf360cf5
--- /dev/null
+++ b/libqpdf/QPDFNumberTreeObjectHelper.cc
@@ -0,0 +1,122 @@
+#include <qpdf/QPDFNumberTreeObjectHelper.hh>
+#include <qpdf/QTC.hh>
+
+// Out-of-line destructor. Nothing to release by hand: the members are
+// standard containers that clean up after themselves.
+QPDFNumberTreeObjectHelper::Members::~Members()
+{
+}
+
+// Start with an empty index and an empty visited set; the owning
+// helper's constructor fills them in through updateMap.
+QPDFNumberTreeObjectHelper::Members::Members()
+{
+}
+
+// Wrap oh, the root node of a number tree, and eagerly flatten the
+// whole tree into m->entries so that later lookups never have to
+// traverse the PDF objects again.
+QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) :
+ QPDFObjectHelper(oh),
+ m(new Members())
+{
+ updateMap(oh);
+}
+
+// Recursively collect the contents of the number tree node oh into
+// m->entries.
+//
+// A node may carry a /Nums array of alternating key/value items, a
+// /Kids array of child nodes, or both; each is processed only if it
+// is actually an array. Within /Nums, an item found where an integer
+// key is expected but that is not an integer is skipped, and
+// scanning resumes at the next item. If the same key occurs more
+// than once, the value encountered last wins.
+void
+QPDFNumberTreeObjectHelper::updateMap(QPDFObjectHandle oh)
+{
+    // Guard against loops in /Kids. Only indirect objects are
+    // tracked: direct objects have no object/generation pair of
+    // their own, so recording them would make every direct node
+    // after the first look like a repeat and silently drop its
+    // subtree.
+    if (oh.isIndirect())
+    {
+        if (! this->m->seen.insert(oh.getObjGen()).second)
+        {
+            // Already visited.
+            return;
+        }
+    }
+    QPDFObjectHandle nums = oh.getKey("/Nums");
+    if (nums.isArray())
+    {
+        size_t nitems = nums.getArrayNItems();
+        size_t i = 0;
+        // Written as "i + 1 < nitems" rather than "i < nitems - 1":
+        // nitems is unsigned, so the subtraction would wrap around
+        // for an empty array and the loop would read past the end.
+        // The condition also guarantees that a key always has a
+        // value following it.
+        while (i + 1 < nitems)
+        {
+            QPDFObjectHandle num = nums.getArrayItem(i);
+            if (num.isInteger())
+            {
+                // Consume the value that follows the key.
+                ++i;
+                QPDFObjectHandle obj = nums.getArrayItem(i);
+                this->m->entries[num.getIntValue()] = obj;
+            }
+            ++i;
+        }
+    }
+    QPDFObjectHandle kids = oh.getKey("/Kids");
+    if (kids.isArray())
+    {
+        size_t nitems = kids.getArrayNItems();
+        for (size_t i = 0; i < nitems; ++i)
+        {
+            updateMap(kids.getArrayItem(i));
+        }
+    }
+}
+
+
+// Return the smallest index present in the tree, or 0 if the tree
+// has no entries at all.
+QPDFNumberTreeObjectHelper::numtree_number
+QPDFNumberTreeObjectHelper::getMin()
+{
+    Members::idx_map& by_index = this->m->entries;
+    if (! by_index.empty())
+    {
+        // Entries are kept largest-first, so the minimum is the
+        // last element.
+        return by_index.rbegin()->first;
+    }
+    return 0;
+}
+
+// Return the largest index present in the tree, or 0 if the tree has
+// no entries at all.
+QPDFNumberTreeObjectHelper::numtree_number
+QPDFNumberTreeObjectHelper::getMax()
+{
+    Members::idx_map& by_index = this->m->entries;
+    if (! by_index.empty())
+    {
+        // Entries are kept largest-first, so the maximum is the
+        // first element.
+        return by_index.begin()->first;
+    }
+    return 0;
+}
+
+// Report whether idx appears as a key in the tree.
+bool
+QPDFNumberTreeObjectHelper::hasIndex(numtree_number idx)
+{
+    Members::idx_map& by_index = this->m->entries;
+    return by_index.find(idx) != by_index.end();
+}
+
+// Exact lookup. On a hit, copy the stored value into oh and return
+// true. On a miss, return false and leave oh as it was.
+bool
+QPDFNumberTreeObjectHelper::findObject(
+    numtree_number idx, QPDFObjectHandle& oh)
+{
+    Members::idx_map& by_index = this->m->entries;
+    Members::idx_map::iterator match = by_index.find(idx);
+    bool const found = (match != by_index.end());
+    if (found)
+    {
+        oh = match->second;
+    }
+    return found;
+}
+
+// Look up the entry at idx or, if there is none, the entry with the
+// highest index below idx. On success, store the value in oh, store
+// (idx - index of the entry found) in offset, and return true. If
+// every entry lies above idx, return false and leave both output
+// parameters untouched.
+bool
+QPDFNumberTreeObjectHelper::findObjectAtOrBelow(
+    numtree_number idx, QPDFObjectHandle& oh,
+    numtree_number& offset)
+{
+    Members::idx_map& by_index = this->m->entries;
+    // The map is ordered largest-first, so lower_bound yields the
+    // first entry whose index does not exceed idx.
+    Members::idx_map::iterator at_or_below = by_index.lower_bound(idx);
+    if (at_or_below != by_index.end())
+    {
+        oh = at_or_below->second;
+        offset = idx - at_or_below->first;
+        return true;
+    }
+    return false;
+}
+
+// Return a copy of every entry as an ordinary map sorted by
+// ascending index. The internal map is sorted the other way round,
+// so the entries are re-inserted into a default-ordered map.
+std::map<QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle>
+QPDFNumberTreeObjectHelper::getAsMap() const
+{
+    // Keys are already unique, so constructing from the range gives
+    // the same contents as assigning them one by one.
+    return std::map<numtree_number, QPDFObjectHandle>(
+        this->m->entries.begin(), this->m->entries.end());
+}
diff --git a/libqpdf/build.mk b/libqpdf/build.mk
index 61ea4b2d..147bb16a 100644
--- a/libqpdf/build.mk
+++ b/libqpdf/build.mk
@@ -40,6 +40,7 @@ SRCS_libqpdf = \
libqpdf/QPDFAnnotationObjectHelper.cc \
libqpdf/QPDFExc.cc \
libqpdf/QPDFFormFieldObjectHelper.cc \
+ libqpdf/QPDFNumberTreeObjectHelper.cc \
libqpdf/QPDFObjGen.cc \
libqpdf/QPDFObject.cc \
libqpdf/QPDFObjectHandle.cc \
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 17497710..9ea5b61d 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -226,6 +226,16 @@ foreach my $input (@ext_inputs)
}
show_ntests();
# ----------
+$td->notify("--- Number Trees ---");
+$n_tests += 1;
+
+$td->runtest("number trees",
+ {$td->COMMAND => "test_driver 46 number-tree.pdf"},
+ {$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+
+show_ntests();
+# ----------
$td->notify("--- Page API Tests ---");
$n_tests += 9;
diff --git a/qpdf/qtest/qpdf/number-tree.out b/qpdf/qtest/qpdf/number-tree.out
new file mode 100644
index 00000000..b4b16535
--- /dev/null
+++ b/qpdf/qtest/qpdf/number-tree.out
@@ -0,0 +1,15 @@
+1 one
+2 two
+3 three
+5 five
+6 six
+9 nine
+11 elephant
+12 twelve
+15 fifteen
+19 nineteen
+20 twenty
+22 twenty-two
+23 twenty-three
+29 twenty-nine
+test 46 done
diff --git a/qpdf/qtest/qpdf/number-tree.pdf b/qpdf/qtest/qpdf/number-tree.pdf
new file mode 100644
index 00000000..35c1e375
--- /dev/null
+++ b/qpdf/qtest/qpdf/number-tree.pdf
@@ -0,0 +1,171 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+2 0 obj
+<<
+ /Count 1
+ /Kids [
+ 3 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+3 0 obj
+<<
+ /Contents 4 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 6 0 R
+ >>
+ /ProcSet 7 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+4 0 obj
+<<
+ /Length 5 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+44
+endobj
+
+6 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+7 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+8 0 obj
+<<
+ /Kids [
+ 9 0 R
+ 10 0 R
+ ]
+>>
+endobj
+
+9 0 obj
+<<
+ /Kids [
+ 11 0 R
+ 12 0 R
+ ]
+ /Limits [
+ 0
+ 19
+ ]
+>>
+endobj
+
+10 0 obj
+<<
+ /Limits [
+ 20
+ 29
+ ]
+ /Nums [
+ 20 (twenty)
+ 22 (twenty-two)
+ 23 (twenty-three)
+ 29 (twenty-nine)
+ ]
+>>
+endobj
+
+11 0 obj
+<<
+ /Limits [
+ 0
+ 9
+ ]
+ /Nums [
+ 1 (one)
+ 2 (two)
+ 3 (three)
+ 5 (five)
+ 6 (six)
+ 9 (nine)
+ ]
+>>
+endobj
+
+12 0 obj
+<<
+ /Limits [
+ 11
+ 19
+ ]
+ /Nums [
+ 11 (elephant)
+ 12 (twelve)
+ 15 (fifteen)
+ 19 (nineteen)
+ ]
+>>
+endobj
+
+
+xref
+0 13
+0000000000 65535 f
+0000000025 00000 n
+0000000079 00000 n
+0000000161 00000 n
+0000000376 00000 n
+0000000475 00000 n
+0000000494 00000 n
+0000000612 00000 n
+0000000647 00000 n
+0000000704 00000 n
+0000000791 00000 n
+0000000937 00000 n
+0000001078 00000 n
+trailer <<
+ /Root 1 0 R
+ /QTest 8 0 R
+ /Size 13
+ /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
+>>
+startxref
+1215
+%%EOF
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index 74b34396..3bcb173e 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -6,6 +6,7 @@
#include <qpdf/QPDFPageDocumentHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
+#include <qpdf/QPDFNumberTreeObjectHelper.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
#include <qpdf/Pl_StdioFile.hh>
@@ -1660,6 +1661,35 @@ void runtest(int n, char const* filename1, char const* arg2)
exit(3);
}
}
+ else if (n == 46)
+ {
+ // Test number tree. This test is crafted to work with
+ // number-tree.pdf
+ QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest");
+ QPDFNumberTreeObjectHelper ntoh(qtest);
+ QPDFNumberTreeObjectHelper::idx_map ntoh_map = ntoh.getAsMap();
+ for (QPDFNumberTreeObjectHelper::idx_map::iterator iter =
+ ntoh_map.begin();
+ iter != ntoh_map.end(); ++iter)
+ {
+ std::cout << (*iter).first << " "
+ << (*iter).second.getStringValue()
+ << std::endl;
+ }
+ assert(1 == ntoh.getMin());
+ assert(29 == ntoh.getMax());
+ assert(ntoh.hasIndex(6));
+ assert(! ntoh.hasIndex(500));
+ QPDFObjectHandle oh;
+ assert(! ntoh.findObject(4, oh));
+ assert(ntoh.findObject(3, oh));
+ assert("three" == oh.getStringValue());
+ QPDFNumberTreeObjectHelper::numtree_number offset = 0;
+ assert(! ntoh.findObjectAtOrBelow(0, oh, offset));
+ assert(ntoh.findObjectAtOrBelow(8, oh, offset));
+ assert("six" == oh.getStringValue());
+ assert(2 == offset);
+ }
else
{
throw std::runtime_error(std::string("invalid test ") +