aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf/QPDF_pages.cc
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2012-06-21 16:42:18 +0200
committerJay Berkenbilt <ejb@ql.org>2012-06-21 21:01:02 +0200
commiteb802cfa8c7109504ad10bf4c89c47c876d9a382 (patch)
treea537b466b108509a59c26de3dfa5fa9e2b65caac /libqpdf/QPDF_pages.cc
parente01ae1968b79841797b2ae59eda00b867604e3f9 (diff)
downloadqpdf-eb802cfa8c7109504ad10bf4c89c47c876d9a382.tar.zst
Implement page manipulation APIs
Diffstat (limited to 'libqpdf/QPDF_pages.cc')
-rw-r--r--libqpdf/QPDF_pages.cc237
1 files changed, 146 insertions, 91 deletions
diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc
index bd631c96..930e8bd1 100644
--- a/libqpdf/QPDF_pages.cc
+++ b/libqpdf/QPDF_pages.cc
@@ -6,6 +6,40 @@
#include <qpdf/QUtil.hh>
#include <qpdf/QPDFExc.hh>
+// In support of page manipulation APIs, these methods internally
+// maintain state about pages in a pair of data structures: all_pages,
+// which is a vector of page objects, and pageobj_to_pages_pos, which
+// maps a page object to its position in the all_pages array.
+// Unfortunately, the getAllPages() method returns a const reference
+// to all_pages and has been in the public API long before the
+// introduction of mutation APIs, so we're pretty much stuck with it.
+// Anyway, there are lots of calls to it in the library, so the
+// efficiency of having it cached is probably worth keeping it.
+
+// The goal of this code is to ensure that the all_pages vector, which
+// users may have a reference to, and the pageobj_to_pages_pos map,
+// which users will not have access to, remain consistent outside of
+// any call to the library. As long as users only touch the /Pages
+// structure through page-specific API calls, they never have to worry
+// about anything, and this will also stay consistent. If a user
+// touches anything about the /Pages structure outside of these calls
+// (such as by directly looking up and manipulating the underlying
+// objects), they can call updateAllPagesCache() to bring things back in
+// sync.
+
+// If the user doesn't ever use the page manipulation APIs, then qpdf
+// leaves the /Pages structure alone. If the user does use the APIs,
+// then we push all inheritable objects down and flatten the /Pages
+// tree. This makes it easier for us to keep /Pages, all_pages, and
+// pageobj_to_pages_pos internally consistent at all times.
+
+// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the
+// Pages structure consistent should remain in as few places as
+// possible. As of initial writing, only flattenPagesTree,
+// insertPage, and removePage, along with methods they call, are
+// concerned with it. Everything else goes through one of those
+// methods.
+
std::vector<QPDFObjectHandle> const&
QPDF::getAllPages()
{
@@ -44,152 +78,173 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
}
}
-// FIXXX here down
-
void
-QPDF::clearPagesCache()
+QPDF::updateAllPagesCache()
{
+ // Force regeneration of the pages cache. We force immediate
+ // recalculation of all_pages since users may have references to
+ // it that they got from calls to getAllPages(). We can defer
+ // recalculation of pageobj_to_pages_pos until needed.
+ QTC::TC("qpdf", "QPDF updateAllPagesCache");
this->all_pages.clear();
this->pageobj_to_pages_pos.clear();
+ getAllPages();
}
void
QPDF::flattenPagesTree()
{
- clearPagesCache();
+ // If not already done, flatten the /Pages structure and
+ // initialize pageobj_to_pages_pos.
- // FIXME: more specific method, we don't want to generate the extra stuff.
- // We also need cheap fixup after addPage/removePage.
+ if (! this->pageobj_to_pages_pos.empty())
+ {
+ return;
+ }
- // no compressed objects to be produced here...
- std::map<int, int> object_stream_data;
- optimize(object_stream_data); // push down inheritance
+ // Push inherited objects down to the /Page level
+ optimizePagesTree(true);
+ getAllPages();
- std::vector<QPDFObjectHandle> kids = this->getAllPages();
QPDFObjectHandle pages = this->trailer.getKey("/Root").getKey("/Pages");
- const int len = kids.size();
+ int const len = (int)this->all_pages.size();
for (int pos = 0; pos < len; ++pos)
{
- // populate pageobj_to_pages_pos
- ObjGen og(kids[pos].getObjectID(), kids[pos].getGeneration());
- if (! this->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second)
- {
- // insert failed: duplicate entry found
- *out_stream << "WARNING: duplicate page reference found, "
- << "but currently not fully supported." << std::endl;
- }
-
- // fix parent links
- kids[pos].replaceKey("/Parent", pages);
+ // populate pageobj_to_pages_pos and fix parent pointer
+ insertPageobjToPage(this->all_pages[pos], pos, true);
+ this->all_pages[pos].replaceKey("/Parent", pages);
}
- pages.replaceKey("/Kids", QPDFObjectHandle::newArray(kids));
+ pages.replaceKey("/Kids", QPDFObjectHandle::newArray(this->all_pages));
// /Count has not changed
assert(pages.getKey("/Count").getIntValue() == len);
}
-int
-QPDF::findPage(int objid, int generation)
-{
- if (this->pageobj_to_pages_pos.empty())
- {
- flattenPagesTree();
- }
- std::map<ObjGen, int>::iterator it =
- this->pageobj_to_pages_pos.find(ObjGen(objid, generation));
- if (it != this->pageobj_to_pages_pos.end())
- {
- return (*it).second;
- }
- return -1; // throw?
-}
-
-int
-QPDF::findPage(QPDFObjectHandle const& pageoh)
+// Record in pageobj_to_pages_pos that page object obj is at index
+// pos of all_pages.
+//
+// When check_duplicate is true, the page must not already be in the
+// map; if it is, a QPDFExc (qpdf_e_pages) is thrown. When
+// check_duplicate is false, the caller is renumbering pages that are
+// already known (as insertPage and removePage do after shifting
+// pages), so any existing entry is overwritten with the new position.
+void
+QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos,
+                          bool check_duplicate)
{
- if (!pageoh.isInitialized())
+    ObjGen og(obj.getObjectID(), obj.getGeneration());
+    if (! check_duplicate)
+    {
+        // std::map::insert never replaces an existing entry, so use
+        // operator[] to make sure a stale position is updated.
+        this->pageobj_to_pages_pos[og] = pos;
+        return;
+    }
+    bool duplicate =
+        (! this->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second);
+    if (duplicate)
{
- return -1;
- // TODO? throw
+        QTC::TC("qpdf", "QPDF duplicate page reference");
+        setLastObjectDescription("page " + QUtil::int_to_string(pos) +
+                                 " (numbered from zero)",
+                                 og.obj, og.gen);
+        throw QPDFExc(qpdf_e_pages, this->file->getName(),
+                      this->last_object_description, 0,
+                      "duplicate page reference found;"
+                      " this would cause loss of data");
}
- return findPage(pageoh.getObjectID(), pageoh.getGeneration());
}
void
-QPDF::addPage(QPDFObjectHandle newpage, bool first)
+QPDF::insertPage(QPDFObjectHandle newpage, int pos)
{
- if (this->pageobj_to_pages_pos.empty())
- {
- flattenPagesTree();
- }
+ // pos is numbered from 0, so pos = 0 inserts at the beginning and
+ // pos = npages adds to the end.
- newpage.assertPageObject(); // FIXME: currently private
+ flattenPagesTree();
+ newpage.assertPageObject();
+
+ QTC::TC("qpdf", "QPDF insert page",
+ (pos == 0) ? 0 : // insert at beginning
+ (pos == ((int)this->all_pages.size())) ? 1 : // insert at end
+ 2); // insert in middle
QPDFObjectHandle pages = this->trailer.getKey("/Root").getKey("/Pages");
QPDFObjectHandle kids = pages.getKey("/Kids");
+ assert ((pos >= 0) && (pos <= (int)this->all_pages.size()));
newpage.replaceKey("/Parent", pages);
- if (first)
- {
- kids.insertItem(0, newpage);
- }
- else
+ kids.insertItem(pos, newpage);
+ int npages = kids.getArrayNItems();
+ pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
+ this->all_pages.insert(this->all_pages.begin() + pos, newpage);
+ assert((int)this->all_pages.size() == npages);
+ for (int i = pos + 1; i < npages; ++i)
{
- kids.appendItem(newpage);
+ insertPageobjToPage(this->all_pages[i], i, false);
}
- pages.replaceKey("/Count",
- QPDFObjectHandle::newInteger(kids.getArrayNItems()));
-
- // FIXME: this is overkill, but cache is now stale
- clearPagesCache();
+ insertPageobjToPage(newpage, pos, true);
+ assert((int)this->pageobj_to_pages_pos.size() == npages);
}
void
-QPDF::addPageAt(QPDFObjectHandle newpage, bool before,
- QPDFObjectHandle const &refpage)
+QPDF::removePage(QPDFObjectHandle page)
{
- int refpos = findPage(refpage); // also ensures flat /Pages
- if (refpos == -1)
- {
- throw "Could not find refpage";
- }
-
- newpage.assertPageObject();
+ int pos = findPage(page); // also ensures flat /Pages
+ QTC::TC("qpdf", "QPDF remove page",
+ (pos == 0) ? 0 : // remove at beginning
+ (pos == ((int)this->all_pages.size() - 1)) ? 1 : // remove at end
+ 2); // remove in middle
QPDFObjectHandle pages = this->trailer.getKey("/Root").getKey("/Pages");
QPDFObjectHandle kids = pages.getKey("/Kids");
+ kids.eraseItem(pos);
+ int npages = kids.getArrayNItems();
+ pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
+ this->all_pages.erase(this->all_pages.begin() + pos);
+ assert((int)this->all_pages.size() == npages);
+ this->pageobj_to_pages_pos.erase(
+ ObjGen(page.getObjectID(), page.getGeneration()));
+ assert((int)this->pageobj_to_pages_pos.size() == npages);
+ for (int i = pos; i < npages; ++i)
+ {
+ insertPageobjToPage(this->all_pages[i], i, false);
+ }
+}
+
+void
+QPDF::addPageAt(QPDFObjectHandle newpage, bool before,
+ QPDFObjectHandle refpage)
+{
+ int refpos = findPage(refpage);
if (! before)
{
++refpos;
}
-
- newpage.replaceKey("/Parent", pages);
- kids.insertItem(refpos, newpage);
- pages.replaceKey("/Count",
- QPDFObjectHandle::newInteger(kids.getArrayNItems()));
-
- // FIXME: this is overkill, but cache is now stale
- clearPagesCache();
+ insertPage(newpage, refpos);
}
+
void
-QPDF::removePage(QPDFObjectHandle const& pageoh)
+QPDF::addPage(QPDFObjectHandle newpage, bool first)
{
- int pos = findPage(pageoh); // also ensures flat /Pages
- if (pos == -1)
+ getAllPages();
+ if (first)
{
- throw "Can't remove non-existing page";
+ insertPage(newpage, 0);
}
+ else
+ {
+ insertPage(newpage, (int)this->all_pages.size());
+ }
+}
- QPDFObjectHandle pages = this->trailer.getKey("/Root").getKey("/Pages");
- QPDFObjectHandle kids = pages.getKey("/Kids");
-
- kids.eraseItem(pos);
- pages.replaceKey("/Count",
- QPDFObjectHandle::newInteger(kids.getArrayNItems()));
+int
+QPDF::findPage(QPDFObjectHandle& page)
+{
+ page.assertPageObject(); // validate the handle before lookup; NOTE(review): failure behavior of assertPageObject() is not visible here -- confirm
+ return findPage(page.getObjectID(), page.getGeneration()); // delegate to the (objid, generation) overload, which flattens /Pages and returns the page's position
+}
- // FIXME: this is overkill, but cache is now stale
- clearPagesCache();
+// Return the zero-based position, within the flattened /Pages tree,
+// of the page with the given object ID and generation. The tree is
+// flattened first if that has not been done yet. Throws QPDFExc
+// (qpdf_e_pages) if the object is not referenced as a page.
+int
+QPDF::findPage(int objid, int generation)
+{
+    flattenPagesTree();
+    std::map<ObjGen, int>::iterator it =
+        this->pageobj_to_pages_pos.find(ObjGen(objid, generation));
+    if (it == this->pageobj_to_pages_pos.end())
+    {
+        setLastObjectDescription("page object", objid, generation);
+        // The exception must actually be thrown: merely constructing
+        // it would fall through and dereference the end iterator.
+        throw QPDFExc(qpdf_e_pages, this->file->getName(),
+                      this->last_object_description, 0,
+                      "page object not referenced in /Pages tree");
+    }
+    return (*it).second;
}