From 0c7c7e4ba485fd39f5b6d41fa2924c607d2eeda0 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 25 Jun 2022 13:55:45 -0400 Subject: Track whether certain page modifying methods have been called We need to know whether pushInheritedAttributesToPage or getAllPages have been called when generating JSON output. When reading the JSON back in, we have to call the same methods so that object numbers will line up properly. --- TODO | 17 ++++++++++++----- include/qpdf/QPDF.hh | 7 +++++++ libqpdf/QPDF.cc | 14 ++++++++++++++ libqpdf/QPDF_optimization.cc | 1 + libqpdf/QPDF_pages.cc | 1 + qpdf/test_driver.cc | 5 +++++ 6 files changed, 40 insertions(+), 5 deletions(-) diff --git a/TODO b/TODO index 383756f9..270e9df6 100644 --- a/TODO +++ b/TODO @@ -9,10 +9,6 @@ Before Release: * Release qtest with updates to qtest-driver and copy back into qpdf Next: -* QPDF -- track whether the pages tree was modified (whether - getAllPages was ever called. If so, consider generating a non-flat - pages tree before creating output to better handle files with lots - of pages. * JSON v2 fixes Pending changes: @@ -44,6 +40,12 @@ Pending changes: drivers from the qpdf directory into the latter category as long as doing so isn't too troublesome from a coverage standpoint. * Consider adding fuzzer code for JSON +* Consider generating a non-flat pages tree before creating output to + better handle files with lots of pages. If there are more than 256 + pages, add a second layer with the second layer nodes having no more + than 256 nodes and being as evenly sized as possible. Don't worry + about the case of more than 65,536 pages. If the top node has more + than 256 children, we'll live with it. 
Soon: Break ground on "Document-level work" @@ -81,7 +83,8 @@ JSON v2 fixes "qpdf": [ { "jsonversion": 2, - "repairpagestree": false, + "pushedinheritedpageresources": false, + "calledgetallpages": false, "maxobjectid": 10 }, { @@ -110,6 +113,10 @@ JSON v2 fixes the same number of elements whose individual elements are validated according to the regular rules. + * When reading back in, we'll have to call + pushInheritedAttributesToPage or getAllPages based on the values + of the metadata. + * Support json v2 in the C API. At a minimum, write_json, create_from_json, and update_from_json need to be there and should take the same kinds of functions as the C API for logger. diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 52bc3579..f0c9f6b4 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -709,6 +709,11 @@ class QPDF QPDF_DLL std::vector const& getAllPages(); + QPDF_DLL + bool everCalledGetAllPages() const; + QPDF_DLL + bool everPushedInheritedAttributesToPages() const; + // These methods, given a page object or its object/generation // number, returns the 0-based index into the array returned by // getAllPages() for that page. 
An exception is thrown if the page @@ -1690,6 +1695,8 @@ class QPDF std::vector all_pages; std::map pageobj_to_pages_pos; bool pushed_inherited_attributes_to_pages; + bool ever_pushed_inherited_attributes_to_pages; + bool ever_called_get_all_pages; std::vector warnings; std::map object_copiers; std::shared_ptr copied_streams; diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 2d92bf1d..b51c5571 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -222,6 +222,8 @@ QPDF::Members::Members() : attempt_recovery(true), encp(new EncryptionParameters), pushed_inherited_attributes_to_pages(false), + ever_pushed_inherited_attributes_to_pages(false), + ever_called_get_all_pages(false), copied_stream_data_provider(0), reconstructed_xref(false), fixed_dangling_refs(false), @@ -2879,3 +2881,15 @@ QPDF::stopOnError(std::string const& message) this->m->file->getLastOffset(), message); } + +bool +QPDF::everCalledGetAllPages() const +{ + return this->m->ever_called_get_all_pages; +} + +bool +QPDF::everPushedInheritedAttributesToPages() const +{ + return this->m->ever_pushed_inherited_attributes_to_pages; +} diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index da0db663..1fd6da6f 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -162,6 +162,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) " pushing inherited attributes to pages"); } this->m->pushed_inherited_attributes_to_pages = true; + this->m->ever_pushed_inherited_attributes_to_pages = true; } void diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index bfd593a2..bd3f80a6 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -54,6 +54,7 @@ QPDF::getAllPages() // Note that pushInheritedAttributesToPage may also be used to // initialize this->m->all_pages. 
if (this->m->all_pages.empty()) { + this->m->ever_called_get_all_pages = true; std::set visited; std::set seen; QPDFObjectHandle pages = getRoot().getKey("/Pages"); diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 2384ddef..0eb6ed8d 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -691,7 +691,9 @@ test_15(QPDF& pdf, char const* arg2) // Remove pages from various places, checking to make sure // that our pages reference is getting updated. assert(pages.size() == 10); + assert(!pdf.everPushedInheritedAttributesToPages()); pdf.removePage(pages.back()); // original page 9 + assert(pdf.everPushedInheritedAttributesToPages()); assert(pages.size() == 9); pdf.removePage(*pages.begin()); // original page 0 assert(pages.size() == 8); @@ -767,7 +769,9 @@ static void test_16(QPDF& pdf, char const* arg2) { // Insert a page manually and then update the cache. + assert(!pdf.everCalledGetAllPages()); std::vector const& all_pages = pdf.getAllPages(); + assert(pdf.everCalledGetAllPages()); QPDFObjectHandle contents = createPageContents(pdf, "New page 10"); QPDFObjectHandle page = @@ -785,6 +789,7 @@ test_16(QPDF& pdf, char const* arg2) kids.appendItem(page); assert(all_pages.size() == 10); pdf.updateAllPagesCache(); + assert(pdf.everCalledGetAllPages()); assert(all_pages.size() == 11); assert(all_pages.back().getObjGen() == page.getObjGen()); -- cgit v1.2.3-54-g00ecf