From be3a8c0e7a5edd30cb8a0f2e7cbc56d0e5bed982 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 23 Feb 2021 07:57:14 -0500 Subject: Keep only referenced form fields in --pages --- include/qpdf/QPDFAcroFormDocumentHelper.hh | 10 ++++-- libqpdf/QPDFAcroFormDocumentHelper.cc | 21 +++++++++-- qpdf/qpdf.cc | 56 ++++++++++++++++++++++++++--- qpdf/qpdf.testcov | 3 ++ qpdf/qtest/qpdf.test | 20 ++++++++++- qpdf/qtest/qpdf/kept-no-fields.pdf | Bin 0 -> 1269 bytes qpdf/qtest/qpdf/kept-some-fields.pdf | Bin 0 -> 7787 bytes 7 files changed, 99 insertions(+), 11 deletions(-) create mode 100644 qpdf/qtest/qpdf/kept-no-fields.pdf create mode 100644 qpdf/qtest/qpdf/kept-some-fields.pdf diff --git a/include/qpdf/QPDFAcroFormDocumentHelper.hh b/include/qpdf/QPDFAcroFormDocumentHelper.hh index fd28a579..8f2b18dc 100644 --- a/include/qpdf/QPDFAcroFormDocumentHelper.hh +++ b/include/qpdf/QPDFAcroFormDocumentHelper.hh @@ -140,7 +140,7 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper std::vector getWidgetAnnotationsForPage(QPDFPageObjectHelper); - // Return form fields for a page. + // Return top-level form fields for a page. QPDF_DLL std::vector getFormFieldsForPage(QPDFPageObjectHelper); @@ -210,11 +210,15 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper QPDFAcroFormDocumentHelper* from_afdh = nullptr); // Copy form fields from a page in a different QPDF object to this - // QPDF. + // QPDF. If copied_fields is not null, it will be initialized with + // the fields that were copied. Items in the vector are objects in + // the receiving QPDF (the one associated with this + // QPDFAcroFormDocumentHelper). QPDF_DLL void copyFieldsFromForeignPage( QPDFPageObjectHelper foreign_page, - QPDFAcroFormDocumentHelper& foreign_afdh); + QPDFAcroFormDocumentHelper& foreign_afdh, + std::vector* copied_fields = nullptr); private: void analyze(); diff --git a/libqpdf/QPDFAcroFormDocumentHelper.cc b/libqpdf/QPDFAcroFormDocumentHelper.cc index 611d469a..78d3c5c7 100644 --- a/libqpdf/QPDFAcroFormDocumentHelper.cc +++ b/libqpdf/QPDFAcroFormDocumentHelper.cc @@ -135,15 +135,21 @@ QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h) std::vector QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph) { + std::set added; std::vector result; auto widget_annotations = getWidgetAnnotationsForPage(ph); for (auto annot: widget_annotations) { auto field = getFieldForAnnotation(annot); field = field.getTopLevelField(); - if (field.getObjectHandle().isDictionary()) + auto og = field.getObjectHandle().getObjGen(); + if (! added.count(og)) { - result.push_back(field); + added.insert(og); + if (field.getObjectHandle().isDictionary()) + { + result.push_back(field); + } } } return result; @@ -674,18 +680,27 @@ QPDFAcroFormDocumentHelper::transformAnnotations( void QPDFAcroFormDocumentHelper::copyFieldsFromForeignPage( QPDFPageObjectHelper foreign_page, - QPDFAcroFormDocumentHelper& foreign_afdh) + QPDFAcroFormDocumentHelper& foreign_afdh, + std::vector* copied_fields) { std::set added; for (auto field: foreign_afdh.getFormFieldsForPage(foreign_page)) { auto new_field = this->qpdf.copyForeignObject( field.getObjectHandle()); + if (! new_field.isIndirect()) + { + new_field = this->qpdf.makeIndirectObject(new_field); + } auto og = new_field.getObjGen(); if (! added.count(og)) { addFormField(new_field); added.insert(og); + if (copied_fields) + { + copied_fields->push_back(new_field); + } } } } diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 2080a44a..75f046ca 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -5846,6 +5846,7 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) std::map> afdh_map; auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf); + std::set referenced_fields; for (std::vector::iterator iter = parsed_specs.begin(); iter != parsed_specs.end(); ++iter) @@ -5906,7 +5907,13 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) else if (other_afdh->hasAcroForm()) { QTC::TC("qpdf", "qpdf copy form fields in pages"); - this_afdh->copyFieldsFromForeignPage(to_copy, *other_afdh); + std::vector copied_fields; + this_afdh->copyFieldsFromForeignPage( + to_copy, *other_afdh, &copied_fields); + for (auto const& cf: copied_fields) + { + referenced_fields.insert(cf.getObjGen()); + } } } if (page_data.qpdf->anyWarnings()) @@ -5929,16 +5936,57 @@ static void handle_page_specs(QPDF& pdf, Options& o, bool& warnings) // Delete page objects for unused page in primary. This prevents // those objects from being preserved by being referred to from - // other places, such as the outlines dictionary. + // other places, such as the outlines dictionary. Also make sure + // we keep form fields from pages we preserved. for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) { - if (selected_from_orig.count(QIntC::to_int(pageno)) == 0) + auto page = orig_pages.at(pageno); + if (selected_from_orig.count(QIntC::to_int(pageno))) + { + for (auto field: this_afdh->getFormFieldsForPage(page)) + { + QTC::TC("qpdf", "qpdf pages keeping field from original"); + referenced_fields.insert(field.getObjectHandle().getObjGen()); + } + } + else { pdf.replaceObject( - orig_pages.at(pageno).getObjectHandle().getObjGen(), + page.getObjectHandle().getObjGen(), QPDFObjectHandle::newNull()); } } + // Remove unreferenced form fields + if (this_afdh->hasAcroForm()) + { + auto acroform = pdf.getRoot().getKey("/AcroForm"); + auto fields = acroform.getKey("/Fields"); + if (fields.isArray()) + { + auto new_fields = QPDFObjectHandle::newArray(); + if (fields.isIndirect()) + { + new_fields = pdf.makeIndirectObject(new_fields); + } + for (auto const& field: fields.aitems()) + { + if (referenced_fields.count(field.getObjGen())) + { + new_fields.appendItem(field); + } + } + if (new_fields.getArrayNItems() > 0) + { + QTC::TC("qpdf", "qpdf keep some fields in pages"); + acroform.replaceKey("/Fields", new_fields); + } + else + { + QTC::TC("qpdf", "qpdf no more fields in pages"); + pdf.getRoot().removeKey("/AcroForm"); + } + } + } } static void handle_rotations(QPDF& pdf, Options& o) diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index a68e88df..8d0607e3 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -577,3 +577,6 @@ QPDFAcroFormDocumentHelper field with parent 3 QPDFAcroFormDocumentHelper modify ap matrix 0 qpdf copy form fields in split_pages 0 qpdf copy form fields in pages 0 +qpdf keep some fields in pages 0 +qpdf pages keeping field from original 0 +qpdf no more fields in pages 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 2f12d397..da98020a 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -2414,7 +2414,7 @@ foreach my $f (qw(screen print)) show_ntests(); # ---------- $td->notify("--- Copy Annotations ---"); -$n_tests += 21; +$n_tests += 25; $td->runtest("complex copy annotations", {$td->COMMAND => @@ -2479,6 +2479,24 @@ for (my $i = 1; $i <= 2; ++$i) {$td->FILE => "split-out-$i.pdf"}, {$td->FILE => "fields-split-$i.pdf"}); } +$td->runtest("keeping some fields", + {$td->COMMAND => + "qpdf --static-id fields-two-pages.pdf" . + " --pages . 1 minimal.pdf -- a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "kept-some-fields.pdf"}); +$td->runtest("not keeping any fields", + {$td->COMMAND => + "qpdf --static-id kept-some-fields.pdf" . + " --pages . 2 -- a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "kept-no-fields.pdf"}); show_ntests(); # ---------- diff --git a/qpdf/qtest/qpdf/kept-no-fields.pdf b/qpdf/qtest/qpdf/kept-no-fields.pdf new file mode 100644 index 00000000..08610fb3 Binary files /dev/null and b/qpdf/qtest/qpdf/kept-no-fields.pdf differ diff --git a/qpdf/qtest/qpdf/kept-some-fields.pdf b/qpdf/qtest/qpdf/kept-some-fields.pdf new file mode 100644 index 00000000..1a6cc5d1 Binary files /dev/null and b/qpdf/qtest/qpdf/kept-some-fields.pdf differ -- cgit v1.2.3-54-g00ecf