From 9fb174b9e9ff3d715091d435942de1e2d9db72ef Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 4 Mar 2021 14:46:07 -0500 Subject: Major rework of handling form fields when copying pages (fixes #509) --- include/qpdf/QPDF.hh | 3 +- include/qpdf/QPDFAcroFormDocumentHelper.hh | 87 +++++++++++++++++++++++++----- include/qpdf/QPDFFormFieldObjectHelper.hh | 10 +++- include/qpdf/QPDFPageDocumentHelper.hh | 12 +++++ include/qpdf/QPDFPageObjectHelper.hh | 17 +++--- 5 files changed, 106 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index d2161acd..48b92137 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -628,7 +628,8 @@ class QPDF // identical to the identically named methods there, except that // these versions use QPDFObjectHandle instead of // QPDFPageObjectHelper, so please see comments in that file for - // descriptions. + // descriptions. There are subtleties you need to know about, so + // please look at the comments there. QPDF_DLL void pushInheritedAttributesToPage(); QPDF_DLL diff --git a/include/qpdf/QPDFAcroFormDocumentHelper.hh b/include/qpdf/QPDFAcroFormDocumentHelper.hh index 8f2b18dc..3cc456e5 100644 --- a/include/qpdf/QPDFAcroFormDocumentHelper.hh +++ b/include/qpdf/QPDFAcroFormDocumentHelper.hh @@ -103,20 +103,33 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper void invalidateCache(); QPDF_DLL - bool - hasAcroForm(); + bool hasAcroForm(); // Add a form field, initializing the document's AcroForm - // dictionary if needed. Calling this method invalidates the - // cache, which makes it possible to add a field that is not yet - // associated with an annotation or page. + // dictionary if needed, updating the cache if necessary. Note + // that if you are adding fields that are copies of other fields, + // this method may result in multiple fields existing with the + // same qualified name, which can have unexpected side effects.
In + // that case, you should use addAndRenameFormFields() instead. QPDF_DLL void addFormField(QPDFFormFieldObjectHelper); + // Add a collection of form fields making sure that their fully + // qualified names don't conflict with already present form + // fields. Fields within the collection of new fields that have + // the same name as each other will continue to do so. + QPDF_DLL + void addAndRenameFormFields(std::vector fields); + // Remove fields from the fields array QPDF_DLL void removeFormFields(std::set const&); + // Set the name of a field, updating internal records of field + // names. Name should be UTF-8 encoded. + QPDF_DLL + void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name); + // Return a vector of all terminal fields in a document. Terminal // fields are fields that have no children that are also fields. // Terminal fields may still have children that are annotations. @@ -124,8 +137,15 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper // list, but you can still reach them through the getParent method // of the field object helper. QPDF_DLL - std::vector - getFormFields(); + std::vector getFormFields(); + + // Return all the form fields that have the given fully-qualified + // name and also have an explicit "/T" attribute. For this + // information to be accurate, any changes to field names must be + // done through setFormFieldName() above. + QPDF_DLL + std::set + getFieldsWithQualifiedName(std::string const& name) const; // Return the annotations associated with a terminal field. Note // that in the case of a field having a single annotation, the @@ -198,7 +218,11 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper // avoid the expensive process of creating one for each call to // transformAnnotations. New fields and annotations are not added // to the document or pages. You have to do that yourself after - // calling transformAnnotations. + // calling transformAnnotations. 
If this operation will leave + // orphaned fields behind, such as if you are replacing the old + // annotations with the new ones on the same page and the fields + // and annotations are not shared, you will also need to remove + // the old fields to prevent them from hanging round unreferenced. QPDF_DLL void transformAnnotations( QPDFObjectHandle old_annots, @@ -209,11 +233,33 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper QPDF* from_qpdf = nullptr, QPDFAcroFormDocumentHelper* from_afdh = nullptr); - // Copy form fields from a page in a different QPDF object to this - // QPDF. If copied_fields is not null, it will be initialized with - // the fields that were copied. Items in the vector are objects in - // the receiving QPDF (the one associated with this - // QPDFAcroFormDocumentHelper). + // Copy form fields and annotations from one page to another, + // allowing the from page to be in a different QPDF or in the same + // QPDF. This would typically be called after calling addPage to + // add field/annotation awareness. When just copying the page by + // itself, annotations end up being shared, and fields end up + // being omitted because there is no reference to the field from + // the page. This method ensures that each separate copy of a page + // has private annotations and that fields and annotations are + // properly updated to resolve conflicts that may occur from + // common resource and field names across documents. It is + // basically a wrapper around transformAnnotations that handles + // updating the receiving page. If new_fields is non-null, any + // newly created fields are added to it. + QPDF_DLL + void fixCopiedAnnotations( + QPDFObjectHandle to_page, + QPDFObjectHandle from_page, + QPDFAcroFormDocumentHelper& from_afdh, + std::set* new_fields = nullptr); + + // copyFieldsFromForeignPage was added in qpdf 10.2 and made to do + // nothing in 10.3. 
It wasn't actually doing the right thing and + // would result in broken files in all but the simplest case of a + // single page from one file being added to another file, as + // happens with qpdf --split-pages. + [[deprecated("Use fixCopiedAnnotations instead")]] + // ABI: delete this method QPDF_DLL void copyFieldsFromForeignPage( QPDFPageObjectHelper foreign_page, @@ -225,6 +271,19 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper void traverseField(QPDFObjectHandle field, QPDFObjectHandle parent, int depth, std::set& visited); + QPDFObjectHandle getOrCreateAcroForm(); + void adjustInheritedFields( + QPDFObjectHandle obj, + bool override_da, std::string const& from_default_da, + bool override_q, int from_default_q); + void adjustDefaultAppearances( + QPDFObjectHandle obj, + std::map> const& dr_map); + void adjustAppearanceStream( + QPDFObjectHandle stream, + std::map> dr_map); class Members { @@ -243,6 +302,8 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper std::vector > field_to_annotations; std::map annotation_to_field; + std::map field_to_name; + std::map> name_to_fields; }; PointerHolder m; diff --git a/include/qpdf/QPDFFormFieldObjectHelper.hh b/include/qpdf/QPDFFormFieldObjectHelper.hh index 6052f2e3..4085371e 100644 --- a/include/qpdf/QPDFFormFieldObjectHelper.hh +++ b/include/qpdf/QPDFFormFieldObjectHelper.hh @@ -172,12 +172,18 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper QPDF_DLL std::vector getChoices(); - // Set an attribute to the given value + // Set an attribute to the given value. If you have a + // QPDFAcroFormDocumentHelper and you want to set the name of a + // field, use QPDFAcroFormDocumentHelper::setFormFieldName + // instead. QPDF_DLL void setFieldAttribute(std::string const& key, QPDFObjectHandle value); // Set an attribute to the given value as a Unicode string (UTF-16 - // BE encoded). The input string should be UTF-8 encoded. + // BE encoded). The input string should be UTF-8 encoded. 
If you + // have a QPDFAcroFormDocumentHelper and you want to set the name + // of a field, use QPDFAcroFormDocumentHelper::setFormFieldName + // instead. QPDF_DLL void setFieldAttribute(std::string const& key, std::string const& utf8_value); diff --git a/include/qpdf/QPDFPageDocumentHelper.hh b/include/qpdf/QPDFPageDocumentHelper.hh index 6abe61e4..02c039b5 100644 --- a/include/qpdf/QPDFPageDocumentHelper.hh +++ b/include/qpdf/QPDFPageDocumentHelper.hh @@ -79,6 +79,18 @@ class QPDFPageDocumentHelper: public QPDFDocumentHelper // file. You could do this, for example, to convert a page into a // form XObject, though for that, you're better off using // QPDFPageObjectHelper::getFormXObjectForPage. + // + // This method does not have any specific awareness of annotations + // or form fields, so if you just add a page without thinking + // about it, you might end up with two pages that share form + // fields or annotations. While the page may look fine, it will + // probably not function properly with regard to interactive + // features. To work around this, you should call + // QPDFAcroFormDocumentHelper::fixCopiedAnnotations. A future + // version of qpdf will likely provide a higher-level interface + // for copying pages around that will handle document-level + // constructs in a less error-prone fashion. + QPDF_DLL void addPage(QPDFPageObjectHelper newpage, bool first); diff --git a/include/qpdf/QPDFPageObjectHelper.hh b/include/qpdf/QPDFPageObjectHelper.hh index 2feb1199..5e162466 100644 --- a/include/qpdf/QPDFPageObjectHelper.hh +++ b/include/qpdf/QPDFPageObjectHelper.hh @@ -354,13 +354,16 @@ class QPDFPageObjectHelper: public QPDFObjectHelper // AcroForm dictionary as well. You can use this to copy // annotations from a page that was converted to a form XObject // and added to another page. For example of this, see - // examples/pdf-overlay-page.cc.
Note that if you use this to copy - // annotations from one page to another in the same document and - // you use a transformation matrix other than the identity matrix, - // it will alter the original annotation, which is probably not - // what you want. Also, if you copy the same page multiple times - // with different transformation matrices, the effect will be - // cumulative, which is probably also not what you want. + // examples/pdf-overlay-page.cc. This method calls + // QPDFAcroFormDocumentHelper::transformAnnotations, which will + // copy annotations and form fields so that you can copy + // annotations from a source page to any number of other pages, + // even with different matrices, and maintain independence from + // the original annotations. See also + // QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be + // used if you copy a page and want to repair the annotations on + // the destination page to make them independent from the original + // page's annotations. // // If you pass in a QPDFAcroFormDocumentHelper*, the method will // use that instead of creating one in the function. Creating -- cgit v1.2.3-54-g00ecf