From 0b05111db80469d3f556209bfd856af1fda9b142 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 19 Jun 2018 09:26:41 -0400 Subject: Implement helper class for interactive forms --- libqpdf/QPDFAcroFormDocumentHelper.cc | 252 ++++++++++++++++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 libqpdf/QPDFAcroFormDocumentHelper.cc (limited to 'libqpdf/QPDFAcroFormDocumentHelper.cc') diff --git a/libqpdf/QPDFAcroFormDocumentHelper.cc b/libqpdf/QPDFAcroFormDocumentHelper.cc new file mode 100644 index 00000000..7e70fd92 --- /dev/null +++ b/libqpdf/QPDFAcroFormDocumentHelper.cc @@ -0,0 +1,252 @@ +#include + +#include +#include + +QPDFAcroFormDocumentHelper::Members::~Members() +{ +} + +QPDFAcroFormDocumentHelper::Members::Members() : + cache_valid(false) +{ +} + +QPDFAcroFormDocumentHelper::QPDFAcroFormDocumentHelper(QPDF& qpdf) : + QPDFDocumentHelper(qpdf), + m(new Members()) +{ +} + +void +QPDFAcroFormDocumentHelper::invalidateCache() +{ + this->m->cache_valid = false; + this->m->field_to_annotations.clear(); + this->m->annotation_to_field.clear(); +} + +bool +QPDFAcroFormDocumentHelper::hasAcroForm() +{ + return this->qpdf.getRoot().hasKey("/AcroForm"); +} + +std::vector +QPDFAcroFormDocumentHelper::getFormFields() +{ + analyze(); + std::vector result; + for (std::map >::iterator iter = + this->m->field_to_annotations.begin(); + iter != this->m->field_to_annotations.end(); ++iter) + { + result.push_back(this->qpdf.getObjectByObjGen((*iter).first)); + } + return result; +} + +std::vector +QPDFAcroFormDocumentHelper::getAnnotationsForField(QPDFFormFieldObjectHelper h) +{ + analyze(); + std::vector result; + QPDFObjGen og(h.getObjectHandle().getObjGen()); + if (this->m->field_to_annotations.count(og)) + { + result = this->m->field_to_annotations[og]; + } + return result; +} + +std::vector +QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h) +{ + return h.getAnnotations("/Widget"); +} + +QPDFFormFieldObjectHelper +QPDFAcroFormDocumentHelper::getFieldForAnnotation(QPDFAnnotationObjectHelper h) +{ + QPDFObjectHandle oh = h.getObjectHandle(); + if (! (oh.isDictionary() && + oh.getKey("/Subtype").isName() && + (oh.getKey("/Subtype").getName() == "/Widget"))) + { + throw std::logic_error( + "QPDFAnnotationObjectHelper::getFieldForAnnotation called for" + " non-/Widget annotation"); + } + analyze(); + QPDFFormFieldObjectHelper result(QPDFObjectHandle::newNull()); + QPDFObjGen og(oh.getObjGen()); + if (this->m->annotation_to_field.count(og)) + { + result = this->m->annotation_to_field[og]; + } + return result; +} + +void +QPDFAcroFormDocumentHelper::analyze() +{ + if (this->m->cache_valid) + { + return; + } + this->m->cache_valid = true; + QPDFObjectHandle acroform = this->qpdf.getRoot().getKey("/AcroForm"); + if (! (acroform.isDictionary() && acroform.hasKey("/Fields"))) + { + return; + } + QPDFObjectHandle fields = acroform.getKey("/Fields"); + if (! fields.isArray()) + { + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper fields not array"); + acroform.warnIfPossible( + "/Fields key of /AcroForm dictionary is not an array; ignoring"); + fields = QPDFObjectHandle::newArray(); + } + + // Traverse /AcroForm to find annotations and map them + // bidirectionally to fields. + + std::set visited; + size_t nfields = fields.getArrayNItems(); + QPDFObjectHandle null(QPDFObjectHandle::newNull()); + for (size_t i = 0; i < nfields; ++i) + { + traverseField(fields.getArrayItem(i), null, 0, visited); + } + + // All Widget annotations should have been encountered by + // traversing /AcroForm, but in case any weren't, find them by + // walking through pages, and treat any widget annotation that is + // not associated with a field as its own field. This just ensures + // that requesting the field for any annotation we find through a + // page's /Annots list will have some associated field. Note that + // a file that contains this kind of error will probably not + // actually work with most viewers. + + QPDFPageDocumentHelper dh(this->qpdf); + std::vector pages = dh.getAllPages(); + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + QPDFPageObjectHelper ph(*iter); + std::vector annots = + getWidgetAnnotationsForPage(ph); + for (std::vector::iterator i2 = + annots.begin(); + i2 != annots.end(); ++i2) + { + QPDFObjectHandle annot((*i2).getObjectHandle()); + QPDFObjGen og(annot.getObjGen()); + if (this->m->annotation_to_field.count(og) == 0) + { + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper orphaned widget"); + // This is not supposed to happen, but it's easy + // enough for us to handle this case. Treat the + // annotation as its own field. This could allow qpdf + // to sensibly handle a case such as a PDF creator + // adding a self-contained annotation (merged with the + // field dictionary) to the page's /Annots array and + // forgetting to also put it in /AcroForm. + annot.warnIfPossible( + "this widget annotation is not" + " reachable from /AcroForm in the document catalog"); + this->m->annotation_to_field[og] = + QPDFFormFieldObjectHelper(annot); + this->m->field_to_annotations[og].push_back( + QPDFAnnotationObjectHelper(annot)); + } + } + } +} + +void +QPDFAcroFormDocumentHelper::traverseField( + QPDFObjectHandle field, QPDFObjectHandle parent, int depth, + std::set& visited) +{ + if (depth > 100) + { + // Arbitrarily cut off recursion at a fixed depth to avoid + // specially crafted files that could cause stack overflow. + return; + } + if (! field.isIndirect()) + { + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper direct field"); + field.warnIfPossible( + "encountered a direct object as a field or annotation while" + " traversing /AcroForm; ignoring field or annotation"); + return; + } + if (! field.isDictionary()) + { + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper non-dictionary field"); + field.warnIfPossible( + "encountered a non-dictionary as a field or annotation while" + " traversing /AcroForm; ignoring field or annotation"); + return; + } + QPDFObjGen og(field.getObjGen()); + if (visited.count(og) != 0) + { + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper loop"); + field.warnIfPossible("loop detected while traversing /AcroForm"); + return; + } + visited.insert(og); + + // A dictionary encountered while traversing the /AcroForm field + // may be a form field, an annotation, or the merger of the two. A + // field that has no fields below it is a terminal. If a terminal + // field looks like an annotation, it is an annotation because + // annotation dictionary fields can be merged with terminal field + // dictionaries. Otherwise, the annotation fields might be there + // to be inherited by annotations below it. + + bool is_annotation = false; + bool is_field = (0 == depth); + QPDFObjectHandle kids = field.getKey("/Kids"); + if (kids.isArray()) + { + is_field = true; + size_t nkids = kids.getArrayNItems(); + for (size_t k = 0; k < nkids; ++k) + { + traverseField(kids.getArrayItem(k), field, 1 + depth, visited); + } + } + else + { + if (field.hasKey("/Parent")) + { + is_field = true; + } + if (field.hasKey("/Subtype") || + field.hasKey("/Rect") || + field.hasKey("/AP")) + { + is_annotation = true; + } + } + + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper field found", + (depth == 0) ? 0 : 1); + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper annotation found", + (is_field ? 0 : 1)); + + if (is_annotation) + { + QPDFObjectHandle our_field = (is_field ? field : parent); + this->m->field_to_annotations[our_field.getObjGen()].push_back( + QPDFAnnotationObjectHelper(field)); + this->m->annotation_to_field[og] = + QPDFFormFieldObjectHelper(our_field); + } +} -- cgit v1.2.3-70-g09d2