about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 15
-rw-r--r-- TODO 12
-rw-r--r-- include/qpdf/QPDFPageObjectHelper.hh 11
-rw-r--r-- libqpdf/QPDFPageObjectHelper.cc 56
-rw-r--r-- manual/qpdf-manual.xml 20
-rw-r--r-- qpdf/qtest/qpdf.test 1
-rw-r--r-- qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-all.pdf 385
-rw-r--r-- qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-some.pdf bin 0 -> 4232 bytes
-rw-r--r-- qpdf/qtest/qpdf/nested-form-xobjects-inline-images.pdf bin 0 -> 2841 bytes
9 files changed, 473 insertions, 27 deletions
diff --git a/ChangeLog b/ChangeLog
index 78aa88b8..d93a4c65 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2021-01-02 Jay Berkenbilt <ejb@ql.org>
+
+ * QPDFPageObjectHelper::externalizeInlineImages can be called with
+ form XObjects as well as pages.
+
+ * Bug fix: QPDFPageObjectHelper::externalizeInlineImages was not
+ descending into form XObjects on a page. It now does this by
+ default. In the extremely unlikely event that anyone was actually
+ depending on the old behavior, it is available by passing
+ shallow=true to the externalizeInlineImages call.
+
+ * Bug fix: QPDFObjectHandle::filterPageContents was broken for
+ pages with an array of content streams. This caused
+ externalize-inline-images to also be broken for this case.
+
2021-01-01 Jay Berkenbilt <ejb@ql.org>
* Add methods to QPDFPageObjectHelper: forEachXObject,
diff --git a/TODO b/TODO
index 0cf8017d..8922a6ad 100644
--- a/TODO
+++ b/TODO
@@ -21,18 +21,6 @@ Candidates for upcoming release
* big page even with --remove-unreferenced-resources=yes, even with --empty
* optimize image failure because of colorspace
-* Externalize inline images doesn't walk into form XObjects. In
- general:
-
- * Check QPDFPageObjectHelper and see what can be applied to form
- XObjects. Maybe think about generalizing it to work with form
- XObjects.
-
- * There is an increasing amount of logic in qpdf.cc that should
- probably move into the library. This includes externalizing inline
- images and page splitting as those operations become more
- elaborate, particularly with handling of form XObjects.
-
* See if the tokenizer is a performance bottleneck and, if so,
optimize it. We might end up with a high-performance tokenizer that
has a different interface but still ultimately creates the same
diff --git a/include/qpdf/QPDFPageObjectHelper.hh b/include/qpdf/QPDFPageObjectHelper.hh
index 1152a7a5..ccf56630 100644
--- a/include/qpdf/QPDFPageObjectHelper.hh
+++ b/include/qpdf/QPDFPageObjectHelper.hh
@@ -123,8 +123,15 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
QPDF_DLL
std::map<std::string, QPDFObjectHandle> getFormXObjects();
- // Convert each inline image to an external (normal) image if the
- // size is at least the specified number of bytes.
+ // Converts each inline image to an external (normal) image if the
+ // size is at least the specified number of bytes. This method
+ // works with pages or form XObjects. By default, it recursively
+ // processes nested form XObjects. Pass true as shallow to avoid
+ // this behavior. Prior to qpdf 10.1, form XObjects were ignored,
+ // but this was considered a bug.
+ QPDF_DLL
+ void externalizeInlineImages(size_t min_size, bool shallow);
+ // ABI: make shallow optional (default false) and merge
QPDF_DLL
void externalizeInlineImages(size_t min_size = 0);
diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc
index ef563dc2..f7fcd395 100644
--- a/libqpdf/QPDFPageObjectHelper.cc
+++ b/libqpdf/QPDFPageObjectHelper.cc
@@ -486,20 +486,50 @@ QPDFPageObjectHelper::getFormXObjects()
void
QPDFPageObjectHelper::externalizeInlineImages(size_t min_size)
{
- QPDFObjectHandle resources = getAttribute("/Resources", true);
- // Calling mergeResources also ensures that /XObject becomes
- // direct and is not shared with other pages.
- resources.mergeResources(
- QPDFObjectHandle::parse("<< /XObject << >> >>"));
- InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
- Pl_Buffer b("new page content");
- filterContents(&iit, &b);
- if (iit.any_images)
+ externalizeInlineImages(min_size, false);
+}
+
+void
+QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow)
+{
+ if (shallow)
+ {
+ QPDFObjectHandle resources = getAttribute("/Resources", true);
+ // Calling mergeResources also ensures that /XObject becomes
+ // direct and is not shared with other pages.
+ resources.mergeResources(
+ QPDFObjectHandle::parse("<< /XObject << >> >>"));
+ InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
+ Pl_Buffer b("new page content");
+ filterContents(&iit, &b);
+ if (iit.any_images)
+ {
+ if (this->oh.isFormXObject())
+ {
+ this->oh.replaceStreamData(
+ b.getBuffer(),
+ QPDFObjectHandle::newNull(),
+ QPDFObjectHandle::newNull());
+ }
+ else
+ {
+ this->oh.replaceKey(
+ "/Contents",
+ QPDFObjectHandle::newStream(
+ this->oh.getOwningQPDF(), b.getBuffer()));
+ }
+ }
+ }
+ else
{
- getObjectHandle().replaceKey(
- "/Contents",
- QPDFObjectHandle::newStream(
- this->oh.getOwningQPDF(), b.getBuffer()));
+ externalizeInlineImages(min_size, true);
+ forEachFormXObject(
+ true,
+ [min_size](QPDFObjectHandle& obj,
+ QPDFObjectHandle&, std::string const&) {
+ QPDFPageObjectHelper(obj).externalizeInlineImages(
+ min_size, true);
+ });
}
}
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
index bae87536..409313ef 100644
--- a/manual/qpdf-manual.xml
+++ b/manual/qpdf-manual.xml
@@ -4991,6 +4991,26 @@ print "\n";
</listitem>
</itemizedlist>
</listitem>
+ <listitem>
+ <para>
+ Bug Fixes
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ <function>QPDFPageObjectHelper::externalizeInlineImages</function>
+ was not externalizing images referenced from form XObjects
+ that appeared on the page.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>QPDFObjectHandle::filterPageContents</function>
+ was broken for pages with multiple content streams.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
</itemizedlist>
</listitem>
</varlistentry>
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 32751a98..9931e7dc 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -905,6 +905,7 @@ $td->runtest("check output",
my @eii_tests = (
['inline-images', 80],
['large-inline-image', 1024],
+ ['nested-form-xobjects-inline-images', 20],
);
$n_tests += 4 * scalar(@eii_tests);
$n_compare_pdfs += 2 * scalar(@eii_tests);
diff --git a/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-all.pdf b/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-all.pdf
new file mode 100644
index 00000000..3449150e
--- /dev/null
+++ b/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-all.pdf
@@ -0,0 +1,385 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+ /Count 1
+ /Kids [
+ 3 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+ /Contents 4 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 6 0 R
+ >>
+ /ProcSet 7 0 R
+ /XObject <<
+ /Fx1 8 0 R
+ /IIm1 10 0 R
+ /IIm2 12 0 R
+ >>
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+%% Original object ID: 20 0
+4 0 obj
+<<
+ /Length 5 0 R
+>>
+stream
+q
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Page) Tj
+ET
+q
+100 0 0 100 72 600 cm
+/IIm1 Do
+
+Q
+q
+100 0 0 100 192 600 cm
+/IIm2 Do
+
+Q
+
+Q
+q
+1.00000 0.00000 0.00000 1.00000 72.00000 200.00000 cm
+/Fx1 Do
+Q
+endstream
+endobj
+
+5 0 obj
+186
+endobj
+
+%% Original object ID: 10 0
+6 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 11 0
+7 0 obj
+[
+ /PDF
+ /Text
+ /ImageC
+]
+endobj
+
+%% Original object ID: 12 0
+8 0 obj
+<<
+ /BBox [
+ 0
+ 0
+ 300
+ 500
+ ]
+ /Resources <<
+ /Font <<
+ /F1 14 0 R
+ >>
+ /ProcSet 15 0 R
+ /XObject <<
+ /Fx1 16 0 R
+ /IIm1 18 0 R
+ /IIm2 20 0 R
+ >>
+ >>
+ /Subtype /Form
+ /Type /XObject
+ /Length 9 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 0 320 Td
+ (FX1) Tj
+ET
+q
+100 0 0 100 000 200 cm
+/IIm1 Do
+
+Q
+q
+100 0 0 100 120 200 cm
+/IIm2 Do
+
+Q
+q
+1.00000 0.00000 0.00000 1.00000 0.00000 0.00000 cm
+/Fx1 Do
+Q
+endstream
+endobj
+
+9 0 obj
+177
+endobj
+
+%% Original object ID: 18 0
+10 0 obj
+<<
+ /BitsPerComponent 8
+ /ColorSpace /DeviceGray
+ /Height 15
+ /Subtype /Image
+ /Type /XObject
+ /Width 15
+ /Length 11 0 R
+>>
+stream
+`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+endstream
+endobj
+
+%QDF: ignore_newline
+11 0 obj
+225
+endobj
+
+%% Original object ID: 19 0
+12 0 obj
+<<
+ /BitsPerComponent 8
+ /ColorSpace /DeviceGray
+ /Height 15
+ /Subtype /Image
+ /Type /XObject
+ /Width 15
+ /Length 13 0 R
+>>
+stream
+@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+endstream
+endobj
+
+%QDF: ignore_newline
+13 0 obj
+225
+endobj
+
+%% Original object ID: 14 0
+14 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 15 0
+15 0 obj
+[
+ /PDF
+ /Text
+ /ImageC
+]
+endobj
+
+%% Original object ID: 16 0
+16 0 obj
+<<
+ /BBox [
+ 0
+ 0
+ 300
+ 200
+ ]
+ /Resources <<
+ /Font <<
+ /F1 14 0 R
+ >>
+ /ProcSet 15 0 R
+ /XObject <<
+ /IIm1 22 0 R
+ /IIm2 24 0 R
+ >>
+ >>
+ /Subtype /Form
+ /Type /XObject
+ /Length 17 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 0 120 Td
+ (FX2) Tj
+ET
+q
+100 0 0 100 0 0 cm
+/IIm1 Do
+
+Q
+q
+100 0 0 100 120 0 cm
+/IIm2 Do
+
+Q
+endstream
+endobj
+
+17 0 obj
+108
+endobj
+
+%% Original object ID: 21 0
+18 0 obj
+<<
+ /BitsPerComponent 8
+ /ColorSpace /DeviceGray
+ /Height 15
+ /Subtype /Image
+ /Type /XObject
+ /Width 15
+ /Length 19 0 R
+>>
+stream
+@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+endstream
+endobj
+
+%QDF: ignore_newline
+19 0 obj
+225
+endobj
+
+%% Original object ID: 22 0
+20 0 obj
+<<
+ /BitsPerComponent 8
+ /ColorSpace /DeviceGray
+ /Height 15
+ /Subtype /Image
+ /Type /XObject
+ /Width 15
+ /Length 21 0 R
+>>
+stream
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+endstream
+endobj
+
+%QDF: ignore_newline
+21 0 obj
+225
+endobj
+
+%% Original object ID: 23 0
+22 0 obj
+<<
+ /BitsPerComponent 8
+ /ColorSpace /DeviceGray
+ /Height 15
+ /Subtype /Image
+ /Type /XObject
+ /Width 15
+ /Length 23 0 R
+>>
+stream
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+endstream
+endobj
+
+%QDF: ignore_newline
+23 0 obj
+225
+endobj
+
+%% Original object ID: 24 0
+24 0 obj
+<<
+ /BitsPerComponent 8
+ /ColorSpace /DeviceGray
+ /Height 15
+ /Subtype /Image
+ /Type /XObject
+ /Width 15
+ /Length 25 0 R
+>>
+stream
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+endstream
+endobj
+
+%QDF: ignore_newline
+25 0 obj
+225
+endobj
+
+xref
+0 26
+0000000000 65535 f
+0000000052 00000 n
+0000000133 00000 n
+0000000242 00000 n
+0000000563 00000 n
+0000000804 00000 n
+0000000852 00000 n
+0000000998 00000 n
+0000001071 00000 n
+0000001536 00000 n
+0000001584 00000 n
+0000001996 00000 n
+0000002045 00000 n
+0000002457 00000 n
+0000002506 00000 n
+0000002653 00000 n
+0000002727 00000 n
+0000003107 00000 n
+0000003156 00000 n
+0000003568 00000 n
+0000003617 00000 n
+0000004029 00000 n
+0000004078 00000 n
+0000004490 00000 n
+0000004539 00000 n
+0000004951 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 26
+ /ID [<55269d37282af9edc76855e4cb859987><31415926535897932384626433832795>]
+>>
+startxref
+4972
+%%EOF
diff --git a/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-some.pdf b/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-some.pdf
new file mode 100644
index 00000000..871c8c52
--- /dev/null
+++ b/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-some.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/nested-form-xobjects-inline-images.pdf b/qpdf/qtest/qpdf/nested-form-xobjects-inline-images.pdf
new file mode 100644
index 00000000..e2aea58e
--- /dev/null
+++ b/qpdf/qtest/qpdf/nested-form-xobjects-inline-images.pdf
Binary files differ