aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2019-01-11 04:11:38 +0100
committer Jay Berkenbilt <ejb@ql.org> 2019-01-11 04:35:08 +0100
commit d24a120c7ffb4cbfd2dcebe63577d8704442f7bd (patch)
tree 72c19aab1124a8a79b76a97eae0780d029bc9497
parent 6b15579ace057c24040184357a4419261e095600 (diff)
download qpdf-d24a120c7ffb4cbfd2dcebe63577d8704442f7bd.tar.zst
Add QPDF::setImmediateCopyFrom
-rw-r--r-- ChangeLog 9
-rw-r--r-- include/qpdf/QPDF.hh 41
-rw-r--r-- libqpdf/QPDF.cc 20
-rw-r--r-- qpdf/qpdf.testcov 1
-rw-r--r-- qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf 73
-rw-r--r-- qpdf/test_driver.cc 53
6 files changed, 154 insertions, 43 deletions
diff --git a/ChangeLog b/ChangeLog
index 7e921d5c..2f1e7f95 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2019-01-10 Jay Berkenbilt <ejb@ql.org>
+
+ * Add new method QPDF::setImmediateCopyFrom. When called on a
+ source QPDF object, streams can be copied FROM that object to
+ other ones without having to keep the source QPDF or its input
+ source around. The cost is copying the streams into RAM. See
+ comments in QPDF.hh for setImmediateCopyFrom for a detailed
+ explanation.
+
2019-01-07 Jay Berkenbilt <ejb@ql.org>
* 8.3.0: release
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index f7229245..6ebdcd03 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -160,6 +160,39 @@ class QPDF
QPDF_DLL
void setAttemptRecovery(bool);
+ // Tell other QPDF objects that streams copied from this QPDF need
+ // to be fully copied when copyForeignObject is called on them.
+ // Calling setImmediateCopyFrom(true) on a QPDF object makes it
+ // possible for the object and its input source to disappear
+ // before streams copied from it are written with the destination
+ // QPDF object. Confused? Ordinarily, if you are going to copy
+ // objects from a source QPDF object to a destination QPDF object
+ // using copyForeignObject or addPage, the source object's input
+ // source must stick around until after the destination PDF is
+ // written. If you call this method on the source QPDF object, it
+ // sends a signal to the destination object that it must fully
+ // copy the stream data when copyForeignObject is called. It does this by
+ // making a copy in RAM. Ordinarily the stream data is copied
+ // lazily to avoid unnecessary duplication of the stream data.
+ // Note that the stream data is copied into RAM only once
+ // regardless of how many objects the stream is copied into. The
+ // result is that, if you called setImmediateCopyFrom(true) on a
+ // given QPDF object prior to copying any of its streams, you do
+ // not need to keep it or its input source around after copying
+ // its objects to another QPDF. This is true even if the source
+ // streams use StreamDataProvider. Note that this method is called
+ // on the QPDF object you are copying FROM, not the one you are
+ // copying to. The reasoning for this is that there's no reason a
+ // given QPDF may not get objects copied to it from a variety of
+ // other objects, some transient and some not. Since what's
+ // relevant is whether the source QPDF is transient, the method
+ // must be called on the source QPDF, not the destination one.
+ // Note that this method will make a copy of the stream in RAM, so be
+ // sure you have enough memory to simultaneously hold all the
+ // streams you're copying.
+ QPDF_DLL
+ void setImmediateCopyFrom(bool);
+
// Other public methods
// Return the list of warnings that have been issued so far and
@@ -248,6 +281,13 @@ class QPDF
// original stream's QPDF object must stick around because the
// QPDF object is itself the source of the original stream data.
// For a more in-depth discussion, please see the TODO file.
+ // Starting in 8.3.1, you can call setImmediateCopyFrom(true) on
+ // the SOURCE QPDF object (the one you're copying FROM). If you do
+ // this prior to copying any of its objects, then neither the
+ // source QPDF object nor its input source needs to stick around
+ // at all regardless of the source. The cost is that the stream
+ // data is copied into RAM at the time copyForeignObject is
+ // called. See setImmediateCopyFrom for more information.
//
// The return value of this method is an indirect reference to the
// copied object in this file. This method is intended to be used
@@ -1283,6 +1323,7 @@ class QPDF
std::set<QPDFObjGen> attachment_streams;
bool reconstructed_xref;
bool fixed_dangling_refs;
+ bool immediate_copy_from;
// Linearization data
qpdf_offset_t first_xref_item_offset; // actual value from file
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 772a17cd..95ae9cab 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -147,6 +147,7 @@ QPDF::Members::Members() :
copied_stream_data_provider(0),
reconstructed_xref(false),
fixed_dangling_refs(false),
+ immediate_copy_from(false),
first_xref_item_offset(0),
uncompressed_after_compressed(false)
{
@@ -269,6 +270,12 @@ QPDF::setAttemptRecovery(bool val)
this->m->attempt_recovery = val;
}
+void
+QPDF::setImmediateCopyFrom(bool val)
+{
+ this->m->immediate_copy_from = val;
+}
+
std::vector<QPDFExc>
QPDF::getWarnings()
{
@@ -2376,6 +2383,19 @@ QPDF::replaceForeignIndirectObjects(
}
PointerHolder<Buffer> stream_buffer =
stream->getStreamDataBuffer();
+ if ((foreign_stream_qpdf->m->immediate_copy_from) &&
+ (stream_buffer.getPointer() == 0))
+ {
+ // Pull the stream data into a buffer before attempting
+ // the copy operation. Do it on the source stream so that
+ // if the source stream is copied multiple times, we don't
+ // have to keep duplicating the memory.
+ QTC::TC("qpdf", "QPDF immediate copy stream data");
+ foreign.replaceStreamData(foreign.getRawStreamData(),
+ dict.getKey("/Filter"),
+ dict.getKey("/DecodeParms"));
+ stream_buffer = stream->getStreamDataBuffer();
+ }
PointerHolder<QPDFObjectHandle::StreamDataProvider> stream_provider =
stream->getStreamDataProvider();
if (stream_buffer.getPointer())
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 801004f3..43f5c5a7 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -410,3 +410,4 @@ QPDF_encryption attachment stream 0
QPDF pipe foreign encrypted stream 0
QPDF copy foreign stream with provider 0
QPDF copy foreign stream with buffer 0
+QPDF immediate copy stream data 0
diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf b/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf
index f2d6bad6..489aef8e 100644
--- a/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf
+++ b/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf
@@ -1,10 +1,10 @@
%PDF-1.3
%¿÷¢þ
1 0 obj
-<< /Pages 5 0 R /Type /Catalog >>
+<< /Pages 6 0 R /Type /Catalog >>
endobj
2 0 obj
-<< /O1 6 0 R /O2 7 0 R /O3 8 0 R /This-is-QTest true >>
+<< /O1 7 0 R /O2 8 0 R /O3 9 0 R /This-is-QTest true >>
endobj
3 0 obj
<< /Length 20 >>
@@ -19,39 +19,45 @@ potato
endstream
endobj
5 0 obj
-<< /Count 3 /Kids [ 9 0 R 10 0 R 8 0 R ] /Type /Pages >>
+<< /Length 21 >>
+stream
+more data for stream
+endstream
endobj
6 0 obj
-[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 7 0 R >> 2.17828 ] >> /salad /O2 7 0 R /Stream1 11 0 R ]
+<< /Count 3 /Kids [ 10 0 R 11 0 R 9 0 R ] /Type /Pages >>
endobj
7 0 obj
-<< /K1 [ 2.236 /O1 6 0 R 1.732 ] /O1 6 0 R /This-is-O2 true >>
+[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 8 0 R >> 2.17828 ] >> /salad /O2 8 0 R /Stream1 12 0 R ]
endobj
8 0 obj
-<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 10 0 R /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
+<< /K1 [ 2.236 /O1 7 0 R 1.732 ] /O1 7 0 R /This-is-O2 true >>
endobj
9 0 obj
-<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 15 0 R >> /ProcSet 16 0 R >> /Type /Page >>
+<< /Contents 13 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 11 0 R /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
endobj
10 0 obj
-<< /Contents 17 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >>
+<< /Contents 15 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 16 0 R >> /ProcSet 17 0 R >> /Type /Page >>
endobj
11 0 obj
-<< /Stream2 18 0 R /This-is-Stream1 true /Length 18 >>
+<< /Contents 18 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >>
+endobj
+12 0 obj
+<< /Stream2 19 0 R /This-is-Stream1 true /Length 18 >>
stream
This is stream 1.
endstream
endobj
-12 0 obj
+13 0 obj
<< /Length 47 >>
stream
BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET
endstream
endobj
-13 0 obj
+14 0 obj
<< /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
endobj
-14 0 obj
+15 0 obj
<< /Length 44 >>
stream
BT
@@ -61,46 +67,47 @@ BT
ET
endstream
endobj
-15 0 obj
+16 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
-16 0 obj
+17 0 obj
[ /PDF /Text ]
endobj
-17 0 obj
+18 0 obj
<< /Length 47 >>
stream
BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET
endstream
endobj
-18 0 obj
-<< /Stream1 11 0 R /This-is-Stream2 true /Length 18 >>
+19 0 obj
+<< /Stream1 12 0 R /This-is-Stream2 true /Length 18 >>
stream
This is stream 2.
endstream
endobj
xref
-0 19
+0 20
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000135 00000 n
0000000204 00000 n
0000000259 00000 n
-0000000331 00000 n
-0000000449 00000 n
-0000000527 00000 n
-0000000728 00000 n
-0000000874 00000 n
-0000001069 00000 n
-0000001175 00000 n
-0000001272 00000 n
-0000001372 00000 n
-0000001466 00000 n
-0000001574 00000 n
-0000001605 00000 n
-0000001702 00000 n
-trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R ] /Root 1 0 R /Size 19 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
+0000000329 00000 n
+0000000402 00000 n
+0000000520 00000 n
+0000000598 00000 n
+0000000799 00000 n
+0000000946 00000 n
+0000001141 00000 n
+0000001247 00000 n
+0000001344 00000 n
+0000001444 00000 n
+0000001538 00000 n
+0000001646 00000 n
+0000001677 00000 n
+0000001774 00000 n
+trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R 5 0 R ] /Root 1 0 R /Size 20 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
startxref
-1808
+1880
%%EOF
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index 71078618..1f00b31d 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -1130,25 +1130,56 @@ void runtest(int n, char const* filename1, char const* arg2)
// Should get qtest plus only the O3 page and the page that O3
// points to. Inherited objects should be preserved. This test
// also exercises copying from a stream that has a buffer and
- // a provider, including copying a provider multiple times.
+ // a provider, including copying a provider multiple times. We
+ // also exercise setImmediateCopyFrom.
- Pl_Buffer p1("buffer");
- p1.write(QUtil::unsigned_char_pointer("new data for stream\n"),
- 20); // no null!
- p1.finish();
- PointerHolder<Buffer> b = p1.getBuffer();
- Provider* provider = new Provider(b);
- PointerHolder<QPDFObjectHandle::StreamDataProvider> p = provider;
+ // Create a provider. The provider stays in scope.
+ PointerHolder<QPDFObjectHandle::StreamDataProvider> p1;
+ {
+ // Local scope
+ Pl_Buffer pl("buffer");
+ pl.write(QUtil::unsigned_char_pointer("new data for stream\n"),
+ 20); // no null!
+ pl.finish();
+ PointerHolder<Buffer> b = pl.getBuffer();
+ Provider* provider = new Provider(b);
+ p1 = provider;
+ }
+ // Create a stream that uses a provider in empty1 and copy it
+ // to empty2. It is copied from empty2 to the final pdf.
QPDF empty1;
empty1.emptyPDF();
QPDFObjectHandle s1 = QPDFObjectHandle::newStream(&empty1);
s1.replaceStreamData(
- p, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
+ p1, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
QPDF empty2;
empty2.emptyPDF();
s1 = empty2.copyForeignObject(s1);
{
- // Make sure original PDF is out of scope when we write.
+ // Make sure some source PDFs are out of scope when we
+ // write.
+
+ PointerHolder<QPDFObjectHandle::StreamDataProvider> p2;
+ // Create another provider. This one will go out of scope
+ // along with its containing qpdf, which has
+ // setImmediateCopyFrom(true).
+ {
+ // Local scope
+ Pl_Buffer pl("buffer");
+ pl.write(QUtil::unsigned_char_pointer(
+ "more data for stream\n"),
+ 21); // no null!
+ pl.finish();
+ PointerHolder<Buffer> b = pl.getBuffer();
+ Provider* provider = new Provider(b);
+ p2 = provider;
+ }
+ QPDF empty3;
+ empty3.emptyPDF();
+ empty3.setImmediateCopyFrom(true);
+ QPDFObjectHandle s3 = QPDFObjectHandle::newStream(&empty3);
+ s3.replaceStreamData(
+ p2, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
assert(arg2 != 0);
QPDF oldpdf;
oldpdf.processFile(arg2);
@@ -1167,6 +1198,8 @@ void runtest(int n, char const* filename1, char const* arg2)
pdf.copyForeignObject(s1));
pdf.getTrailer().getKey("/QTest2").appendItem(
pdf.copyForeignObject(s2));
+ pdf.getTrailer().getKey("/QTest2").appendItem(
+ pdf.copyForeignObject(s3));
}
QPDFWriter w(pdf, "a.pdf");