aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2020-04-05 05:35:35 +0200
committer Jay Berkenbilt <ejb@ql.org> 2020-04-06 02:07:13 +0200
commit 893d38b87e4ad6c6c55f49464f6b721c516ec878 (patch)
tree 200289d57f269a394bc2a93f5978322d037f8628
parent a5367003c3eaf7f21d369c1c6d11338564cf04f2 (diff)
download qpdf-893d38b87e4ad6c6c55f49464f6b721c516ec878.tar.zst
Allow propagation of errors and retry through StreamDataProvider
StreamDataProvider::provideStreamData now has a rich enough API for it to effectively proxy to pipeStreamData.
-rw-r--r-- ChangeLog 15
-rw-r--r-- TODO 12
-rw-r--r-- include/qpdf/QPDF.hh 9
-rw-r--r-- include/qpdf/QPDFObjectHandle.hh 73
-rw-r--r-- libqpdf/QPDF.cc 25
-rw-r--r-- libqpdf/QPDFObjectHandle.cc 49
-rw-r--r-- libqpdf/QPDF_Stream.cc 41
-rw-r--r-- libqpdf/qpdf/QPDF_Stream.hh 2
-rw-r--r-- qpdf/qpdf.testcov 2
-rw-r--r-- qpdf/qtest/qpdf.test 18
-rw-r--r-- qpdf/qtest/qpdf/broken-lzw.out 3
-rw-r--r-- qpdf/qtest/qpdf/broken-lzw.pdf 108
-rw-r--r-- qpdf/qtest/qpdf/copy-foreign-objects-25.out 1
-rw-r--r-- qpdf/qtest/qpdf/copy-foreign-objects-26.out 1
-rw-r--r-- qpdf/qtest/qpdf/copy-foreign-objects-27.out 3
-rw-r--r-- qpdf/qtest/qpdf/copy-foreign-objects-in.pdf 87
-rw-r--r-- qpdf/qtest/qpdf/copy-foreign-objects-out1.pdf 51
-rw-r--r-- qpdf/qtest/qpdf/copy-foreign-objects-out2.pdf 63
-rw-r--r-- qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf 77
-rw-r--r-- qpdf/qtest/qpdf/split-exp-1.pdf bin 0 -> 899 bytes
-rw-r--r-- qpdf/test_driver.cc 3
21 files changed, 469 insertions, 174 deletions
diff --git a/ChangeLog b/ChangeLog
index 8501aaf2..62177431 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,20 @@
2020-04-04 Jay Berkenbilt <ejb@ql.org>
+ * Add a new provideStreamData method for StreamDataProvider that
+ allows a success code to be returned and that accepts the
+ suppress_warnings and will_retry parameters. This makes it possible
+ to have a StreamDataProvider call pipeStreamData and propagate its
+ results back. This change allows better error handling and
+ recovery when objects are copied from other files and when
+ "immediate copy from" is enabled.
+
+ * Add a new version of QPDFObjectHandle::pipeStreamData whose
+ return value indicates overall success or failure rather than
+ whether or not filtering was attempted. It should have always
+ been this way. This change was done in a backward-compatible
+ fashion. Previously existing pipeStreamData methods' return values
+ mean the same as always.
+
* Add "objectinfo" section to json output. In this release,
information about whether each object is a stream or not is
provided. There's otherwise no way to tell conclusively from the
diff --git a/TODO b/TODO
index 06df8421..4a0111a2 100644
--- a/TODO
+++ b/TODO
@@ -20,18 +20,6 @@ ABI Changes
This is a list of changes to make next time there is an ABI change.
Comments appear in the code prefixed by "ABI"
-* (Source compatibility) As somewhat discussed in issue 219, the
- original pipeStreamData in QPDF_Stream has various logic for
- reporting warnings and letting the caller retry. This logic is not
- implemented for stream data providers. When copying foreign streams,
- qpdf uses a stream data provider (QPDF::CopiedStreamDataProvider) to
- read the stream data from the original file. While a warning is
- issued for that case, there is no way to actually propagate failure
- information back through because
- StreamDataProvider::provideStreamData doesn't take the
- suppress_warnings or will_retry options, and adding them would break
- source compatibility.
-
C++-11
======
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 1455736c..720c1bc8 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -813,8 +813,9 @@ class QPDF
virtual ~CopiedStreamDataProvider()
{
}
- virtual void provideStreamData(int objid, int generation,
- Pipeline* pipeline);
+ virtual bool provideStreamData(
+ int objid, int generation, Pipeline* pipeline,
+ bool suppress_warnings, bool will_retry) override;
void registerForeignStream(QPDFObjGen const& local_og,
QPDFObjectHandle foreign_stream);
void registerForeignStream(QPDFObjGen const& local_og,
@@ -909,9 +910,7 @@ class QPDF
bool will_retry);
bool pipeForeignStreamData(
PointerHolder<ForeignStreamData>,
- Pipeline*,
- int encode_flags,
- qpdf_stream_decode_level_e decode_level);
+ Pipeline*, bool suppress_warnings, bool will_retry);
static bool pipeStreamData(PointerHolder<QPDF::EncryptionParameters> encp,
PointerHolder<InputSource> file,
QPDF& qpdf_for_warning,
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index dcb7de35..ef6ff42e 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -57,6 +57,9 @@ class QPDFObjectHandle
class QPDF_DLL_CLASS StreamDataProvider
{
public:
+ QPDF_DLL
+ StreamDataProvider(bool supports_retry = false);
+
QPDF_DLL
virtual ~StreamDataProvider()
{
@@ -74,8 +77,30 @@ class QPDFObjectHandle
// information is made available just to make it more
// convenient to use a single StreamDataProvider object to
// provide data for multiple streams.
+
+ // Prior to qpdf 10.0.0, it was not possible to handle errors
+ // the way pipeStreamData does or to pass back success.
+ // Starting in qpdf 10.0.0, those capabilities have been added
+ // by allowing an alternative provideStreamData to be
+ // implemented. You must implement at least one of the
+ // versions of provideStreamData below. If you implement the
+ // version that supports retry and returns a value, you should
+ // pass true as the value of supports_retry in the base class
+ // constructor. This will cause the library to call that
+ // version of the method, which should also return a boolean
+ // indicating whether it ran without errors.
+ QPDF_DLL
virtual void provideStreamData(int objid, int generation,
- Pipeline* pipeline) = 0;
+ Pipeline* pipeline);
+ QPDF_DLL
+ virtual bool provideStreamData(
+ int objid, int generation, Pipeline* pipeline,
+ bool suppress_warnings, bool will_retry);
+ QPDF_DLL
+ bool supportsRetry();
+
+ private:
+ bool supports_retry;
};
// The TokenFilter class provides a way to filter content streams
@@ -779,18 +804,39 @@ class QPDFObjectHandle
// we determine that we know how to apply all requested filters,
// do so and return true if we are successful.
//
- // In all cases, a return value of true means that filtered data
- // has been written successfully. If filtering is requested but
- // this method returns false, it means there was some error in the
- // filtering, in which case the resulting data is likely partially
- // filtered and/or incomplete and may not be consistent with the
- // configured filters. QPDFWriter handles this by attempting to
- // get the stream data without filtering, but callers should
- // consider a false return value when decode_level is not
- // qpdf_dl_none to be a potential loss of data. If you intend to
- // retry in that case, pass true as the value of will_retry. This
- // changes the warning issued by the library to indicate that the
- // operation will be retried without filtering to avoid data loss.
+ // The exact meaning of the return value differs among the different
+ // versions of this function, but for any given version, the meaning
+ // has always been the same. For the main version, added in qpdf 10,
+ // the return value indicates whether the overall operation succeeded.
+ // The filtering_attempted parameter, if non-null, will be set to
+ // whether or not filtering was attempted. If filtering was not
+ // requested, this value will be false even if the overall operation
+ // succeeded.
+ //
+ // If filtering is requested but this method returns false, it
+ // means there was some error in the filtering, in which case the
+ // resulting data is likely partially filtered and/or incomplete
+ // and may not be consistent with the configured filters.
+ // QPDFWriter handles this by attempting to get the stream data
+ // without filtering, but callers should consider a false return
+ // value when decode_level is not qpdf_dl_none to be a potential
+ // loss of data. If you intend to retry in that case, pass true as
+ // the value of will_retry. This changes the warning issued by the
+ // library to indicate that the operation will be retried without
+ // filtering to avoid data loss.
+
+ // Return value is overall success, even if filtering is not
+ // requested.
+ QPDF_DLL
+ bool pipeStreamData(Pipeline*, bool* filtering_attempted,
+ int encode_flags,
+ qpdf_stream_decode_level_e decode_level,
+ bool suppress_warnings = false,
+ bool will_retry = false);
+
+ // Legacy version. Return value is whether filtering was
+ // attempted. There is no way to determine success if filtering
+ // was not attempted.
QPDF_DLL
bool pipeStreamData(Pipeline*,
int encode_flags,
@@ -804,6 +850,7 @@ class QPDFObjectHandle
// filter = true -> decode_level = qpdf_dl_generalized
// normalize = true -> encode_flags |= qpdf_sf_normalize
// compress = true -> encode_flags |= qpdf_sf_compress
+ // Return value is whether filtering was attempted.
QPDF_DLL
bool pipeStreamData(Pipeline*, bool filter,
bool normalize, bool compress);
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 3177d38b..6219509e 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -68,27 +68,37 @@ QPDF::ForeignStreamData::ForeignStreamData(
QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(
QPDF& destination_qpdf) :
+ QPDFObjectHandle::StreamDataProvider(true),
destination_qpdf(destination_qpdf)
{
}
-void
+bool
QPDF::CopiedStreamDataProvider::provideStreamData(
- int objid, int generation, Pipeline* pipeline)
+ int objid, int generation, Pipeline* pipeline,
+ bool suppress_warnings, bool will_retry)
{
PointerHolder<ForeignStreamData> foreign_data =
this->foreign_stream_data[QPDFObjGen(objid, generation)];
+ bool result = false;
if (foreign_data.getPointer())
{
- destination_qpdf.pipeForeignStreamData(
- foreign_data, pipeline, 0, qpdf_dl_none);
+ result = destination_qpdf.pipeForeignStreamData(
+ foreign_data, pipeline, suppress_warnings, will_retry);
+ QTC::TC("qpdf", "QPDF copy foreign with data",
+ result ? 0 : 1);
}
else
{
QPDFObjectHandle foreign_stream =
this->foreign_streams[QPDFObjGen(objid, generation)];
- foreign_stream.pipeStreamData(pipeline, 0, qpdf_dl_none);
+ result = foreign_stream.pipeStreamData(
+ pipeline, nullptr, 0, qpdf_dl_none,
+ suppress_warnings, will_retry);
+ QTC::TC("qpdf", "QPDF copy foreign with foreign_stream",
+ result ? 0 : 1);
}
+ return result;
}
void
@@ -2851,8 +2861,7 @@ bool
QPDF::pipeForeignStreamData(
PointerHolder<ForeignStreamData> foreign,
Pipeline* pipeline,
- int encode_flags,
- qpdf_stream_decode_level_e decode_level)
+ bool suppress_warnings, bool will_retry)
{
if (foreign->encp->encrypted)
{
@@ -2863,7 +2872,7 @@ QPDF::pipeForeignStreamData(
foreign->foreign_objid, foreign->foreign_generation,
foreign->offset, foreign->length,
foreign->local_dict, foreign->is_attachment_stream,
- pipeline, false, false);
+ pipeline, suppress_warnings, will_retry);
}
void
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index dca59216..ab77fb73 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -36,6 +36,36 @@ class TerminateParsing
{
};
+QPDFObjectHandle::StreamDataProvider::StreamDataProvider(
+ bool supports_retry) :
+ supports_retry(supports_retry)
+{
+}
+
+void
+QPDFObjectHandle::StreamDataProvider::provideStreamData(
+ int objid, int generation, Pipeline* pipeline)
+{
+ throw std::logic_error(
+ "you must override provideStreamData -- see QPDFObjectHandle.hh");
+}
+
+bool
+QPDFObjectHandle::StreamDataProvider::provideStreamData(
+ int objid, int generation, Pipeline* pipeline,
+ bool suppress_warnings, bool will_retry)
+{
+ throw std::logic_error(
+ "you must override provideStreamData -- see QPDFObjectHandle.hh");
+ return false;
+}
+
+bool
+QPDFObjectHandle::StreamDataProvider::supportsRetry()
+{
+ return this->supports_retry;
+}
+
class CoalesceProvider: public QPDFObjectHandle::StreamDataProvider
{
public:
@@ -1135,14 +1165,29 @@ QPDFObjectHandle::getRawStreamData()
}
bool
-QPDFObjectHandle::pipeStreamData(Pipeline* p,
+QPDFObjectHandle::pipeStreamData(Pipeline* p, bool* filtering_attempted,
int encode_flags,
qpdf_stream_decode_level_e decode_level,
bool suppress_warnings, bool will_retry)
{
assertStream();
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData(
- p, encode_flags, decode_level, suppress_warnings, will_retry);
+ p, filtering_attempted, encode_flags, decode_level,
+ suppress_warnings, will_retry);
+}
+
+bool
+QPDFObjectHandle::pipeStreamData(Pipeline* p,
+ int encode_flags,
+ qpdf_stream_decode_level_e decode_level,
+ bool suppress_warnings, bool will_retry)
+{
+ assertStream();
+ bool filtering_attempted;
+ dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData(
+ p, &filtering_attempted, encode_flags, decode_level,
+ suppress_warnings, will_retry);
+ return filtering_attempted;
}
bool
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index dd2796e8..48c1ccf9 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -163,7 +163,7 @@ PointerHolder<Buffer>
QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
{
Pl_Buffer buf("stream data buffer");
- if (! pipeStreamData(&buf, 0, decode_level, false, false))
+ if (! pipeStreamData(&buf, nullptr, 0, decode_level, false, false))
{
throw QPDFExc(qpdf_e_unsupported, qpdf->getFilename(),
"", this->offset,
@@ -177,7 +177,12 @@ PointerHolder<Buffer>
QPDF_Stream::getRawStreamData()
{
Pl_Buffer buf("stream data buffer");
- pipeStreamData(&buf, 0, qpdf_dl_none, false, false);
+ if (! pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false))
+ {
+ throw QPDFExc(qpdf_e_unsupported, qpdf->getFilename(),
+ "", this->offset,
+ "error getting raw stream data");
+ }
QTC::TC("qpdf", "QPDF_Stream getRawStreamData");
return buf.getBuffer();
}
@@ -467,7 +472,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
}
bool
-QPDF_Stream::pipeStreamData(Pipeline* pipeline,
+QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
int encode_flags,
qpdf_stream_decode_level_e decode_level,
bool suppress_warnings, bool will_retry)
@@ -480,7 +485,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
bool early_code_change = true;
bool specialized_compression = false;
bool lossy_compression = false;
- bool filter = (! ((encode_flags == 0) && (decode_level == qpdf_dl_none)));
+ bool ignored;
+ if (filterp == nullptr)
+ {
+ filterp = &ignored;
+ }
+ bool& filter = *filterp;
+ filter = (! ((encode_flags == 0) && (decode_level == qpdf_dl_none)));
+ bool success = true;
if (filter)
{
filter = filterable(filters, specialized_compression, lossy_compression,
@@ -505,6 +517,7 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
if (pipeline == 0)
{
QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
+ // Return value is whether we can filter in this case.
return filter;
}
@@ -625,8 +638,21 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
else if (this->stream_provider.getPointer())
{
Pl_Count count("stream provider count", pipeline);
- this->stream_provider->provideStreamData(
- this->objid, this->generation, &count);
+ if (this->stream_provider->supportsRetry())
+ {
+ if (! this->stream_provider->provideStreamData(
+ this->objid, this->generation, &count,
+ suppress_warnings, will_retry))
+ {
+ filter = false;
+ success = false;
+ }
+ }
+ else
+ {
+ this->stream_provider->provideStreamData(
+ this->objid, this->generation, &count);
+ }
qpdf_offset_t actual_length = count.getCount();
qpdf_offset_t desired_length = 0;
if (this->stream_dict.hasKey("/Length"))
@@ -674,6 +700,7 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
will_retry))
{
filter = false;
+ success = false;
}
}
@@ -704,7 +731,7 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
" for content normalization in the manual."));
}
- return filter;
+ return success;
}
void
diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh
index b6428cc9..da9f91a0 100644
--- a/libqpdf/qpdf/QPDF_Stream.hh
+++ b/libqpdf/qpdf/QPDF_Stream.hh
@@ -31,7 +31,7 @@ class QPDF_Stream: public QPDFObject
PointerHolder<QPDFObjectHandle::StreamDataProvider> getStreamDataProvider() const;
// See comments in QPDFObjectHandle.hh for these methods.
- bool pipeStreamData(Pipeline*,
+ bool pipeStreamData(Pipeline*, bool* tried_filtering,
int encode_flags,
qpdf_stream_decode_level_e decode_level,
bool suppress_warnings, bool will_retry);
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 6834c7ad..621ec53a 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -453,3 +453,5 @@ QPDFPageObjectHelper filter form xobject 0
qpdf found resources in non-leaf 0
qpdf found shared resources in leaf 0
qpdf found shared xobject in leaf 0
+QPDF copy foreign with data 1
+QPDF copy foreign with foreign_stream 1
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 560bee07..e012b202 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -1698,6 +1698,8 @@ my @sp_cases = (
[11, '%d in middle', '--encrypt u o 128 --', 'a-%d-split-out.zdf'],
[11, 'pdf extension', '', 'split-out.Pdf'],
[4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'],
+ [1, 'broken data', '--pages broken-lzw.pdf --', 'split-out.pdf',
+ {$td->FILE => "broken-lzw.out", $td->EXIT_STATUS => 3}],
);
$n_tests += 35;
$n_compare_pdfs += 1;
@@ -1713,7 +1715,7 @@ $td->runtest("split page group > 1",
$td->NORMALIZE_NEWLINES);
foreach my $f ('01-05', '06-10', '11-11')
{
- $td->runtest("checkout group $f",
+ $td->runtest("check out group $f",
{$td->FILE => "split-out-group-$f.pdf"},
{$td->FILE => "split-exp-group-$f.pdf"});
}
@@ -1761,12 +1763,17 @@ foreach my $i (qw(01-10 11-20 21-30))
foreach my $d (@sp_cases)
{
- my ($n, $description, $xargs, $out) = @$d;
+ my ($n, $description, $xargs, $out, $exp) = @$d;
+ if (! defined $exp)
+ {
+ $exp = {$td->STRING => "", $td->EXIT_STATUS => 0};
+ }
$td->runtest("split pages " . $description,
{$td->COMMAND =>
"qpdf --static-id --split-pages 11-pages.pdf" .
" $xargs $out"},
- {$td->STRING => "", $td->EXIT_STATUS => 0});
+ $exp,
+ $td->NORMALIZE_NEWLINES);
my $pattern = $out;
my $nlen = length($n);
if ($pattern =~ m/\%d/)
@@ -1786,7 +1793,7 @@ foreach my $d (@sp_cases)
my $actual = sprintf($pattern, $i);
my $expected = $actual;
$expected =~ s/split-out/split-exp/;
- $td->runtest("checkout output page $i",
+ $td->runtest("check output page $i ($description)",
{$td->FILE => $actual},
{$td->FILE => $expected});
}
@@ -2390,7 +2397,8 @@ foreach my $d ([25, 1], [26, 2], [27, 3])
$td->runtest("copy objects $outn",
{$td->COMMAND => "test_driver $testn" .
" minimal.pdf copy-foreign-objects-in.pdf"},
- {$td->STRING => "test $testn done\n", $td->EXIT_STATUS => 0},
+ {$td->FILE => "copy-foreign-objects-$testn.out",
+ $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
diff --git a/qpdf/qtest/qpdf/broken-lzw.out b/qpdf/qtest/qpdf/broken-lzw.out
new file mode 100644
index 00000000..e10b308c
--- /dev/null
+++ b/qpdf/qtest/qpdf/broken-lzw.out
@@ -0,0 +1,3 @@
+WARNING: broken-lzw.pdf (offset 444): error decoding stream data for object 4 0: LZWDecoder: bad code received
+WARNING: broken-lzw.pdf (offset 444): stream will be re-processed without filtering to avoid data loss
+qpdf: operation succeeded with warnings; resulting file may have some problems
diff --git a/qpdf/qtest/qpdf/broken-lzw.pdf b/qpdf/qtest/qpdf/broken-lzw.pdf
new file mode 100644
index 00000000..a5e86538
--- /dev/null
+++ b/qpdf/qtest/qpdf/broken-lzw.pdf
@@ -0,0 +1,108 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+2 0 obj
+<<
+ /Count 1
+ /Kids [
+ 3 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+3 0 obj
+<<
+ /Contents [ 4 0 R 6 0 R ]
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 8 0 R
+ >>
+ /ProcSet 9 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+4 0 obj
+<<
+ /Filter /LZWDecode
+ /Length 5 0 R
+>>
+stream
+Not really compressed.
+endstream
+endobj
+
+5 0 obj
+23
+endobj
+
+%% Contents for page 1
+6 0 obj
+<<
+ /Length 7 0 R
+>>
+stream
+Really compressed.
+endstream
+endobj
+
+7 0 obj
+19
+endobj
+
+8 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+9 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+xref
+0 10
+0000000000 65535 f
+0000000025 00000 n
+0000000079 00000 n
+0000000161 00000 n
+0000000386 00000 n
+0000000485 00000 n
+0000000527 00000 n
+0000000601 00000 n
+0000000620 00000 n
+0000000738 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 10
+ /ID [<8b40e25fc6409c63043ed789f3ae4a4b><8b40e25fc6409c63043ed789f3ae4a4b>]
+>>
+startxref
+773
+%%EOF
diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-25.out b/qpdf/qtest/qpdf/copy-foreign-objects-25.out
new file mode 100644
index 00000000..da8906a7
--- /dev/null
+++ b/qpdf/qtest/qpdf/copy-foreign-objects-25.out
@@ -0,0 +1 @@
+test 25 done
diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-26.out b/qpdf/qtest/qpdf/copy-foreign-objects-26.out
new file mode 100644
index 00000000..a6b86cc8
--- /dev/null
+++ b/qpdf/qtest/qpdf/copy-foreign-objects-26.out
@@ -0,0 +1 @@
+test 26 done
diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-27.out b/qpdf/qtest/qpdf/copy-foreign-objects-27.out
new file mode 100644
index 00000000..c95ef9d9
--- /dev/null
+++ b/qpdf/qtest/qpdf/copy-foreign-objects-27.out
@@ -0,0 +1,3 @@
+WARNING: copy-foreign-objects-in.pdf (offset 3627): error decoding stream data for object 26 0: LZWDecoder: bad code received
+WARNING: copy-foreign-objects-in.pdf (offset 3627): stream will be re-processed without filtering to avoid data loss
+test 27 done
diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-in.pdf b/qpdf/qtest/qpdf/copy-foreign-objects-in.pdf
index caa4d8f2..727f4247 100644
--- a/qpdf/qtest/qpdf/copy-foreign-objects-in.pdf
+++ b/qpdf/qtest/qpdf/copy-foreign-objects-in.pdf
@@ -7,12 +7,13 @@
% file.
% The /QTest key in trailer has pointers to several indirect objects:
-% O1, O2, O3 where O1 is an array that contains a dictionary that has
-% a key that points to O2, O2 is a dictionary that contains an array
-% that points to O1, and O3 is a page object that inherits some
-% resource from its parent /Pages and also points to some other page.
-% O1 also points to a stream whose dictionary has a key that points to
-% another stream whose dictionary points back to the first stream.
+% O1, O2, O3, O4 where O1 is an array that contains a dictionary that
+% has a key that points to O2, O2 is a dictionary that contains an
+% array that points to O1, O3 is a page object that inherits some
+% resource from its parent /Pages and also points to some other page,
+% and O4 is a stream with invalid compressed data. O1 also points to a
+% stream whose dictionary has a key that points to another stream
+% whose dictionary points back to the first stream.
1 0 obj
<<
@@ -293,43 +294,59 @@ endobj
% QTest
25 0 obj
-<< /This-is-QTest true /O1 19 0 R /O2 20 0 R /O3 5 0 R >>
+<< /This-is-QTest true /O1 19 0 R /O2 20 0 R /O3 5 0 R /O4 26 0 R >>
+endobj
+
+26 0 obj
+<<
+ /Length 27 0 R
+ /Filter /LZWDecode
+>>
+stream
+Not really compresed.
+endstream
+endobj
+
+27 0 obj
+22
endobj
xref
-0 26
+0 28
0000000000 65535 f
-0000000655 00000 n
-0000000709 00000 n
-0000000845 00000 n
-0000001073 00000 n
-0000001313 00000 n
-0000001580 00000 n
-0000001839 00000 n
-0000002081 00000 n
-0000002183 00000 n
-0000002202 00000 n
-0000002334 00000 n
-0000002438 00000 n
-0000002481 00000 n
-0000002585 00000 n
-0000002628 00000 n
-0000002732 00000 n
-0000002775 00000 n
-0000002879 00000 n
-0000002904 00000 n
-0000003042 00000 n
-0000003138 00000 n
-0000003255 00000 n
-0000003285 00000 n
-0000003402 00000 n
-0000003430 00000 n
+0000000706 00000 n
+0000000760 00000 n
+0000000896 00000 n
+0000001124 00000 n
+0000001364 00000 n
+0000001631 00000 n
+0000001890 00000 n
+0000002132 00000 n
+0000002234 00000 n
+0000002253 00000 n
+0000002385 00000 n
+0000002489 00000 n
+0000002532 00000 n
+0000002636 00000 n
+0000002679 00000 n
+0000002783 00000 n
+0000002826 00000 n
+0000002930 00000 n
+0000002955 00000 n
+0000003093 00000 n
+0000003189 00000 n
+0000003306 00000 n
+0000003336 00000 n
+0000003453 00000 n
+0000003481 00000 n
+0000003567 00000 n
+0000003667 00000 n
trailer <<
/Root 1 0 R
- /Size 26
+ /Size 28
/QTest 25 0 R
/ID [<d15f7aca3be584a96c1c94adb0931e71><9adb6b2fdb22e857340f7103917b16e4>]
>>
startxref
-3505
+3687
%%EOF
diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-out1.pdf b/qpdf/qtest/qpdf/copy-foreign-objects-out1.pdf
index 49de3cd3..82ce8cb6 100644
--- a/qpdf/qtest/qpdf/copy-foreign-objects-out1.pdf
+++ b/qpdf/qtest/qpdf/copy-foreign-objects-out1.pdf
@@ -4,27 +4,33 @@
<< /Pages 3 0 R /Type /Catalog >>
endobj
2 0 obj
-<< /O1 4 0 R /O2 5 0 R /This-is-QTest true >>
+<< /O1 4 0 R /O2 5 0 R /O4 6 0 R /This-is-QTest true >>
endobj
3 0 obj
-<< /Count 1 /Kids [ 6 0 R ] /Type /Pages >>
+<< /Count 1 /Kids [ 7 0 R ] /Type /Pages >>
endobj
4 0 obj
-[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 5 0 R >> 2.17828 ] >> /salad /O2 5 0 R /Stream1 7 0 R ]
+[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 5 0 R >> 2.17828 ] >> /salad /O2 5 0 R /Stream1 8 0 R ]
endobj
5 0 obj
<< /K1 [ 2.236 /O1 4 0 R 1.732 ] /O1 4 0 R /This-is-O2 true >>
endobj
6 0 obj
-<< /Contents 8 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 9 0 R >> /ProcSet 10 0 R >> /Type /Page >>
+<< /Filter /LZWDecode /Length 22 >>
+stream
+Not really compresed.
+endstream
endobj
7 0 obj
-<< /Stream2 11 0 R /This-is-Stream1 true /Length 18 >>
+<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >>
+endobj
+8 0 obj
+<< /Stream2 12 0 R /This-is-Stream1 true /Length 18 >>
stream
This is stream 1.
endstream
endobj
-8 0 obj
+9 0 obj
<< /Length 44 >>
stream
BT
@@ -34,33 +40,34 @@ BT
ET
endstream
endobj
-9 0 obj
+10 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
-10 0 obj
+11 0 obj
[ /PDF /Text ]
endobj
-11 0 obj
-<< /Stream1 7 0 R /This-is-Stream2 true /Length 18 >>
+12 0 obj
+<< /Stream1 8 0 R /This-is-Stream2 true /Length 18 >>
stream
This is stream 2.
endstream
endobj
xref
-0 12
+0 13
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
-0000000125 00000 n
-0000000184 00000 n
-0000000301 00000 n
-0000000379 00000 n
-0000000523 00000 n
-0000000628 00000 n
-0000000721 00000 n
-0000000828 00000 n
-0000000859 00000 n
-trailer << /QTest 2 0 R /Root 1 0 R /Size 12 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
+0000000135 00000 n
+0000000194 00000 n
+0000000311 00000 n
+0000000389 00000 n
+0000000479 00000 n
+0000000624 00000 n
+0000000729 00000 n
+0000000822 00000 n
+0000000930 00000 n
+0000000961 00000 n
+trailer << /QTest 2 0 R /Root 1 0 R /Size 13 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
startxref
-964
+1066
%%EOF
diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-out2.pdf b/qpdf/qtest/qpdf/copy-foreign-objects-out2.pdf
index 76529aae..ec4a0894 100644
--- a/qpdf/qtest/qpdf/copy-foreign-objects-out2.pdf
+++ b/qpdf/qtest/qpdf/copy-foreign-objects-out2.pdf
@@ -4,39 +4,45 @@
<< /Pages 3 0 R /Type /Catalog >>
endobj
2 0 obj
-<< /O1 4 0 R /O2 5 0 R /O3 6 0 R /This-is-QTest true >>
+<< /O1 4 0 R /O2 5 0 R /O3 6 0 R /O4 7 0 R /This-is-QTest true >>
endobj
3 0 obj
-<< /Count 2 /Kids [ 7 0 R 6 0 R ] /Type /Pages >>
+<< /Count 2 /Kids [ 8 0 R 6 0 R ] /Type /Pages >>
endobj
4 0 obj
-[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 5 0 R >> 2.17828 ] >> /salad /O2 5 0 R /Stream1 8 0 R ]
+[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 5 0 R >> 2.17828 ] >> /salad /O2 5 0 R /Stream1 9 0 R ]
endobj
5 0 obj
<< /K1 [ 2.236 /O1 4 0 R 1.732 ] /O1 4 0 R /This-is-O2 true >>
endobj
6 0 obj
-<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
+<< /Contents 10 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 11 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
endobj
7 0 obj
-<< /Contents 11 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 12 0 R >> /ProcSet 13 0 R >> /Type /Page >>
+<< /Filter /LZWDecode /Length 22 >>
+stream
+Not really compresed.
+endstream
endobj
8 0 obj
-<< /Stream2 14 0 R /This-is-Stream1 true /Length 18 >>
+<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet 14 0 R >> /Type /Page >>
+endobj
+9 0 obj
+<< /Stream2 15 0 R /This-is-Stream1 true /Length 18 >>
stream
This is stream 1.
endstream
endobj
-9 0 obj
+10 0 obj
<< /Length 47 >>
stream
BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET
endstream
endobj
-10 0 obj
+11 0 obj
<< /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
endobj
-11 0 obj
+12 0 obj
<< /Length 44 >>
stream
BT
@@ -46,36 +52,37 @@ BT
ET
endstream
endobj
-12 0 obj
+13 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
-13 0 obj
+14 0 obj
[ /PDF /Text ]
endobj
-14 0 obj
-<< /Stream1 8 0 R /This-is-Stream2 true /Length 18 >>
+15 0 obj
+<< /Stream1 9 0 R /This-is-Stream2 true /Length 18 >>
stream
This is stream 2.
endstream
endobj
xref
-0 15
+0 16
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
-0000000135 00000 n
-0000000200 00000 n
-0000000317 00000 n
-0000000395 00000 n
-0000000577 00000 n
-0000000723 00000 n
-0000000828 00000 n
-0000000924 00000 n
-0000001024 00000 n
-0000001118 00000 n
-0000001226 00000 n
-0000001257 00000 n
-trailer << /QTest 2 0 R /Root 1 0 R /Size 15 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
+0000000145 00000 n
+0000000210 00000 n
+0000000327 00000 n
+0000000405 00000 n
+0000000588 00000 n
+0000000678 00000 n
+0000000824 00000 n
+0000000929 00000 n
+0000001026 00000 n
+0000001126 00000 n
+0000001220 00000 n
+0000001328 00000 n
+0000001359 00000 n
+trailer << /QTest 2 0 R /Root 1 0 R /Size 16 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
startxref
-1362
+1464
%%EOF
diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf b/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf
index 489aef8e..21547896 100644
--- a/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf
+++ b/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf
@@ -4,7 +4,7 @@
<< /Pages 6 0 R /Type /Catalog >>
endobj
2 0 obj
-<< /O1 7 0 R /O2 8 0 R /O3 9 0 R /This-is-QTest true >>
+<< /O1 7 0 R /O2 8 0 R /O3 9 0 R /O4 10 0 R /This-is-QTest true >>
endobj
3 0 obj
<< /Length 20 >>
@@ -25,39 +25,45 @@ more data for stream
endstream
endobj
6 0 obj
-<< /Count 3 /Kids [ 10 0 R 11 0 R 9 0 R ] /Type /Pages >>
+<< /Count 3 /Kids [ 11 0 R 12 0 R 9 0 R ] /Type /Pages >>
endobj
7 0 obj
-[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 8 0 R >> 2.17828 ] >> /salad /O2 8 0 R /Stream1 12 0 R ]
+[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 8 0 R >> 2.17828 ] >> /salad /O2 8 0 R /Stream1 13 0 R ]
endobj
8 0 obj
<< /K1 [ 2.236 /O1 7 0 R 1.732 ] /O1 7 0 R /This-is-O2 true >>
endobj
9 0 obj
-<< /Contents 13 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 11 0 R /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
+<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 12 0 R /Parent 6 0 R /Resources << /Font << /F1 15 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
endobj
10 0 obj
-<< /Contents 15 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 16 0 R >> /ProcSet 17 0 R >> /Type /Page >>
+<< /Filter /LZWDecode /Length 22 >>
+stream
+Not really compresed.
+endstream
endobj
11 0 obj
-<< /Contents 18 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >>
+<< /Contents 16 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 17 0 R >> /ProcSet 18 0 R >> /Type /Page >>
endobj
12 0 obj
-<< /Stream2 19 0 R /This-is-Stream1 true /Length 18 >>
+<< /Contents 19 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 15 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >>
+endobj
+13 0 obj
+<< /Stream2 20 0 R /This-is-Stream1 true /Length 18 >>
stream
This is stream 1.
endstream
endobj
-13 0 obj
+14 0 obj
<< /Length 47 >>
stream
BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET
endstream
endobj
-14 0 obj
+15 0 obj
<< /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
endobj
-15 0 obj
+16 0 obj
<< /Length 44 >>
stream
BT
@@ -67,47 +73,48 @@ BT
ET
endstream
endobj
-16 0 obj
+17 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
-17 0 obj
+18 0 obj
[ /PDF /Text ]
endobj
-18 0 obj
+19 0 obj
<< /Length 47 >>
stream
BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET
endstream
endobj
-19 0 obj
-<< /Stream1 12 0 R /This-is-Stream2 true /Length 18 >>
+20 0 obj
+<< /Stream1 13 0 R /This-is-Stream2 true /Length 18 >>
stream
This is stream 2.
endstream
endobj
xref
-0 20
+0 21
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
-0000000135 00000 n
-0000000204 00000 n
-0000000259 00000 n
-0000000329 00000 n
-0000000402 00000 n
-0000000520 00000 n
-0000000598 00000 n
-0000000799 00000 n
-0000000946 00000 n
-0000001141 00000 n
-0000001247 00000 n
-0000001344 00000 n
-0000001444 00000 n
-0000001538 00000 n
-0000001646 00000 n
-0000001677 00000 n
-0000001774 00000 n
-trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R 5 0 R ] /Root 1 0 R /Size 20 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
+0000000146 00000 n
+0000000215 00000 n
+0000000270 00000 n
+0000000340 00000 n
+0000000413 00000 n
+0000000531 00000 n
+0000000609 00000 n
+0000000810 00000 n
+0000000901 00000 n
+0000001048 00000 n
+0000001243 00000 n
+0000001349 00000 n
+0000001446 00000 n
+0000001546 00000 n
+0000001640 00000 n
+0000001748 00000 n
+0000001779 00000 n
+0000001876 00000 n
+trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R 5 0 R ] /Root 1 0 R /Size 21 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
startxref
-1880
+1982
%%EOF
diff --git a/qpdf/qtest/qpdf/split-exp-1.pdf b/qpdf/qtest/qpdf/split-exp-1.pdf
new file mode 100644
index 00000000..77a38bc6
--- /dev/null
+++ b/qpdf/qtest/qpdf/split-exp-1.pdf
Binary files differ
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index d3305377..eba0903b 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -1210,7 +1210,8 @@ void runtest(int n, char const* filename1, char const* arg2)
QPDFWriter w(pdf, "a.pdf");
w.setStaticID(true);
- w.setStreamDataMode(qpdf_s_preserve);
+ w.setCompressStreams(false);
+ w.setDecodeLevel(qpdf_dl_generalized);
w.write();
}
else if (n == 28)