about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 4
-rw-r--r-- include/qpdf/QPDF.hh 14
-rw-r--r-- include/qpdf/QPDFObjectHandle.hh 3
-rw-r--r-- include/qpdf/QPDFWriter.hh 12
-rw-r--r-- libqpdf/QPDF.cc 28
-rw-r--r-- libqpdf/QPDFObjectHandle.cc 5
-rw-r--r-- libqpdf/QPDFWriter.cc 22
-rw-r--r-- libqpdf/QPDF_Stream.cc 17
-rw-r--r-- libqpdf/qpdf/QPDF_Stream.hh 3
-rw-r--r-- manual/qpdf-manual.xml 17
-rw-r--r-- qpdf/qpdf.cc 10
-rw-r--r-- qpdf/qpdf.testcov 1
-rw-r--r-- qpdf/qtest/qpdf.test 20
-rw-r--r-- qpdf/qtest/qpdf/bad-data-out.pdf bin 0 -> 759 bytes
-rw-r--r-- qpdf/qtest/qpdf/bad-data-precheck.pdf bin 0 -> 797 bytes
-rw-r--r-- qpdf/qtest/qpdf/bad-data.out 2
-rw-r--r-- qpdf/qtest/qpdf/bad-data.pdf bin 0 -> 799 bytes
17 files changed, 133 insertions, 25 deletions
diff --git a/ChangeLog b/ChangeLog
index 119a4c6c..026833d4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2017-07-27 Jay Berkenbilt <ejb@ql.org>
+ * Add --precheck-streams command-line option and setPrecheckStreams
+ option to QPDFWriter to tell QPDFWriter to attempt decoding a
+ stream fully before deciding whether to filter it or not.
+
* Recover gracefully from streams that aren't filterable because
the filter parameters are invalid in the stream dictionary or the
dictionary itself is invalid.
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 18a6851f..ef9ce597 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -540,13 +540,14 @@ class QPDF
{
friend class QPDF_Stream;
private:
- static void pipeStreamData(QPDF* qpdf, int objid, int generation,
+ static bool pipeStreamData(QPDF* qpdf, int objid, int generation,
qpdf_offset_t offset, size_t length,
QPDFObjectHandle dict,
- Pipeline* pipeline)
+ Pipeline* pipeline, bool suppress_warnings)
{
- qpdf->pipeStreamData(
- objid, generation, offset, length, dict, pipeline);
+ return qpdf->pipeStreamData(
+ objid, generation, offset, length, dict, pipeline,
+ suppress_warnings);
}
};
friend class Pipe;
@@ -666,10 +667,11 @@ class QPDF
void findAttachmentStreams();
// Calls finish() on the pipeline when done but does not delete it
- void pipeStreamData(int objid, int generation,
+ bool pipeStreamData(int objid, int generation,
qpdf_offset_t offset, size_t length,
QPDFObjectHandle dict,
- Pipeline* pipeline);
+ Pipeline* pipeline,
+ bool suppress_warnings);
// For QPDFWriter:
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index 0fc989a5..11a52596 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -394,7 +394,8 @@ class QPDFObjectHandle
// replaced if writing a new stream object.
QPDF_DLL
bool pipeStreamData(Pipeline*, bool filter,
- bool normalize, bool compress);
+ bool normalize, bool compress,
+ bool suppress_warnings = false);
// Replace a stream's dictionary. The new dictionary must be
// consistent with the stream's data. This is most appropriately
diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh
index b2738c1f..2687cce0 100644
--- a/include/qpdf/QPDFWriter.hh
+++ b/include/qpdf/QPDFWriter.hh
@@ -144,6 +144,17 @@ class QPDFWriter
QPDF_DLL
void setQDFMode(bool);
+ // Enable stream precheck mode. In this mode, all filterable
+ // streams are checked by actually attempting to decode them
+ // before filtering. This may add significant time to the process
+ // of writing the data because each filterable input stream must
+ // be read twice, but it enables the raw stream data to be
+ // preserved even in cases where qpdf would run into errors
+ // decoding a stream that it had determined it should be able to
+ // decode. Examples would include compressed data with errors in it.
+ QPDF_DLL
+ void setPrecheckStreams(bool);
+
// Set the minimum PDF version. If the PDF version of the input
// file (or previously set minimum version) is less than the
// version passed to this method, the PDF version of the output
@@ -415,6 +426,7 @@ class QPDFWriter
bool stream_data_mode_set;
qpdf_stream_data_e stream_data_mode;
bool qdf_mode;
+ bool precheck_streams;
bool static_id;
bool suppress_original_object_ids;
bool direct_stream_lengths;
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 32c8cdf9..b5c1212c 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -2134,12 +2134,14 @@ QPDF::getCompressibleObjGens()
return result;
}
-void
+bool
QPDF::pipeStreamData(int objid, int generation,
qpdf_offset_t offset, size_t length,
QPDFObjectHandle stream_dict,
- Pipeline* pipeline)
+ Pipeline* pipeline,
+ bool suppress_warnings)
{
+ bool success = false;
std::vector<PointerHolder<Pipeline> > to_delete;
if (this->encrypted)
{
@@ -2165,21 +2167,29 @@ QPDF::pipeStreamData(int objid, int generation,
length -= len;
pipeline->write(QUtil::unsigned_char_pointer(buf), len);
}
+ success = true;
}
catch (QPDFExc& e)
{
- warn(e);
+ if (! suppress_warnings)
+ {
+ warn(e);
+ }
}
catch (std::runtime_error& e)
{
- QTC::TC("qpdf", "QPDF decoding error warning");
- warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
- "", this->file->getLastOffset(),
- "error decoding stream data for object " +
- QUtil::int_to_string(objid) + " " +
- QUtil::int_to_string(generation) + ": " + e.what()));
+ if (! suppress_warnings)
+ {
+ QTC::TC("qpdf", "QPDF decoding error warning");
+ warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
+ "", this->file->getLastOffset(),
+ "error decoding stream data for object " +
+ QUtil::int_to_string(objid) + " " +
+ QUtil::int_to_string(generation) + ": " + e.what()));
+ }
}
pipeline->finish();
+ return success;
}
void
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 7618cdf3..bac233df 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -496,11 +496,12 @@ QPDFObjectHandle::getRawStreamData()
bool
QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter,
- bool normalize, bool compress)
+ bool normalize, bool compress,
+ bool suppress_warnings)
{
assertStream();
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData(
- p, filter, normalize, compress);
+ p, filter, normalize, compress, suppress_warnings);
}
void
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index 01748fc7..59e306fc 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -57,6 +57,7 @@ QPDFWriter::init()
stream_data_mode_set = false;
stream_data_mode = qpdf_s_compress;
qdf_mode = false;
+ precheck_streams = false;
static_id = false;
suppress_original_object_ids = false;
direct_stream_lengths = true;
@@ -177,6 +178,12 @@ QPDFWriter::setQDFMode(bool val)
}
void
+QPDFWriter::setPrecheckStreams(bool val)
+{
+ this->precheck_streams = val;
+}
+
+void
QPDFWriter::setMinimumPDFVersion(std::string const& version)
{
setMinimumPDFVersion(version, 0);
@@ -1522,6 +1529,21 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
flags |= f_stream;
+ if (filter && this->precheck_streams)
+ {
+ try
+ {
+ QTC::TC("qpdf", "QPDFWriter precheck stream");
+ Pl_Discard discard;
+ filter = object.pipeStreamData(
+ &discard, true, false, false, true);
+ }
+ catch (std::exception)
+ {
+ filter = false;
+ }
+ }
+
pushPipeline(new Pl_Buffer("stream data"));
activatePipelineStack();
bool filtered =
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index b4d14441..31d583b8 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -85,7 +85,7 @@ PointerHolder<Buffer>
QPDF_Stream::getStreamData()
{
Pl_Buffer buf("stream data buffer");
- if (! pipeStreamData(&buf, true, false, false))
+ if (! pipeStreamData(&buf, true, false, false, false))
{
throw std::logic_error("getStreamData called on unfilterable stream");
}
@@ -97,7 +97,7 @@ PointerHolder<Buffer>
QPDF_Stream::getRawStreamData()
{
Pl_Buffer buf("stream data buffer");
- pipeStreamData(&buf, false, false, false);
+ pipeStreamData(&buf, false, false, false, false);
QTC::TC("qpdf", "QPDF_Stream getRawStreamData");
return buf.getBuffer();
}
@@ -351,7 +351,8 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
bool
QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
- bool normalize, bool compress)
+ bool normalize, bool compress,
+ bool suppress_warnings)
{
std::vector<std::string> filters;
int predictor = 1;
@@ -487,9 +488,13 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
else
{
QTC::TC("qpdf", "QPDF_Stream pipe original stream data");
- QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation,
- this->offset, this->length,
- this->stream_dict, pipeline);
+ if (! QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation,
+ this->offset, this->length,
+ this->stream_dict, pipeline,
+ suppress_warnings))
+ {
+ filter = false;
+ }
}
return filter;
diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh
index fa405d70..d053fd0f 100644
--- a/libqpdf/qpdf/QPDF_Stream.hh
+++ b/libqpdf/qpdf/QPDF_Stream.hh
@@ -23,7 +23,8 @@ class QPDF_Stream: public QPDFObject
// See comments in QPDFObjectHandle.hh for these methods.
bool pipeStreamData(Pipeline*, bool filter,
- bool normalize, bool compress);
+ bool normalize, bool compress,
+ bool suppress_warnings);
PointerHolder<Buffer> getStreamData();
PointerHolder<Buffer> getRawStreamData();
void replaceStreamData(PointerHolder<Buffer> data,
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
index a4c34e90..cd35718d 100644
--- a/manual/qpdf-manual.xml
+++ b/manual/qpdf-manual.xml
@@ -822,6 +822,23 @@ outfile.pdf</option>
</listitem>
</varlistentry>
<varlistentry>
+ <term><option>--precheck-streams</option></term>
+ <listitem>
+ <para>
+ Tells qpdf to precheck each stream for the ability to decode
+ it. Ordinarily qpdf tries to decode streams that it thinks it
+ can decode based on the filters, and if an error occurs while
+ actually decoding the data, the stream data is truncated.
+ This flag causes qpdf to read and decode each stream fully
+ before deciding whether to filter it. This option will slow
+ qpdf down since it will have to read each stream twice, but it
+ allows raw stream data to be preserved in cases where the
+ decoding of the stream would fail for some reason. This may be
+ useful in working with some damaged files.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
<term><option>--qdf</option></term>
<listitem>
<para>
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index c52e1125..99cfd3a1 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -202,6 +202,7 @@ familiar with the PDF file format or who are PDF developers.\n\
--suppress-recovery prevents qpdf from attempting to recover damaged files\n\
--object-streams=mode controls handing of object streams\n\
--ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\
+--precheck-streams precheck ability to decode streams\n\
--qdf turns on \"QDF mode\" (below)\n\
--min-version=version sets the minimum PDF version of the output file\n\
--force-version=version forces this to be the PDF version of the output file\n\
@@ -1028,6 +1029,7 @@ int main(int argc, char* argv[])
qpdf_object_stream_e object_stream_mode = qpdf_o_preserve;
bool ignore_xref_streams = false;
bool qdf_mode = false;
+ bool precheck_streams = false;
std::string min_version;
std::string force_version;
@@ -1213,6 +1215,10 @@ int main(int argc, char* argv[])
{
qdf_mode = true;
}
+ else if (strcmp(arg, "precheck-streams") == 0)
+ {
+ precheck_streams = true;
+ }
else if (strcmp(arg, "min-version") == 0)
{
if (parameter == 0)
@@ -1704,6 +1710,10 @@ int main(int argc, char* argv[])
{
w.setQDFMode(true);
}
+ if (precheck_streams)
+ {
+ w.setPrecheckStreams(true);
+ }
if (normalize_set)
{
w.setContentNormalization(normalize);
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 268ecb16..bf227c7a 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -279,3 +279,4 @@ QPDFObjectHandle treat word as string 0
QPDFObjectHandle found fake 1
QPDFObjectHandle no val for last key 0
QPDF resolve failure to null 0
+QPDFWriter precheck stream 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index b80ab9cb..b61882b9 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -723,6 +723,26 @@ $td->runtest("check output",
{$td->FILE => "from-scratch-0.pdf"});
show_ntests();
# ----------
+$td->notify("--- Precheck streams ---");
+$n_tests += 4;
+
+$td->runtest("bad stream without precheck",
+ {$td->COMMAND => "qpdf --static-id bad-data.pdf a.pdf"},
+ {$td->FILE => "bad-data.out", $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("check output",
+ {$td->FILE => "a.pdf"},
+ {$td->FILE => "bad-data-out.pdf"});
+$td->runtest("bad stream with precheck",
+ {$td->COMMAND =>
+ "qpdf --static-id --precheck-streams bad-data.pdf a.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("check output",
+ {$td->FILE => "a.pdf"},
+ {$td->FILE => "bad-data-precheck.pdf"});
+show_ntests();
+# ----------
$td->notify("--- Copy Foreign Objects ---");
$n_tests += 7;
diff --git a/qpdf/qtest/qpdf/bad-data-out.pdf b/qpdf/qtest/qpdf/bad-data-out.pdf
new file mode 100644
index 00000000..f4300662
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad-data-out.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/bad-data-precheck.pdf b/qpdf/qtest/qpdf/bad-data-precheck.pdf
new file mode 100644
index 00000000..4314025a
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad-data-precheck.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/bad-data.out b/qpdf/qtest/qpdf/bad-data.out
new file mode 100644
index 00000000..3ea1d07f
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad-data.out
@@ -0,0 +1,2 @@
+WARNING: bad-data.pdf (file position 319): error decoding stream data for object 4 0: LZWDecoder: bad code received
+qpdf: operation succeeded with warnings; resulting file may have some problems
diff --git a/qpdf/qtest/qpdf/bad-data.pdf b/qpdf/qtest/qpdf/bad-data.pdf
new file mode 100644
index 00000000..94ddafd4
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad-data.pdf
Binary files differ