From 7f8892525f897b17049f9e59bc4ce8ac28c9e082 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 27 Jul 2017 23:42:27 -0400 Subject: Add precheck streams capability When requested, QPDFWriter will do more aggressive prechecking of streams to make sure it can actually succeed in decoding them before attempting to do so. This will allow preservation of raw data even when the raw data is corrupted relative to the specified filters. --- libqpdf/QPDF.cc | 28 +++++++++++++++++++--------- libqpdf/QPDFObjectHandle.cc | 5 +++-- libqpdf/QPDFWriter.cc | 22 ++++++++++++++++++++++ libqpdf/QPDF_Stream.cc | 17 +++++++++++------ libqpdf/qpdf/QPDF_Stream.hh | 3 ++- 5 files changed, 57 insertions(+), 18 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 32c8cdf9..b5c1212c 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -2134,12 +2134,14 @@ QPDF::getCompressibleObjGens() return result; } -void +bool QPDF::pipeStreamData(int objid, int generation, qpdf_offset_t offset, size_t length, QPDFObjectHandle stream_dict, - Pipeline* pipeline) + Pipeline* pipeline, + bool suppress_warnings) { + bool success = false; std::vector > to_delete; if (this->encrypted) { @@ -2165,21 +2167,29 @@ QPDF::pipeStreamData(int objid, int generation, length -= len; pipeline->write(QUtil::unsigned_char_pointer(buf), len); } + success = true; } catch (QPDFExc& e) { - warn(e); + if (! suppress_warnings) + { + warn(e); + } } catch (std::runtime_error& e) { - QTC::TC("qpdf", "QPDF decoding error warning"); - warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), - "", this->file->getLastOffset(), - "error decoding stream data for object " + - QUtil::int_to_string(objid) + " " + - QUtil::int_to_string(generation) + ": " + e.what())); + if (! 
suppress_warnings) + { + QTC::TC("qpdf", "QPDF decoding error warning"); + warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), + "", this->file->getLastOffset(), + "error decoding stream data for object " + + QUtil::int_to_string(objid) + " " + + QUtil::int_to_string(generation) + ": " + e.what())); + } } pipeline->finish(); + return success; } void diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 7618cdf3..bac233df 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -496,11 +496,12 @@ QPDFObjectHandle::getRawStreamData() bool QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, - bool normalize, bool compress) + bool normalize, bool compress, + bool suppress_warnings) { assertStream(); return dynamic_cast(obj.getPointer())->pipeStreamData( - p, filter, normalize, compress); + p, filter, normalize, compress, suppress_warnings); } void diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 01748fc7..59e306fc 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -57,6 +57,7 @@ QPDFWriter::init() stream_data_mode_set = false; stream_data_mode = qpdf_s_compress; qdf_mode = false; + precheck_streams = false; static_id = false; suppress_original_object_ids = false; direct_stream_lengths = true; @@ -176,6 +177,12 @@ QPDFWriter::setQDFMode(bool val) this->qdf_mode = val; } +void +QPDFWriter::setPrecheckStreams(bool val) +{ + this->precheck_streams = val; +} + void QPDFWriter::setMinimumPDFVersion(std::string const& version) { @@ -1522,6 +1529,21 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, flags |= f_stream; + if (filter && this->precheck_streams) + { + try + { + QTC::TC("qpdf", "QPDFWriter precheck stream"); + Pl_Discard discard; + filter = object.pipeStreamData( + &discard, true, false, false, true); + } + catch (std::exception) + { + filter = false; + } + } + pushPipeline(new Pl_Buffer("stream data")); activatePipelineStack(); bool filtered = diff --git 
a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index b4d14441..31d583b8 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -85,7 +85,7 @@ PointerHolder QPDF_Stream::getStreamData() { Pl_Buffer buf("stream data buffer"); - if (! pipeStreamData(&buf, true, false, false)) + if (! pipeStreamData(&buf, true, false, false, false)) { throw std::logic_error("getStreamData called on unfilterable stream"); } @@ -97,7 +97,7 @@ PointerHolder QPDF_Stream::getRawStreamData() { Pl_Buffer buf("stream data buffer"); - pipeStreamData(&buf, false, false, false); + pipeStreamData(&buf, false, false, false, false); QTC::TC("qpdf", "QPDF_Stream getRawStreamData"); return buf.getBuffer(); } @@ -351,7 +351,8 @@ QPDF_Stream::filterable(std::vector& filters, bool QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter, - bool normalize, bool compress) + bool normalize, bool compress, + bool suppress_warnings) { std::vector filters; int predictor = 1; @@ -487,9 +488,13 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter, else { QTC::TC("qpdf", "QPDF_Stream pipe original stream data"); - QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation, - this->offset, this->length, - this->stream_dict, pipeline); + if (! QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation, + this->offset, this->length, + this->stream_dict, pipeline, + suppress_warnings)) + { + filter = false; + } } return filter; diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index fa405d70..d053fd0f 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -23,7 +23,8 @@ class QPDF_Stream: public QPDFObject // See comments in QPDFObjectHandle.hh for these methods. 
bool pipeStreamData(Pipeline*, bool filter, - bool normalize, bool compress); + bool normalize, bool compress, + bool suppress_warnings); PointerHolder getStreamData(); PointerHolder getRawStreamData(); void replaceStreamData(PointerHolder data, -- cgit v1.2.3-70-g09d2