about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org>, 2017-08-19 20:40:33 +0200
committer: Jay Berkenbilt <ejb@ql.org>, 2017-08-21 23:44:22 +0200
commit: ddc6cf0cf6c11bb50f9f576bf547df3674142c97 (patch)
tree: 315cb87cce6ab27648dfa8edc5e941f04bf5178b
parent: 9744414c66e3f85700ebc8b32d90f45ff97221bd (diff)
download: qpdf-ddc6cf0cf6c11bb50f9f576bf547df3674142c97.tar.zst
Precheck streams by default
There is no need for a --precheck-streams option. We can do the precheck without imposing any penalty, only re-encoding the stream if it fails the first time.
-rw-r--r-- include/qpdf/QPDFWriter.hh 12
-rw-r--r-- libqpdf/QPDFWriter.cc 49
-rw-r--r-- manual/qpdf-manual.xml 17
-rw-r--r-- qpdf/qpdf.cc 11
-rw-r--r-- qpdf/qpdf.testcov 1
-rw-r--r-- qpdf/qtest/qpdf.test 23
-rw-r--r-- qpdf/qtest/qpdf/bad-data-out.pdf bin 759 -> 797 bytes
-rw-r--r-- qpdf/qtest/qpdf/bad-data-precheck.pdf bin 797 -> 0 bytes
8 files changed, 32 insertions, 81 deletions
diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh
index c4bc7846..2f878492 100644
--- a/include/qpdf/QPDFWriter.hh
+++ b/include/qpdf/QPDFWriter.hh
@@ -200,17 +200,6 @@ class QPDFWriter
QPDF_DLL
void setQDFMode(bool);
- // Enable stream precheck mode. In this mode, all filterable
- // streams are checked by actually attempting to decode them
- // before filtering. This may add significant time to the process
- // of writing the data because all streams from the input must be
- // read twice, but it enables the raw stream data to be preserved
- // even in cases where qpdf would run into errors decoding the
- // stream after it determines that it should be able to do it.
- // Examples would include compressed data with errors in it.
- QPDF_DLL
- void setPrecheckStreams(bool);
-
// Preserve unreferenced objects. The default behavior is to
// discard any object that is not visited during a traversal of
// the object structure from the trailer.
@@ -495,7 +484,6 @@ class QPDFWriter
qpdf_stream_decode_level_e stream_decode_level;
bool stream_decode_level_set;
bool qdf_mode;
- bool precheck_streams;
bool preserve_unreferenced_objects;
bool newline_before_endstream;
bool static_id;
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index fe25853a..06945c49 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -59,7 +59,6 @@ QPDFWriter::init()
stream_decode_level = qpdf_dl_none;
stream_decode_level_set = false;
qdf_mode = false;
- precheck_streams = false;
preserve_unreferenced_objects = false;
newline_before_endstream = false;
static_id = false;
@@ -216,12 +215,6 @@ QPDFWriter::setQDFMode(bool val)
}
void
-QPDFWriter::setPrecheckStreams(bool val)
-{
- this->precheck_streams = val;
-}
-
-void
QPDFWriter::setPreserveUnreferencedObjects(bool val)
{
this->preserve_unreferenced_objects = val;
@@ -1590,34 +1583,32 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
flags |= f_stream;
- if (filter && this->precheck_streams)
+ PointerHolder<Buffer> stream_data;
+ bool filtered = false;
+ for (int attempt = 1; attempt <= 2; ++attempt)
{
- try
+ pushPipeline(new Pl_Buffer("stream data"));
+ activatePipelineStack();
+
+ filtered =
+ object.pipeStreamData(
+ this->pipeline,
+ (((filter && normalize) ? qpdf_ef_normalize : 0) |
+ ((filter && compress) ? qpdf_ef_compress : 0)),
+ (filter
+ ? (uncompress ? qpdf_dl_all : this->stream_decode_level)
+ : qpdf_dl_none));
+ popPipelineStack(&stream_data);
+ if (filter && (! filtered))
{
- QTC::TC("qpdf", "QPDFWriter precheck stream");
- Pl_Discard discard;
- filter = object.pipeStreamData(
- &discard, 0, qpdf_dl_all, true);
+ // Try again
+ filter = false;
}
- catch (std::exception&)
+ else
{
- filter = false;
+ break;
}
}
-
- pushPipeline(new Pl_Buffer("stream data"));
- activatePipelineStack();
-
- bool filtered =
- object.pipeStreamData(
- this->pipeline,
- (((filter && normalize) ? qpdf_ef_normalize : 0) |
- ((filter && compress) ? qpdf_ef_compress : 0)),
- (filter
- ? (uncompress ? qpdf_dl_all : this->stream_decode_level)
- : qpdf_dl_none));
- PointerHolder<Buffer> stream_data;
- popPipelineStack(&stream_data);
if (filtered)
{
flags |= f_filtered;
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
index 14e8b8bc..0e7d05f7 100644
--- a/manual/qpdf-manual.xml
+++ b/manual/qpdf-manual.xml
@@ -997,23 +997,6 @@ outfile.pdf</option>
</listitem>
</varlistentry>
<varlistentry>
- <term><option>--precheck-streams</option></term>
- <listitem>
- <para>
- Tells qpdf to precheck each stream for the ability to decode
- it. Ordinarily qpdf tries to decode streams that it thinks it
- can decode based on the filters, and if there ends up being an
- error when actually trying to do the decode, the stream data
- is truncated. This flag causes qpdf to actually read the
- stream fully before deciding whether to filter the stream.
- This option will slow qpdf down since it will have to read the
- stream twice, but it allows raw stream data to be preserved in
- cases where the decoding of the stream would fail for some
- reason. This may be useful in working with some damaged files.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
<term><option>--preserve-unreferenced</option></term>
<listitem>
<para>
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index df2ba225..ade0d4f2 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -87,7 +87,6 @@ struct Options
object_stream_mode(qpdf_o_preserve),
ignore_xref_streams(false),
qdf_mode(false),
- precheck_streams(false),
preserve_unreferenced_objects(false),
newline_before_endstream(false),
show_npages(false),
@@ -149,7 +148,6 @@ struct Options
qpdf_object_stream_e object_stream_mode;
bool ignore_xref_streams;
bool qdf_mode;
- bool precheck_streams;
bool preserve_unreferenced_objects;
bool newline_before_endstream;
std::string min_version;
@@ -371,7 +369,6 @@ familiar with the PDF file format or who are PDF developers.\n\
--suppress-recovery prevents qpdf from attempting to recover damaged files\n\
--object-streams=mode controls handing of object streams\n\
--ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\
---precheck-streams precheck ability to decode streams\n\
--preserve-unreferenced preserve unreferenced objects\n\
--newline-before-endstream always put a newline before endstream\n\
--qdf turns on \"QDF mode\" (below)\n\
@@ -1467,10 +1464,6 @@ static void parse_options(int argc, char* argv[], Options& o)
{
o.qdf_mode = true;
}
- else if (strcmp(arg, "precheck-streams") == 0)
- {
- o.precheck_streams = true;
- }
else if (strcmp(arg, "preserve-unreferenced") == 0)
{
o.preserve_unreferenced_objects = true;
@@ -2094,10 +2087,6 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
{
w.setQDFMode(true);
}
- if (o.precheck_streams)
- {
- w.setPrecheckStreams(true);
- }
if (o.preserve_unreferenced_objects)
{
w.setPreserveUnreferencedObjects(true);
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 2a157c91..c1355d59 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -277,7 +277,6 @@ QPDFObjectHandle treat word as string 0
QPDFObjectHandle found fake 1
QPDFObjectHandle no val for last key 0
QPDF resolve failure to null 0
-QPDFWriter precheck stream 0
QPDFWriter preserve unreferenced standard 0
QPDFObjectHandle non-stream in parsecontent 0
QPDFObjectHandle errors in parsecontent 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 97d73277..cf8c88ca 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -918,27 +918,20 @@ $td->runtest("check output",
show_ntests();
# ----------
$td->notify("--- Precheck streams ---");
-$n_tests += 4;
+$n_tests += 2;
-$td->runtest("bad stream without precheck",
+$td->runtest("bad stream",
{$td->COMMAND => "qpdf --static-id bad-data.pdf a.pdf"},
{$td->FILE => "bad-data.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "bad-data-out.pdf"});
-$td->runtest("bad stream with precheck",
- {$td->COMMAND =>
- "qpdf --static-id --precheck-streams bad-data.pdf a.pdf"},
- {$td->STRING => "", $td->EXIT_STATUS => 0},
- $td->NORMALIZE_NEWLINES);
-$td->runtest("check output",
- {$td->FILE => "a.pdf"},
- {$td->FILE => "bad-data-precheck.pdf"});
+
show_ntests();
# ----------
$td->notify("--- Decode levels ---");
-$n_tests += 10;
+$n_tests += 12;
# image-streams.pdf is the output of examples/pdf-create.
# examples/pdf-create validates the actual image data.
@@ -962,6 +955,14 @@ $td->runtest("check finds bad jpeg data",
{$td->FILE => "bad-jpeg-check.out",
$td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
+$td->runtest("precheck detects bad jpeg data",
+ {$td->COMMAND => "qpdf --static-id --decode-level=all" .
+ " bad-jpeg.pdf a.pdf"},
+ {$td->FILE => "bad-jpeg.out", $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("check file",
+ {$td->FILE => "a.pdf"},
+ {$td->FILE => "bad-jpeg-out.pdf"});
$td->runtest("get data",
{$td->COMMAND => "qpdf --show-object=6" .
" --filtered-stream-data bad-jpeg.pdf"},
diff --git a/qpdf/qtest/qpdf/bad-data-out.pdf b/qpdf/qtest/qpdf/bad-data-out.pdf
index f4300662..4314025a 100644
--- a/qpdf/qtest/qpdf/bad-data-out.pdf
+++ b/qpdf/qtest/qpdf/bad-data-out.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/bad-data-precheck.pdf b/qpdf/qtest/qpdf/bad-data-precheck.pdf
deleted file mode 100644
index 4314025a..00000000
--- a/qpdf/qtest/qpdf/bad-data-precheck.pdf
+++ /dev/null
Binary files differ