aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2020-12-25 15:51:20 +0100
committerJay Berkenbilt <ejb@ql.org>2020-12-28 18:58:19 +0100
commit858c7b89bc67698a112b3b07885310d8e0132eb0 (patch)
tree4455e5094bb75a94353472088b2257bd179e5ae9
parent1a62cce94012e05e25e81dd7016766d9d039281d (diff)
downloadqpdf-858c7b89bc67698a112b3b07885310d8e0132eb0.tar.zst
Let optimize filter stream parameters instead of making them direct
Also removes preclusion of stream references in stream parameters of filterable streams and reduces write times by about 8% by eliminating an extra traversal of the objects.
-rw-r--r--ChangeLog8
-rw-r--r--libqpdf/QPDFWriter.cc154
-rw-r--r--qpdf/qpdf.testcov1
-rw-r--r--qpdf/qtest/qpdf.test2
-rw-r--r--qpdf/qtest/qpdf/issue-117.out2
5 files changed, 53 insertions, 114 deletions
diff --git a/ChangeLog b/ChangeLog
index 7c1b43d1..fc376cef 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2020-12-25 Jay Berkenbilt <ejb@ql.org>
+
+ * Refactor write code to eliminate an extra full traversal of
+ objects in the file and to remove assumptions that preclude stream
+ references from appearing in /DecodeParms of filterable streams.
+ This results in an approximately 8% performance reduction in write
+ times.
+
2020-12-23 Jay Berkenbilt <ejb@ql.org>
* Allow library users to provide their own decoders for stream
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index 2bee43e3..7057930b 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -2452,115 +2452,36 @@ QPDFWriter::getTrimmedTrailer()
void
QPDFWriter::prepareFileForWrite()
{
- // Do a traversal of the entire PDF file structure replacing all
- // indirect objects that QPDFWriter wants to be direct. This
- // includes stream lengths, stream filtering parameters, and
- // document extension level information.
+ // Make document extension level information direct as required by
+ // the spec.
this->m->pdf.fixDanglingReferences(true);
- std::list<QPDFObjectHandle> queue;
- queue.push_back(getTrimmedTrailer());
- std::set<int> visited;
-
- while (! queue.empty())
+ QPDFObjectHandle root = this->m->pdf.getRoot();
+ for (auto const& key: root.getKeys())
{
- QPDFObjectHandle node = queue.front();
- queue.pop_front();
- if (node.isIndirect())
- {
- if (visited.count(node.getObjectID()) > 0)
- {
- continue;
- }
- indicateProgress(false, false);
- visited.insert(node.getObjectID());
- }
-
- if (node.isArray())
- {
- int nitems = node.getArrayNItems();
- for (int i = 0; i < nitems; ++i)
- {
- QPDFObjectHandle oh = node.getArrayItem(i);
- if (! oh.isScalar())
- {
- queue.push_back(oh);
- }
- }
- }
- else if (node.isDictionary() || node.isStream())
- {
- bool is_stream = false;
- bool is_root = false;
- bool filterable = false;
- QPDFObjectHandle dict = node;
- if (node.isStream())
- {
- is_stream = true;
- dict = node.getDict();
- // See whether we are able to filter this stream.
- filterable = node.pipeStreamData(
- 0, 0, this->m->stream_decode_level, true);
- }
- else if (this->m->pdf.getRoot().getObjectID() == node.getObjectID())
+ QPDFObjectHandle oh = root.getKey(key);
+ if ((key == "/Extensions") && (oh.isDictionary()))
+ {
+ bool extensions_indirect = false;
+ if (oh.isIndirect())
{
- is_root = true;
+ QTC::TC("qpdf", "QPDFWriter make Extensions direct");
+ extensions_indirect = true;
+ oh = oh.shallowCopy();
+ root.replaceKey(key, oh);
}
-
- std::set<std::string> keys = dict.getKeys();
- for (std::set<std::string>::iterator iter = keys.begin();
- iter != keys.end(); ++iter)
- {
- std::string const& key = *iter;
- QPDFObjectHandle oh = dict.getKey(key);
- bool add_to_queue = true;
- if (is_stream)
- {
- if (oh.isIndirect() &&
- ((key == "/Length") ||
- (filterable &&
- ((key == "/Filter") ||
- (key == "/DecodeParms")))))
- {
- QTC::TC("qpdf", "QPDFWriter make stream key direct");
- add_to_queue = false;
- oh.makeDirect();
- dict.replaceKey(key, oh);
- }
- }
- else if (is_root)
+ if (oh.hasKey("/ADBE"))
+ {
+ QPDFObjectHandle adbe = oh.getKey("/ADBE");
+ if (adbe.isIndirect())
{
- if ((key == "/Extensions") && (oh.isDictionary()))
- {
- bool extensions_indirect = false;
- if (oh.isIndirect())
- {
- QTC::TC("qpdf", "QPDFWriter make Extensions direct");
- extensions_indirect = true;
- add_to_queue = false;
- oh = oh.shallowCopy();
- dict.replaceKey(key, oh);
- }
- if (oh.hasKey("/ADBE"))
- {
- QPDFObjectHandle adbe = oh.getKey("/ADBE");
- if (adbe.isIndirect())
- {
- QTC::TC("qpdf", "QPDFWriter make ADBE direct",
- extensions_indirect ? 0 : 1);
- adbe.makeDirect();
- oh.replaceKey("/ADBE", adbe);
- }
- }
- }
+ QTC::TC("qpdf", "QPDFWriter make ADBE direct",
+ extensions_indirect ? 0 : 1);
+ adbe.makeDirect();
+ oh.replaceKey("/ADBE", adbe);
}
-
- if (add_to_queue)
- {
- queue.push_back(oh);
- }
- }
- }
+ }
+ }
}
}
@@ -2737,14 +2658,11 @@ QPDFWriter::write()
{
doWriteSetup();
- // Set up progress reporting. We spent about equal amounts of time
- // preparing and writing one pass. To get a rough estimate of
- // progress, we track handling of indirect objects. For linearized
- // files, we write two passes. events_expected is an
- // approximation, but it's good enough for progress reporting,
- // which is mostly a guess anyway.
+ // Set up progress reporting. For linearized files, we write two
+ // passes. events_expected is an approximation, but it's good
+ // enough for progress reporting, which is mostly a guess anyway.
this->m->events_expected = QIntC::to_int(
- this->m->pdf.getObjectCount() * (this->m->linearized ? 3 : 2));
+ this->m->pdf.getObjectCount() * (this->m->linearized ? 2 : 1));
prepareFileForWrite();
@@ -3138,8 +3056,21 @@ QPDFWriter::writeLinearized()
discardGeneration(this->m->object_to_object_stream,
this->m->object_to_object_stream_no_gen);
- bool need_xref_stream = (! this->m->object_to_object_stream.empty());
- this->m->pdf.optimize(this->m->object_to_object_stream_no_gen);
+ auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
+ bool compress_stream;
+ bool is_metadata;
+ if (willFilterStream(stream, compress_stream, is_metadata, nullptr))
+ {
+ return 2;
+ }
+ else
+ {
+ return 1;
+ }
+ };
+
+ this->m->pdf.optimize(this->m->object_to_object_stream_no_gen,
+ true, skip_stream_parameters);
std::vector<QPDFObjectHandle> part4;
std::vector<QPDFObjectHandle> part6;
@@ -3173,6 +3104,7 @@ QPDFWriter::writeLinearized()
int after_second_half = 1 + second_half_uncompressed;
this->m->next_objid = after_second_half;
int second_half_xref = 0;
+ bool need_xref_stream = (! this->m->object_to_object_stream.empty());
if (need_xref_stream)
{
second_half_xref = this->m->next_objid++;
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index f0f96242..908f06e5 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -234,7 +234,6 @@ QPDFWriter extra header text no newline 0
QPDFWriter extra header text add newline 0
QPDF bogus 0 offset 0
QPDF global offset 0
-QPDFWriter make stream key direct 0
QPDFWriter copy V5 0
QPDFWriter increasing extension level 0
QPDFWriter make Extensions direct 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 7dc7e261..6ae330a4 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -716,7 +716,7 @@ my @bug_tests = (
["99b", "object 0", 2],
["100", "xref reconstruction loop", 2],
["101", "resolve for exception text", 2],
- ["117", "other infinite loop", 2],
+ ["117", "other infinite loop", 3],
["118", "other infinite loop", 2],
["119", "other infinite loop", 3],
["120", "other infinite loop", 3],
diff --git a/qpdf/qtest/qpdf/issue-117.out b/qpdf/qtest/qpdf/issue-117.out
index 18157749..540b9c3c 100644
--- a/qpdf/qtest/qpdf/issue-117.out
+++ b/qpdf/qtest/qpdf/issue-117.out
@@ -13,4 +13,4 @@ WARNING: issue-117.pdf (object 7 0, offset 1791): unknown token while reading ob
WARNING: issue-117.pdf (object 7 0, offset 1267): /Length key in stream dictionary is not an integer
WARNING: issue-117.pdf (object 7 0, offset 1418): attempting to recover stream length
WARNING: issue-117.pdf (object 7 0, offset 1418): recovered stream length: 347
-attempt to make a stream into a direct object
+qpdf: operation succeeded with warnings; resulting file may have some problems