aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog17
-rw-r--r--qpdf/qpdf.cc44
-rw-r--r--qpdf/qtest/qpdf.test2
3 files changed, 57 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index 45e9575b..c71dc44b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2020-04-04 Jay Berkenbilt <ejb@ql.org>
+
+ * Add new option --remove-unreferenced-resources that takes auto,
+ yes, or no as options. This tells qpdf whether to attempt to
+ remove unreferenced resources from pages when doing page splitting
+ operations. Prior to this change, the default was to attempt to
+ remove unreferenced resources, but this operation was very slow,
+ especially for large and complex files. The new default is "auto",
+ which tells qpdf to analyze the file for shared resources. This is
+ a relatively quick test. If no shared resources are found, then we
+ don't attempt to remove unreferenced resources, because
+ unreferenced resources never occur in files without shared
+ resources. To force qpdf to look for and remove unreferenced
+ resources, use --remove-unreferenced-resources=yes. The option
+ --preserve-unreferenced-resources is now a synonym for
+ --remove-unreferenced-resources=no.
+
2020-04-03 Jay Berkenbilt <ejb@ql.org>
* Allow qpdf to be built on systems without wchar_t. All "normal"
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index de4a0297..442ee6cc 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -94,6 +94,8 @@ struct UnderOverlay
std::vector<int> repeat_pagenos;
};
+enum remove_unref_e { re_auto, re_yes, re_no };
+
struct Options
{
Options() :
@@ -144,7 +146,7 @@ struct Options
ignore_xref_streams(false),
qdf_mode(false),
preserve_unreferenced_objects(false),
- preserve_unreferenced_page_resources(false),
+ remove_unreferenced_page_resources(re_auto),
keep_files_open(true),
keep_files_open_set(false),
keep_files_open_threshold(200), // default known in help and docs
@@ -243,7 +245,7 @@ struct Options
bool ignore_xref_streams;
bool qdf_mode;
bool preserve_unreferenced_objects;
- bool preserve_unreferenced_page_resources;
+ remove_unref_e remove_unreferenced_page_resources;
bool keep_files_open;
bool keep_files_open_set;
size_t keep_files_open_threshold;
@@ -739,6 +741,7 @@ class ArgParser
void argQdf();
void argPreserveUnreferenced();
void argPreserveUnreferencedResources();
+ void argRemoveUnreferencedResources(char* parameter);
void argKeepFilesOpen(char* parameter);
void argKeepFilesOpenThreshold(char* parameter);
void argNewlineBeforeEndstream();
@@ -970,6 +973,10 @@ ArgParser::initOptionTable()
&ArgParser::argPreserveUnreferenced);
(*t)["preserve-unreferenced-resources"] = oe_bare(
&ArgParser::argPreserveUnreferencedResources);
+ char const* remove_unref_choices[] = {
+ "auto", "yes", "no", 0};
+ (*t)["remove-unreferenced-resources"] = oe_requiredChoices(
+ &ArgParser::argRemoveUnreferencedResources, remove_unref_choices);
(*t)["keep-files-open"] = oe_requiredChoices(
&ArgParser::argKeepFilesOpen, yn);
(*t)["keep-files-open-threshold"] = oe_requiredParameter(
@@ -1459,7 +1466,9 @@ ArgParser::argHelp()
<< "--object-streams=mode controls handing of object streams\n"
<< "--preserve-unreferenced preserve unreferenced objects\n"
<< "--preserve-unreferenced-resources\n"
- << " preserve unreferenced page resources\n"
+ << " synonym for --remove-unreferenced-resources=no\n"
+ << "--remove-unreferenced-resources={auto,yes,no}\n"
+ << " whether to remove unreferenced page resources\n"
<< "--newline-before-endstream always put a newline before endstream\n"
<< "--coalesce-contents force all pages' content to be a single stream\n"
<< "--flatten-annotations=option\n"
@@ -1973,7 +1982,30 @@ ArgParser::argPreserveUnreferenced()
void
ArgParser::argPreserveUnreferencedResources()
{
- o.preserve_unreferenced_page_resources = true;
+ o.remove_unreferenced_page_resources = re_no;
+}
+
+void
+ArgParser::argRemoveUnreferencedResources(char* parameter)
+{
+ if (strcmp(parameter, "auto") == 0)
+ {
+ o.remove_unreferenced_page_resources = re_auto;
+ }
+ else if (strcmp(parameter, "yes") == 0)
+ {
+ o.remove_unreferenced_page_resources = re_yes;
+ }
+ else if (strcmp(parameter, "no") == 0)
+ {
+ o.remove_unreferenced_page_resources = re_no;
+ }
+ else
+ {
+ // If this happens, it means remove_unref_choices in
+ // ArgParser::initOptionTable is wrong.
+ usage("invalid value for --remove-unreferenced-page-resources");
+ }
}
void
@@ -4838,7 +4870,7 @@ static void handle_page_specs(QPDF& pdf, Options& o)
page_spec.range));
}
- if (! o.preserve_unreferenced_page_resources)
+ if (o.remove_unreferenced_page_resources != re_no)
{
for (std::map<std::string, QPDF*>::iterator iter =
page_spec_qpdfs.begin();
@@ -5336,7 +5368,7 @@ static void do_split_pages(QPDF& pdf, Options& o)
before = std::string(o.outfilename) + "-";
}
- if (! o.preserve_unreferenced_page_resources)
+ if (o.remove_unreferenced_page_resources != re_no)
{
QPDFPageDocumentHelper dh(pdf);
dh.removeUnreferencedResources();
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index e1cd6bf4..1184fa2b 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -2120,6 +2120,7 @@ $td->runtest("check output",
$td->runtest("split with shared resources",
{$td->COMMAND =>
"qpdf --qdf --static-id" .
+ " --remove-unreferenced-resources=yes" .
" shared-images.pdf --pages . 1,3" .
" ./shared-images.pdf 1,2 -- a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
@@ -2130,6 +2131,7 @@ $td->runtest("check output",
$td->runtest("split with really shared resources",
{$td->COMMAND =>
"qpdf --qdf --static-id" .
+ " --remove-unreferenced-resources=yes" .
" shared-images.pdf --pages . 1,3" .
" . 1,2 -- a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});