aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--qpdf/qpdf.cc144
-rw-r--r--qpdf/qpdf.testcov3
-rw-r--r--qpdf/qtest/qpdf.test15
-rw-r--r--qpdf/qtest/qpdf/disable-kfo.out104
-rw-r--r--qpdf/qtest/qpdf/enable-kfo.out22
-rw-r--r--qpdf/qtest/qpdf/kfo-n.out20
-rw-r--r--qpdf/qtest/qpdf/kfo-y.out20
-rw-r--r--qpdf/qtest/qpdf/shared-form-images-xobject.pdfbin0 -> 9793 bytes
-rw-r--r--qpdf/qtest/qpdf/shared-form-xobject-split-1.pdfbin0 -> 4840 bytes
-rw-r--r--qpdf/qtest/qpdf/shared-form-xobject-split-2.pdfbin0 -> 5060 bytes
-rw-r--r--qpdf/qtest/qpdf/split-pages-group.out2
-rw-r--r--qpdf/qtest/qpdf/uo-6.out2
-rw-r--r--qpdf/qtest/qpdf/verbose-merge.out8
13 files changed, 336 insertions, 4 deletions
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index 442ee6cc..38aa2f4a 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -4750,6 +4750,140 @@ static void handle_transformations(QPDF& pdf, Options& o)
}
}
+// Decide whether the expensive pass that removes unreferenced page
+// resources should be run on pdf. If o.remove_unreferenced_page_resources
+// is re_no or re_yes, that answer is returned directly. For any other
+// setting, the page tree (and form XObjects reachable from pages) is
+// scanned and true is returned as soon as shared resources are found;
+// false is returned if the scan finishes without finding any. When
+// o.verbose is set, progress messages are written to std::cout.
+static bool should_remove_unreferenced_resources(QPDF& pdf, Options& o)
+{
+    if (o.remove_unreferenced_page_resources == re_no)
+    {
+        return false;
+    }
+    else if (o.remove_unreferenced_page_resources == re_yes)
+    {
+        return true;
+    }
+
+    // Unreferenced resources are common in files where resources
+    // dictionaries are shared across pages. As a heuristic, we look
+    // in the file for shared resources dictionaries or shared XObject
+    // subkeys of resources dictionaries either on pages or on form
+    // XObjects in pages. If we find any, then there is a higher
+    // likelihood that the expensive process of finding unreferenced
+    // resources is worth it.
+
+    // Return true as soon as we find any shared resources.
+
+    std::set<QPDFObjGen> resources_seen; // shared resources detection
+    std::set<QPDFObjGen> nodes_seen;     // loop detection
+
+    if (o.verbose)
+    {
+        std::cout << whoami << ": " << pdf.getFilename()
+                  << ": checking for shared resources" << std::endl;
+    }
+
+    std::list<QPDFObjectHandle> queue;
+    queue.push_back(pdf.getRoot().getKey("/Pages"));
+    while (! queue.empty())
+    {
+        // Remove the node from the queue BEFORE the loop-detection
+        // check. Otherwise a node that has already been visited (for
+        // example a form XObject used by two pages) would stay at the
+        // front of the queue and this loop would never terminate.
+        QPDFObjectHandle node = queue.front();
+        queue.pop_front();
+        QPDFObjGen og = node.getObjGen();
+        if (nodes_seen.count(og))
+        {
+            continue;
+        }
+        nodes_seen.insert(og);
+        QPDFObjectHandle dict = node.isStream() ? node.getDict() : node;
+        QPDFObjectHandle kids = dict.getKey("/Kids");
+        if (kids.isArray())
+        {
+            // This is a non-leaf node.
+            if (dict.hasKey("/Resources"))
+            {
+                QTC::TC("qpdf", "qpdf found resources in non-leaf");
+                if (o.verbose)
+                {
+                    std::cout << " found resources in non-leaf page node "
+                              << og.getObj() << " " << og.getGen()
+                              << std::endl;
+                }
+                return true;
+            }
+            int n = kids.getArrayNItems();
+            for (int i = 0; i < n; ++i)
+            {
+                queue.push_back(kids.getArrayItem(i));
+            }
+        }
+        else
+        {
+            // This is a leaf node or a form XObject.
+            QPDFObjectHandle resources = dict.getKey("/Resources");
+            if (resources.isIndirect())
+            {
+                QPDFObjGen resources_og = resources.getObjGen();
+                if (resources_seen.count(resources_og))
+                {
+                    QTC::TC("qpdf", "qpdf found shared resources in leaf");
+                    if (o.verbose)
+                    {
+                        std::cout << " found shared resources in leaf node "
+                                  << og.getObj() << " " << og.getGen()
+                                  << ": "
+                                  << resources_og.getObj() << " "
+                                  << resources_og.getGen()
+                                  << std::endl;
+                    }
+                    return true;
+                }
+                resources_seen.insert(resources_og);
+            }
+            if (! resources.isDictionary())
+            {
+                // No usable resources dictionary on this node, so
+                // there is no /XObject subkey to inspect. Skipping
+                // here avoids asking a non-dictionary for a key.
+                continue;
+            }
+            QPDFObjectHandle xobject = resources.getKey("/XObject");
+            if (xobject.isIndirect())
+            {
+                QPDFObjGen xobject_og = xobject.getObjGen();
+                if (resources_seen.count(xobject_og))
+                {
+                    QTC::TC("qpdf", "qpdf found shared xobject in leaf");
+                    if (o.verbose)
+                    {
+                        std::cout << " found shared xobject in leaf node "
+                                  << og.getObj() << " " << og.getGen()
+                                  << ": "
+                                  << xobject_og.getObj() << " "
+                                  << xobject_og.getGen()
+                                  << std::endl;
+                    }
+                    return true;
+                }
+                resources_seen.insert(xobject_og);
+            }
+            if (xobject.isDictionary())
+            {
+                // Queue every form XObject so that its own resources
+                // are checked for sharing as well.
+                for (auto const& k: xobject.getKeys())
+                {
+                    QPDFObjectHandle xobj = xobject.getKey(k);
+                    if (! xobj.isStream())
+                    {
+                        continue;
+                    }
+                    QPDFObjectHandle type = xobj.getDict().getKey("/Type");
+                    QPDFObjectHandle subtype =
+                        xobj.getDict().getKey("/Subtype");
+                    if (type.isName() &&
+                        ("/XObject" == type.getName()) &&
+                        subtype.isName() &&
+                        ("/Form" == subtype.getName()))
+                    {
+                        queue.push_back(xobj);
+                    }
+                }
+            }
+        }
+    }
+
+    if (o.verbose)
+    {
+        std::cout << whoami << ": no shared resources found" << std::endl;
+    }
+    return false;
+}
+
static void handle_page_specs(QPDF& pdf, Options& o)
{
// Parse all page specifications and translate them into lists of
@@ -4883,8 +5017,12 @@ static void handle_page_specs(QPDF& pdf, Options& o)
cis = page_spec_cfis[filename];
cis->stayOpen(true);
}
- QPDFPageDocumentHelper dh(*((*iter).second));
- dh.removeUnreferencedResources();
+ QPDF& other(*((*iter).second));
+ if (should_remove_unreferenced_resources(other, o))
+ {
+ QPDFPageDocumentHelper dh(other);
+ dh.removeUnreferencedResources();
+ }
if (cis)
{
cis->stayOpen(false);
@@ -5368,7 +5506,7 @@ static void do_split_pages(QPDF& pdf, Options& o)
before = std::string(o.outfilename) + "-";
}
- if (o.remove_unreferenced_page_resources != re_no)
+ if (should_remove_unreferenced_resources(pdf, o))
{
QPDFPageDocumentHelper dh(pdf);
dh.removeUnreferencedResources();
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 09d40c23..6834c7ad 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -450,3 +450,6 @@ QPDFWriter no encryption sig contents 0
QPDFPageObjectHelper colorspace lookup 0
QPDFWriter ignore XRef in qdf mode 0
QPDFPageObjectHelper filter form xobject 0
+qpdf found resources in non-leaf 0
+qpdf found shared resources in leaf 0
+qpdf found shared xobject in leaf 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 1184fa2b..560bee07 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -1699,7 +1699,7 @@ my @sp_cases = (
[11, 'pdf extension', '', 'split-out.Pdf'],
[4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'],
);
-$n_tests += 32;
+$n_tests += 35;
$n_compare_pdfs += 1;
for (@sp_cases)
{
@@ -1808,6 +1808,7 @@ foreach my $i (qw(1 2 3 4))
$td->runtest("unreferenced resources with bad token",
{$td->COMMAND =>
"qpdf --qdf --static-id --split-pages=2" .
+ " --remove-unreferenced-resources=yes" .
" coalesce.pdf split-out-bad-token.pdf"},
{$td->FILE => "coalesce-split.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
@@ -1834,6 +1835,18 @@ $td->runtest("check output",
{$td->FILE => "shared-form-images-merged.pdf"});
compare_pdfs("shared-form-images.pdf", "a.pdf");
+$td->runtest("shared form xobject subkey",
+ {$td->COMMAND => "qpdf --qdf --static-id --split-pages".
+ " shared-form-images-xobject.pdf" .
+ " split-out-shared-form-xobject.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+foreach my $i (qw(1 2))
+{
+ $td->runtest("check output ($i)",
+ {$td->FILE => "split-out-shared-form-xobject-$i.pdf"},
+ {$td->FILE => "shared-form-xobject-split-$i.pdf"});
+}
+
show_ntests();
# ----------
$td->notify("--- Keep Files Open ---");
diff --git a/qpdf/qtest/qpdf/disable-kfo.out b/qpdf/qtest/qpdf/disable-kfo.out
index a33044e0..f8f4622e 100644
--- a/qpdf/qtest/qpdf/disable-kfo.out
+++ b/qpdf/qtest/qpdf/disable-kfo.out
@@ -50,6 +50,110 @@ qpdf: processing 048-kfo.pdf
qpdf: processing 049-kfo.pdf
qpdf: processing 050-kfo.pdf
qpdf: processing 051-kfo.pdf
+qpdf: empty PDF: checking for shared resources
+qpdf: no shared resources found
+qpdf: 001-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 002-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 003-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 004-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 005-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 006-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 007-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 008-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 009-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 010-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 011-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 012-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 013-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 014-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 015-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 016-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 017-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 018-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 019-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 020-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 021-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 022-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 023-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 024-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 025-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 026-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 027-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 028-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 029-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 030-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 031-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 032-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 033-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 034-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 035-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 036-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 037-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 038-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 039-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 040-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 041-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 042-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 043-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 044-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 045-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 046-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 047-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 048-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 049-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 050-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 051-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
qpdf: removing unreferenced pages from primary input
qpdf: adding pages from 001-kfo.pdf
qpdf: adding pages from 002-kfo.pdf
diff --git a/qpdf/qtest/qpdf/enable-kfo.out b/qpdf/qtest/qpdf/enable-kfo.out
index 0c011518..e49e7a12 100644
--- a/qpdf/qtest/qpdf/enable-kfo.out
+++ b/qpdf/qtest/qpdf/enable-kfo.out
@@ -9,6 +9,28 @@ qpdf: processing 016-kfo.pdf
qpdf: processing 017-kfo.pdf
qpdf: processing 018-kfo.pdf
qpdf: processing 019-kfo.pdf
+qpdf: empty PDF: checking for shared resources
+qpdf: no shared resources found
+qpdf: 010-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 011-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 012-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 013-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 014-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 015-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 016-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 017-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 018-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 019-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
qpdf: removing unreferenced pages from primary input
qpdf: adding pages from 010-kfo.pdf
qpdf: adding pages from 011-kfo.pdf
diff --git a/qpdf/qtest/qpdf/kfo-n.out b/qpdf/qtest/qpdf/kfo-n.out
index 0fe71ca0..1964bcdd 100644
--- a/qpdf/qtest/qpdf/kfo-n.out
+++ b/qpdf/qtest/qpdf/kfo-n.out
@@ -7,6 +7,26 @@ qpdf: processing 006-kfo.pdf
qpdf: processing 007-kfo.pdf
qpdf: processing 008-kfo.pdf
qpdf: processing 009-kfo.pdf
+qpdf: empty PDF: checking for shared resources
+qpdf: no shared resources found
+qpdf: 001-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 002-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 003-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 004-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 005-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 006-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 007-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 008-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 009-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
qpdf: removing unreferenced pages from primary input
qpdf: adding pages from 001-kfo.pdf
qpdf: adding pages from 002-kfo.pdf
diff --git a/qpdf/qtest/qpdf/kfo-y.out b/qpdf/qtest/qpdf/kfo-y.out
index 0fe71ca0..1964bcdd 100644
--- a/qpdf/qtest/qpdf/kfo-y.out
+++ b/qpdf/qtest/qpdf/kfo-y.out
@@ -7,6 +7,26 @@ qpdf: processing 006-kfo.pdf
qpdf: processing 007-kfo.pdf
qpdf: processing 008-kfo.pdf
qpdf: processing 009-kfo.pdf
+qpdf: empty PDF: checking for shared resources
+qpdf: no shared resources found
+qpdf: 001-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 002-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 003-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 004-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 005-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 006-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 007-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 008-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 009-kfo.pdf: checking for shared resources
+qpdf: no shared resources found
qpdf: removing unreferenced pages from primary input
qpdf: adding pages from 001-kfo.pdf
qpdf: adding pages from 002-kfo.pdf
diff --git a/qpdf/qtest/qpdf/shared-form-images-xobject.pdf b/qpdf/qtest/qpdf/shared-form-images-xobject.pdf
new file mode 100644
index 00000000..b8c5ead1
--- /dev/null
+++ b/qpdf/qtest/qpdf/shared-form-images-xobject.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/shared-form-xobject-split-1.pdf b/qpdf/qtest/qpdf/shared-form-xobject-split-1.pdf
new file mode 100644
index 00000000..a0a9ec88
--- /dev/null
+++ b/qpdf/qtest/qpdf/shared-form-xobject-split-1.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/shared-form-xobject-split-2.pdf b/qpdf/qtest/qpdf/shared-form-xobject-split-2.pdf
new file mode 100644
index 00000000..06dce552
--- /dev/null
+++ b/qpdf/qtest/qpdf/shared-form-xobject-split-2.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/split-pages-group.out b/qpdf/qtest/qpdf/split-pages-group.out
index ec00ab7e..d89f5e48 100644
--- a/qpdf/qtest/qpdf/split-pages-group.out
+++ b/qpdf/qtest/qpdf/split-pages-group.out
@@ -1,3 +1,5 @@
+qpdf: 11-pages.pdf: checking for shared resources
+qpdf: no shared resources found
qpdf: wrote file split-out-group-01-05.pdf
qpdf: wrote file split-out-group-06-10.pdf
qpdf: wrote file split-out-group-11-11.pdf
diff --git a/qpdf/qtest/qpdf/uo-6.out b/qpdf/qtest/qpdf/uo-6.out
index 22395614..5a199670 100644
--- a/qpdf/qtest/qpdf/uo-6.out
+++ b/qpdf/qtest/qpdf/uo-6.out
@@ -1,4 +1,6 @@
qpdf: selecting --keep-open-files=y
+qpdf: fxo-red.pdf: checking for shared resources
+qpdf: no shared resources found
qpdf: removing unreferenced pages from primary input
qpdf: adding pages from fxo-red.pdf
qpdf: processing underlay/overlay
diff --git a/qpdf/qtest/qpdf/verbose-merge.out b/qpdf/qtest/qpdf/verbose-merge.out
index 9de13e16..6f9846fb 100644
--- a/qpdf/qtest/qpdf/verbose-merge.out
+++ b/qpdf/qtest/qpdf/verbose-merge.out
@@ -2,6 +2,14 @@ qpdf: selecting --keep-open-files=y
qpdf: processing 20-pages.pdf
qpdf: processing ./20-pages.pdf
qpdf: processing minimal.pdf
+qpdf: ./20-pages.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: 20-pages.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: minimal.pdf: checking for shared resources
+qpdf: no shared resources found
+qpdf: page-labels-and-outlines.pdf: checking for shared resources
+qpdf: no shared resources found
qpdf: removing unreferenced pages from primary input
qpdf: adding pages from page-labels-and-outlines.pdf
qpdf: adding pages from 20-pages.pdf