aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2017-07-29 01:18:57 +0200
committer Jay Berkenbilt <ejb@ql.org> 2017-07-29 01:19:11 +0200
commit 3a1ff5ded9cf22e114991b5a49857b54f8e56b02 (patch)
tree 3a3ef059215f619946c316fa6c94d31c6f6b076f
parent a94a729fee946947db7a056f02b326dfce681128 (diff)
download qpdf-3a1ff5ded9cf22e114991b5a49857b54f8e56b02.tar.zst
Add option to preserve unreferenced objects
-rw-r--r-- ChangeLog 11
-rw-r--r-- include/qpdf/QPDF.hh 6
-rw-r--r-- include/qpdf/QPDFWriter.hh 7
-rw-r--r-- libqpdf/QPDF.cc 16
-rw-r--r-- libqpdf/QPDFWriter.cc 18
-rw-r--r-- manual/qpdf-manual.xml 21
-rw-r--r-- qpdf/qpdf.cc 10
-rw-r--r-- qpdf/qpdf.testcov 1
-rw-r--r-- qpdf/qtest/qpdf.test 18
-rw-r--r-- qpdf/qtest/qpdf/unreferenced-dropped.pdf bin 0 -> 799 bytes
-rw-r--r-- qpdf/qtest/qpdf/unreferenced-objects.pdf 105
-rw-r--r-- qpdf/qtest/qpdf/unreferenced-preserved.pdf bin 0 -> 927 bytes
12 files changed, 212 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 026833d4..1a565ff9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,16 @@
+2017-07-28 Jay Berkenbilt <ejb@ql.org>
+
+ * Add --preserve-unreferenced command-line option and
+ setPreserveUnreferencedObjects method to QPDFWriter. This option
+ causes QPDFWriter to write all objects from the input file to the
+ output file regardless of whether the objects are referenced.
+ Objects are written to the output file in numerical order from the
+ input file. This option has no effect for linearized files.
+
2017-07-27 Jay Berkenbilt <ejb@ql.org>
* Add --precheck-streams command-line option and setStreamPrecheck
- option to QPDFWriter to tell QPDFWriter to attempt decoding a
+ method to QPDFWriter to tell QPDFWriter to attempt decoding a
stream fully before deciding whether to filter it or not.
* Recover gracefully from streams that aren't filterable because
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index ef9ce597..ad8503dc 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -396,6 +396,12 @@ class QPDF
QPDF_DLL
void showXRefTable();
+ // Returns a list of indirect objects for every object in the xref
+ // table. Useful for discovering objects that are not otherwise
+ // referenced.
+ QPDF_DLL
+ std::vector<QPDFObjectHandle> getAllObjects();
+
// Optimization support -- see doc/optimization. Implemented in
// QPDF_optimization.cc
diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh
index 2687cce0..fd35fecd 100644
--- a/include/qpdf/QPDFWriter.hh
+++ b/include/qpdf/QPDFWriter.hh
@@ -155,6 +155,12 @@ class QPDFWriter
QPDF_DLL
void setPrecheckStreams(bool);
+ // Preserve unreferenced objects. The default behavior is to
+ // discard any object that is not visited during a traversal of
+ // the object structure from the trailer.
+ QPDF_DLL
+ void setPreserveUnreferencedObjects(bool);
+
// Set the minimum PDF version. If the PDF version of the input
// file (or previously set minimum version) is less than the
// version passed to this method, the PDF version of the output
@@ -427,6 +433,7 @@ class QPDFWriter
qpdf_stream_data_e stream_data_mode;
bool qdf_mode;
bool precheck_streams;
+ bool preserve_unreferenced_objects;
bool static_id;
bool suppress_original_object_ids;
bool direct_stream_lengths;
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 4d5bf67f..d82813d0 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -989,6 +989,22 @@ QPDF::showXRefTable()
}
}
+std::vector<QPDFObjectHandle>
+QPDF::getAllObjects()
+{
+ std::vector<QPDFObjectHandle> result;
+ for (std::map<QPDFObjGen, QPDFXRefEntry>::iterator iter =
+ this->xref_table.begin();
+ iter != this->xref_table.end(); ++iter)
+ {
+
+ QPDFObjGen const& og = (*iter).first;
+ result.push_back(QPDFObjectHandle::Factory::newIndirect(
+ this, og.getObj(), og.getGen()));
+ }
+ return result;
+}
+
void
QPDF::setLastObjectDescription(std::string const& description,
int objid, int generation)
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index 59e306fc..01309f43 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -58,6 +58,7 @@ QPDFWriter::init()
stream_data_mode = qpdf_s_compress;
qdf_mode = false;
precheck_streams = false;
+ preserve_unreferenced_objects = false;
static_id = false;
suppress_original_object_ids = false;
direct_stream_lengths = true;
@@ -184,6 +185,12 @@ QPDFWriter::setPrecheckStreams(bool val)
}
void
+QPDFWriter::setPreserveUnreferencedObjects(bool val)
+{
+ this->preserve_unreferenced_objects = val;
+}
+
+void
QPDFWriter::setMinimumPDFVersion(std::string const& version)
{
setMinimumPDFVersion(version, 0);
@@ -3074,6 +3081,17 @@ QPDFWriter::writeStandard()
writeHeader();
writeString(this->extra_header_text);
+ if (this->preserve_unreferenced_objects)
+ {
+ QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
+ std::vector<QPDFObjectHandle> all = this->pdf.getAllObjects();
+ for (std::vector<QPDFObjectHandle>::iterator iter = all.begin();
+ iter != all.end(); ++iter)
+ {
+ enqueueObject(*iter);
+ }
+ }
+
// Put root first on queue.
QPDFObjectHandle trailer = getTrimmedTrailer();
enqueueObject(trailer.getKey("/Root"));
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
index cd35718d..18abc013 100644
--- a/manual/qpdf-manual.xml
+++ b/manual/qpdf-manual.xml
@@ -839,6 +839,27 @@ outfile.pdf</option>
</listitem>
</varlistentry>
<varlistentry>
+ <term><option>--preserve-unreferenced</option></term>
+ <listitem>
+ <para>
+ Tells qpdf to preserve objects that are not referenced when
+ writing the file. Ordinarily any object that is not referenced
+ in a traversal of the document from the trailer dictionary
+ will be discarded. This may be useful in working with some
+ damaged files or inspecting files with known unreferenced
+ objects.
+ </para>
+ <para>
+ This flag is ignored for linearized files. For other files, it
+ also causes objects in the new file to be written in order by
+ object ID from the original file. This does not mean that
+ object numbers will be the same since qpdf may create stream
+ lengths as direct or indirect differently from the original
+ file, and the original file may have gaps in its numbering.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
<term><option>--qdf</option></term>
<listitem>
<para>
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index 99cfd3a1..65a6de1e 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -203,6 +203,7 @@ familiar with the PDF file format or who are PDF developers.\n\
--object-streams=mode controls handing of object streams\n\
--ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\
--precheck-streams precheck ability to decode streams\n\
+--preserve-unreferenced preserve unreferenced objects\n\
--qdf turns on \"QDF mode\" (below)\n\
--min-version=version sets the minimum PDF version of the output file\n\
--force-version=version forces this to be the PDF version of the output file\n\
@@ -1030,6 +1031,7 @@ int main(int argc, char* argv[])
bool ignore_xref_streams = false;
bool qdf_mode = false;
bool precheck_streams = false;
+ bool preserve_unreferenced_objects = false;
std::string min_version;
std::string force_version;
@@ -1219,6 +1221,10 @@ int main(int argc, char* argv[])
{
precheck_streams = true;
}
+ else if (strcmp(arg, "preserve-unreferenced") == 0)
+ {
+ preserve_unreferenced_objects = true;
+ }
else if (strcmp(arg, "min-version") == 0)
{
if (parameter == 0)
@@ -1714,6 +1720,10 @@ int main(int argc, char* argv[])
{
w.setPrecheckStreams(true);
}
+ if (preserve_unreferenced_objects)
+ {
+ w.setPreserveUnreferencedObjects(true);
+ }
if (normalize_set)
{
w.setContentNormalization(normalize);
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index bf227c7a..c64c63ee 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -280,3 +280,4 @@ QPDFObjectHandle found fake 1
QPDFObjectHandle no val for last key 0
QPDF resolve failure to null 0
QPDFWriter precheck stream 0
+QPDFWriter preserve unreferenced standard 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index b61882b9..45ed8c46 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -743,6 +743,24 @@ $td->runtest("check output",
{$td->FILE => "bad-data-precheck.pdf"});
show_ntests();
# ----------
+$td->notify("--- Preserve unreferenced objects ---");
+$n_tests += 4;
+
+$td->runtest("drop unused objects",
+ {$td->COMMAND => "qpdf --static-id unreferenced-objects.pdf a.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+$td->runtest("check output",
+ {$td->FILE => "a.pdf"},
+ {$td->FILE => "unreferenced-dropped.pdf"});
+$td->runtest("keep unused objects",
+ {$td->COMMAND => "qpdf --static-id --preserve-unreferenced" .
+ " unreferenced-objects.pdf a.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+$td->runtest("check output",
+ {$td->FILE => "a.pdf"},
+ {$td->FILE => "unreferenced-preserved.pdf"});
+show_ntests();
+# ----------
$td->notify("--- Copy Foreign Objects ---");
$n_tests += 7;
diff --git a/qpdf/qtest/qpdf/unreferenced-dropped.pdf b/qpdf/qtest/qpdf/unreferenced-dropped.pdf
new file mode 100644
index 00000000..071d8d7d
--- /dev/null
+++ b/qpdf/qtest/qpdf/unreferenced-dropped.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/unreferenced-objects.pdf b/qpdf/qtest/qpdf/unreferenced-objects.pdf
new file mode 100644
index 00000000..a6ea90c8
--- /dev/null
+++ b/qpdf/qtest/qpdf/unreferenced-objects.pdf
@@ -0,0 +1,105 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+2 0 obj
+<<
+ /Pages 1 0 R
+ /Type /Catalog
+>>
+endobj
+
+1 0 obj
+<<
+ /Count 1
+ /Kids [
+ 3 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+3 0 obj
+<<
+ /Contents 4 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 1 0 R
+ /Resources <<
+ /Font <<
+ /F1 6 0 R
+ >>
+ /ProcSet 9 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+4 0 obj
+<<
+ /Length 5 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+44
+endobj
+
+6 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+7 0 obj
+[ 8 0 R ]
+endobj
+
+8 0 obj
+/Potato
+endobj
+
+9 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+xref
+0 10
+0000000000 65535 f
+0000000079 00000 n
+0000000025 00000 n
+0000000161 00000 n
+0000000376 00000 n
+0000000475 00000 n
+0000000494 00000 n
+0000000612 00000 n
+0000000638 00000 n
+0000000662 00000 n
+trailer <<
+ /Root 2 0 R
+ /Size 10
+ /ID [<5c2381b459937c988290150df782f1fd><5c2381b459937c988290150df782f1fd>]
+>>
+startxref
+697
+%%EOF
diff --git a/qpdf/qtest/qpdf/unreferenced-preserved.pdf b/qpdf/qtest/qpdf/unreferenced-preserved.pdf
new file mode 100644
index 00000000..b92fff5d
--- /dev/null
+++ b/qpdf/qtest/qpdf/unreferenced-preserved.pdf
Binary files differ