Bug fix: handle generation > 0 when generating object streams

Rework QPDFWriter to always track old object IDs as QPDFObjGen instead of int, thus not discarding the generation number. Switch to QPDF::getCompressibleObjGens() to properly handle the case of an old object eligible for compression that has a generation other than zero.
author: Jay Berkenbilt <ejb@ql.org> 2013-06-14 20:53:47 +0200
committer: Jay Berkenbilt <ejb@ql.org> 2013-06-14 20:58:09 +0200
commit: a3576a73593987b26cd3eff346f8f7c11f713cbd (patch)
tree: 3db5b39d339cddacf2fc1b13ecf4c028bc0589b5
parent: 96eb96511557469d50bbd55d211ced16acc31e13 (diff)
download: qpdf-a3576a73593987b26cd3eff346f8f7c11f713cbd.tar.zst
9 files changed, 285 insertions, 85 deletions
diff --git a/ChangeLog b/ChangeLog
index a71b3f18..5dafe0bc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2013-06-14  Jay Berkenbilt  <ejb@ql.org>
 
+	* Bug fix: properly handle object stream generation when the
+	original file has some compressible objects with generation != 0.
+
+	* Add QPDF::getCompressibleObjGens() and deprecate
+	QPDF::getCompressibleObjects(), which had a flaw in its logic.
+
 	* Add new QPDFObjectHandle::getObjGen() method and indiciate in
 	comments that its use is favored over getObjectID() and
 	getGeneration() for most cases.
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index fd0c9b54..b90dea3b 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -434,8 +434,19 @@ class QPDF
     // Map object to object stream that contains it
     QPDF_DLL
     void getObjectStreamData(std::map<int, int>&);
+
     // Get a list of objects that would be permitted in an object
-    // stream
+    // stream.
+    QPDF_DLL
+    std::vector<QPDFObjGen> getCompressibleObjGens();
+
+    // Deprecated: get a list of objects that would be permitted in an
+    // object stream.  This method is deprecated and will be removed.
+    // It's incorrect because it disregards the generations of the
+    // compressible objects, which can lead (and has led) to bugs.
+    // This method will throw an exception if any of the objects
+    // returned have a generation of other than zero.  Use
+    // getCompressibleObjGens() instead.
     QPDF_DLL
     std::vector<int> getCompressibleObjects();
 
diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh
index e2f731d3..92104142 100644
--- a/include/qpdf/QPDFWriter.hh
+++ b/include/qpdf/QPDFWriter.hh
@@ -24,6 +24,7 @@
 
 #include <qpdf/Constants.h>
 
+#include <qpdf/QPDFObjGen.hh>
 #include <qpdf/QPDFXRefEntry.hh>
 
 #include <qpdf/Pl_Buffer.hh>
@@ -289,7 +290,7 @@ class QPDFWriter
     void writeStringQDF(std::string const& str);
     void writeStringNoQDF(std::string const& str);
     void writePad(int nspaces);
-    void assignCompressedObjectNumbers(int objid);
+    void assignCompressedObjectNumbers(QPDFObjGen const& og);
     void enqueueObject(QPDFObjectHandle object);
     void writeObjectStreamOffsets(
         std::vector<qpdf_offset_t>& offsets, int first_obj);
@@ -380,6 +381,9 @@ class QPDFWriter
     void pushEncryptionFilter();
     void pushDiscardFilter();
 
+    void discardGeneration(std::map<QPDFObjGen, int> const& in,
+                           std::map<int, int>& out);
+
     QPDF& pdf;
     char const* filename;
     FILE* file;
@@ -419,7 +423,7 @@ class QPDFWriter
     std::list<PointerHolder<Pipeline> > to_delete;
     Pl_Count* pipeline;
     std::list<QPDFObjectHandle> object_queue;
-    std::map<int, int> obj_renumber;
+    std::map<QPDFObjGen, int> obj_renumber;
     std::map<int, QPDFXRefEntry> xref;
     std::map<int, qpdf_offset_t> lengths;
     int next_objid;
@@ -427,12 +431,16 @@ class QPDFWriter
     size_t cur_stream_length;
     bool added_newline;
     int max_ostream_index;
-    std::set<int> normalized_streams;
-    std::map<int, int> page_object_to_seq;
-    std::map<int, int> contents_to_page_seq;
-    std::map<int, int> object_to_object_stream;
-    std::map<int, std::set<int> > object_stream_to_objects;
+    std::set<QPDFObjGen> normalized_streams;
+    std::map<QPDFObjGen, int> page_object_to_seq;
+    std::map<QPDFObjGen, int> contents_to_page_seq;
+    std::map<QPDFObjGen, int> object_to_object_stream;
+    std::map<int, std::set<QPDFObjGen> > object_stream_to_objects;
     std::list<Pipeline*> pipeline_stack;
+
+    // For linearization only
+    std::map<int, int> obj_renumber_no_gen;
+    std::map<int, int> object_to_object_stream_no_gen;
 };
 
 #endif // __QPDFWRITER_HH__
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 5c5e0e79..00e13ca0 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -1944,55 +1944,68 @@ QPDF::getObjectStreamData(std::map<int, int>& omap)
 std::vector<int>
 QPDF::getCompressibleObjects()
 {
-    // Return a set of object numbers of objects that are allowed to
-    // be in object streams.  We disregard generation numbers here
-    // since this is a helper function for QPDFWriter which is going
-    // to renumber objects anyway.  This code will do weird things if
-    // we have two objects with the same object number and different
-    // generations, but so do virtually all PDF consumers,
-    // particularly since this is not a permitted condition.
-
-    // We walk through the objects by traversing the document from the
-    // root, including a traversal of the pages tree.  This makes that
-    // objects that are on the same page are more likely to be in the
-    // same object stream, which is slightly more efficient,
+    std::vector<QPDFObjGen> objects = getCompressibleObjGens();
+    std::vector<int> result;
+    for (std::vector<QPDFObjGen>::iterator iter = objects.begin();
+         iter != objects.end(); ++iter)
+    {
+        if ((*iter).getGen() != 0)
+        {
+            throw std::logic_error(
+                "QPDF::getCompressibleObjects() would return an object ID"
+                " for an object with generation != 0.  Use"
+                " QPDF::getCompressibleObjGens() instead."
+                "  See comments in QPDF.hh.");
+        }
+        else
+        {
+            result.push_back((*iter).getObj());
+        }
+    }
+    return result;
+}
+
+std::vector<QPDFObjGen>
+QPDF::getCompressibleObjGens()
+{
+    // Return a list of objects that are allowed to be in object
+    // streams.  Walk through the objects by traversing the document
+    // from the root, including a traversal of the pages tree.  This
+    // ensures that objects that are on the same page are more likely to
+    // be in the same object stream, which is slightly more efficient,
     // particularly with linearized files.  This is better than
     // iterating through the xref table since it avoids preserving
     // orphaned items.
 
     // Exclude encryption dictionary, if any
-    int encryption_dict_id = 0;
     QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt");
-    if (encryption_dict.isIndirect())
-    {
-	encryption_dict_id = encryption_dict.getObjectID();
-    }
+    QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
 
-    std::set<int> visited;
+    std::set<QPDFObjGen> visited;
     std::list<QPDFObjectHandle> queue;
     queue.push_front(this->trailer);
-    std::vector<int> result;
+    std::vector<QPDFObjGen> result;
     while (! queue.empty())
     {
 	QPDFObjectHandle obj = queue.front();
 	queue.pop_front();
 	if (obj.isIndirect())
 	{
-	    int objid = obj.getObjectID();
-	    if (visited.count(objid))
+	    QPDFObjGen og = obj.getObjGen();
+	    if (visited.count(og))
 	    {
 		QTC::TC("qpdf", "QPDF loop detected traversing objects");
 		continue;
 	    }
-	    if (objid == encryption_dict_id)
+	    if (og == encryption_dict_og)
 	    {
 		QTC::TC("qpdf", "QPDF exclude encryption dictionary");
 	    }
 	    else if (! obj.isStream())
 	    {
-		result.push_back(objid);
+		result.push_back(og);
 	    }
-	    visited.insert(objid);
+	    visited.insert(og);
 	}
 	if (obj.isStream())
 	{
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index bd7ce05f..dde6e559 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -933,16 +933,19 @@ QPDFWriter::closeObject(int objid)
 }
 
 void
-QPDFWriter::assignCompressedObjectNumbers(int objid)
+QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
 {
-    if (this->object_stream_to_objects.count(objid) == 0)
+    int objid = og.getObj();
+    if ((og.getGen() != 0) ||
+        (this->object_stream_to_objects.count(objid) == 0))
     {
+        // This is not an object stream.
 	return;
     }
 
     // Reserve numbers for the objects that belong to this object
     // stream.
-    for (std::set<int>::iterator iter =
+    for (std::set<QPDFObjGen>::iterator iter =
 	     this->object_stream_to_objects[objid].begin();
 	 iter != this->object_stream_to_objects[objid].end();
 	 ++iter)
@@ -969,30 +972,32 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
 	{
 	    // This is a place-holder object for an object stream
 	}
-	int objid = object.getObjectID();
+	QPDFObjGen og = object.getObjGen();
 
-	if (obj_renumber.count(objid) == 0)
+	if (obj_renumber.count(og) == 0)
 	{
-	    if (this->object_to_object_stream.count(objid))
+	    if (this->object_to_object_stream.count(og))
 	    {
 		// This is in an object stream.  Don't process it
-		// here.  Instead, enqueue the object stream.
-		int stream_id = this->object_to_object_stream[objid];
+		// here.  Instead, enqueue the object stream.  Object
+		// streams always have generation 0.
+		int stream_id = this->object_to_object_stream[og];
 		enqueueObject(this->pdf.getObjectByID(stream_id, 0));
 	    }
 	    else
 	    {
 		object_queue.push_back(object);
-		obj_renumber[objid] = next_objid++;
+		obj_renumber[og] = next_objid++;
 
-		if (this->object_stream_to_objects.count(objid))
+		if ((og.getGen() == 0) &&
+                    this->object_stream_to_objects.count(og.getObj()))
 		{
 		    // For linearized files, uncompressed objects go
 		    // at end, and we take care of assigning numbers
 		    // to them elsewhere.
 		    if (! this->linearized)
 		    {
-			assignCompressedObjectNumbers(objid);
+			assignCompressedObjectNumbers(og);
 		    }
 		}
 		else if ((! this->direct_stream_lengths) && object.isStream())
@@ -1041,8 +1046,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
     }
     if (child.isIndirect())
     {
-	int old_id = child.getObjectID();
-	int new_id = obj_renumber[old_id];
+	QPDFObjGen old_og = child.getObjGen();
+	int new_id = obj_renumber[old_og];
 	writeString(QUtil::int_to_string(new_id));
 	writeString(" 0 R");
     }
@@ -1134,7 +1139,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
 			  unsigned int flags, size_t stream_length,
                           bool compress)
 {
-    int old_id = object.getObjectID();
+    QPDFObjGen old_og = object.getObjGen();
     unsigned int child_flags = flags & ~f_stream;
 
     std::string indent;
@@ -1201,7 +1206,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
         bool have_extensions_adbe = false;
 
         QPDFObjectHandle extensions;
-        if (old_id == pdf.getRoot().getObjectID())
+        if (old_og == pdf.getRoot().getObjGen())
         {
             is_root = true;
             if (object.hasKey("/Extensions") &&
@@ -1396,7 +1401,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
     else if (object.isStream())
     {
 	// Write stream data to a buffer.
-	int new_id = obj_renumber[old_id];
+	int new_id = obj_renumber[old_og];
 	if (! this->direct_stream_lengths)
 	{
 	    this->cur_stream_length_id = new_id + 1;
@@ -1436,7 +1441,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
 	    filter = true;
 	    compress = false;
 	}
-	else if (this->normalize_content && normalized_streams.count(old_id))
+	else if (this->normalize_content && normalized_streams.count(old_og))
 	{
 	    normalize = true;
 	    filter = true;
@@ -1562,8 +1567,10 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
     // Note: object might be null if this is a place-holder for an
     // object stream that we are generating from scratch.
 
-    int old_id = object.getObjectID();
-    int new_id = obj_renumber[old_id];
+    QPDFObjGen old_og = object.getObjGen();
+    assert(old_og.getGen() == 0);
+    int old_id = old_og.getObj();
+    int new_id = obj_renumber[old_og];
 
     std::vector<qpdf_offset_t> offsets;
     qpdf_offset_t first = 0;
@@ -1612,12 +1619,12 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
 	}
 
 	int count = 0;
-	for (std::set<int>::iterator iter =
+	for (std::set<QPDFObjGen>::iterator iter =
 		 this->object_stream_to_objects[old_id].begin();
 	     iter != this->object_stream_to_objects[old_id].end();
 	     ++iter, ++count)
 	{
-	    int obj = *iter;
+	    QPDFObjGen obj = *iter;
 	    int new_obj = this->obj_renumber[obj];
 	    if (first_obj == -1)
 	    {
@@ -1631,7 +1638,17 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
 		if (! this->suppress_original_object_ids)
 		{
 		    writeString("; original object ID: " +
-				QUtil::int_to_string(obj));
+				QUtil::int_to_string(obj.getObj()));
+                    // For compatibility, only write the generation if
+                    // non-zero.  While object streams only allow
+                    // objects with generation 0, if we are generating
+                    // object streams, the old object could have a
+                    // non-zero generation.
+                    if (obj.getGen() != 0)
+                    {
+                        QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
+                        writeString(" " + QUtil::int_to_string(obj.getGen()));
+                    }
 		}
 		writeString("\n");
 	    }
@@ -1639,7 +1656,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
 	    {
 		offsets.push_back(this->pipeline->getCount());
 	    }
-	    writeObject(this->pdf.getObjectByID(obj, 0), count);
+	    writeObject(this->pdf.getObjectByObjGen(obj), count);
 
 	    this->xref[new_obj] = QPDFXRefEntry(2, new_id, count);
 	}
@@ -1697,32 +1714,33 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
 void
 QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
 {
-    int old_id = object.getObjectID();
+    QPDFObjGen old_og = object.getObjGen();
 
     if ((object_stream_index == -1) &&
-	(this->object_stream_to_objects.count(old_id)))
+        (old_og.getGen() == 0) &&
+	(this->object_stream_to_objects.count(old_og.getObj())))
     {
 	writeObjectStream(object);
 	return;
     }
 
-    int new_id = obj_renumber[old_id];
+    int new_id = obj_renumber[old_og];
     if (this->qdf_mode)
     {
-	if (this->page_object_to_seq.count(old_id))
+	if (this->page_object_to_seq.count(old_og))
 	{
 	    writeString("%% Page ");
 	    writeString(
 		QUtil::int_to_string(
-		    this->page_object_to_seq[old_id]));
+		    this->page_object_to_seq[old_og]));
 	    writeString("\n");
 	}
-	if (this->contents_to_page_seq.count(old_id))
+	if (this->contents_to_page_seq.count(old_og))
 	{
 	    writeString("%% Contents for page ");
 	    writeString(
 		QUtil::int_to_string(
-		    this->contents_to_page_seq[old_id]));
+		    this->contents_to_page_seq[old_og]));
 	    writeString("\n");
 	}
     }
@@ -1854,24 +1872,24 @@ QPDFWriter::initializeSpecialStreams()
 	 iter != pages.end(); ++iter)
     {
 	QPDFObjectHandle& page = *iter;
-	this->page_object_to_seq[page.getObjectID()] = ++num;
+	this->page_object_to_seq[page.getObjGen()] = ++num;
 	QPDFObjectHandle contents = page.getKey("/Contents");
-	std::vector<int> contents_objects;
+	std::vector<QPDFObjGen> contents_objects;
 	if (contents.isArray())
 	{
 	    int n = contents.getArrayNItems();
 	    for (int i = 0; i < n; ++i)
 	    {
 		contents_objects.push_back(
-		    contents.getArrayItem(i).getObjectID());
+		    contents.getArrayItem(i).getObjGen());
 	    }
 	}
 	else if (contents.isStream())
 	{
-	    contents_objects.push_back(contents.getObjectID());
+	    contents_objects.push_back(contents.getObjGen());
 	}
 
-	for (std::vector<int>::iterator iter = contents_objects.begin();
+	for (std::vector<QPDFObjGen>::iterator iter = contents_objects.begin();
 	     iter != contents_objects.end(); ++iter)
 	{
 	    this->contents_to_page_seq[*iter] = num;
@@ -1883,7 +1901,20 @@ QPDFWriter::initializeSpecialStreams()
 void
 QPDFWriter::preserveObjectStreams()
 {
-    this->pdf.getObjectStreamData(this->object_to_object_stream);
+    // Our object_to_object_stream map has to map ObjGen -> ObjGen
+    // since we may be generating object streams out of old objects
+    // that have generation numbers greater than zero.  However in an
+    // existing PDF, all object stream objects and all objects in them
+    // must have generation 0 because the PDF spec does not provide
+    // any way to do otherwise.
+    std::map<int, int> omap;
+    this->pdf.getObjectStreamData(omap);
+    for (std::map<int, int>::iterator iter = omap.begin();
+         iter != omap.end(); ++iter)
+    {
+        this->object_to_object_stream[QPDFObjGen((*iter).first, 0)] =
+            (*iter).second;
+    }
 }
 
 void
@@ -1899,7 +1930,8 @@ QPDFWriter::generateObjectStreams()
 
     // This code doesn't do anything with /Extends.
 
-    std::vector<int> const& eligible = this->pdf.getCompressibleObjects();
+    std::vector<QPDFObjGen> const& eligible =
+        this->pdf.getCompressibleObjGens();
     unsigned int n_object_streams = (eligible.size() + 99) / 100;
     unsigned int n_per = eligible.size() / n_object_streams;
     if (n_per * n_object_streams < eligible.size())
@@ -1908,7 +1940,7 @@ QPDFWriter::generateObjectStreams()
     }
     unsigned int n = 0;
     int cur_ostream = 0;
-    for (std::vector<int>::const_iterator iter = eligible.begin();
+    for (std::vector<QPDFObjGen>::const_iterator iter = eligible.begin();
 	 iter != eligible.end(); ++iter)
     {
 	if ((n % n_per) == 0)
@@ -2172,11 +2204,11 @@ QPDFWriter::write()
 	     iter != pages.end(); ++iter)
 	{
 	    QPDFObjectHandle& page = *iter;
-	    int objid = page.getObjectID();
-	    if (this->object_to_object_stream.count(objid))
+	    QPDFObjGen og = page.getObjGen();
+	    if (this->object_to_object_stream.count(og))
 	    {
 		QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
-		this->object_to_object_stream.erase(objid);
+		this->object_to_object_stream.erase(og);
 	    }
 	}
     }
@@ -2188,20 +2220,20 @@ QPDFWriter::write()
     	// 8.0.0 has a bug that prevents it from being able to handle
     	// encrypted files with compressed document catalogs, so we
     	// disable them in that case as well.
-	int objid = pdf.getRoot().getObjectID();
-	if (this->object_to_object_stream.count(objid))
+	QPDFObjGen og = pdf.getRoot().getObjGen();
+	if (this->object_to_object_stream.count(og))
 	{
 	    QTC::TC("qpdf", "QPDFWriter uncompressing root");
-	    this->object_to_object_stream.erase(objid);
+	    this->object_to_object_stream.erase(og);
 	}
     }
 
     // Generate reverse mapping from object stream to objects
-    for (std::map<int, int>::iterator iter =
+    for (std::map<QPDFObjGen, int>::iterator iter =
 	     this->object_to_object_stream.begin();
 	 iter != this->object_to_object_stream.end(); ++iter)
     {
-	int obj = (*iter).first;
+	QPDFObjGen obj = (*iter).first;
 	int stream = (*iter).second;
 	this->object_stream_to_objects[stream].insert(obj);
 	this->max_ostream_index =
@@ -2303,7 +2335,8 @@ QPDFWriter::writeHintStream(int hint_id)
     int S = 0;
     int O = 0;
     pdf.generateHintStream(
-	this->xref, this->lengths, this->obj_renumber, hint_buffer, S, O);
+	this->xref, this->lengths, this->obj_renumber_no_gen,
+        hint_buffer, S, O);
 
     openObject(hint_id);
     setDataKey(hint_id);
@@ -2522,19 +2555,57 @@ QPDFWriter::calculateXrefStreamPadding(int xref_bytes)
 }
 
 void
+QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in,
+                              std::map<int, int>& out)
+{
+    // There are deep assumptions in the linearization code in QPDF
+    // that there is only one object with each object number; i.e.,
+    // you can't have two objects with the same object number and
+    // different generations.  This is a pretty safe assumption
+    // because Adobe Reader and Acrobat can't actually handle this
+    // case.  There is not much if any code in QPDF outside
+    // linearization that assumes this, but the linearization code as
+    // currently implemented would do weird things if we found such a
+    // case.  In order to avoid breaking ABI changes in QPDF, we will
+    // first assert that this condition holds.  Then we can create new
+    // maps for QPDF that throw away generation numbers.
+
+    out.clear();
+    for (std::map<QPDFObjGen, int>::const_iterator iter = in.begin();
+         iter != in.end(); ++iter)
+    {
+        if (out.count((*iter).first.getObj()))
+        {
+            throw std::logic_error(
+                "QPDF cannot currently linearize files that contain"
+                " multiple objects with the same object ID and different"
+                " generations.  If you see this error message, please file"
+                " a bug report and attach the file if possible.  As a"
+                " workaround, first convert the file with qpdf without"
+                " linearizing, and then linearize the result of that"
+                " conversion.");
+        }
+        out[(*iter).first.getObj()] = (*iter).second;
+    }
+}
+
+void
 QPDFWriter::writeLinearized()
 {
     // Optimize file and enqueue objects in order
 
+    discardGeneration(this->object_to_object_stream,
+                      this->object_to_object_stream_no_gen);
+
     bool need_xref_stream = (! this->object_to_object_stream.empty());
-    pdf.optimize(this->object_to_object_stream);
+    pdf.optimize(this->object_to_object_stream_no_gen);
 
     std::vector<QPDFObjectHandle> part4;
     std::vector<QPDFObjectHandle> part6;
     std::vector<QPDFObjectHandle> part7;
     std::vector<QPDFObjectHandle> part8;
     std::vector<QPDFObjectHandle> part9;
-    pdf.getLinearizedParts(this->object_to_object_stream,
+    pdf.getLinearizedParts(this->object_to_object_stream_no_gen,
 			   part4, part6, part7, part8, part9);
 
     // Object number sequence:
@@ -2570,7 +2641,7 @@ QPDFWriter::writeLinearized()
 	for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin();
 	     iter != (*vecs2[i]).end(); ++iter)
 	{
-	    assignCompressedObjectNumbers((*iter).getObjectID());
+	    assignCompressedObjectNumbers((*iter).getObjGen());
 	}
     }
     int second_half_end = this->next_objid - 1;
@@ -2602,7 +2673,7 @@ QPDFWriter::writeLinearized()
 	for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin();
 	     iter != (*vecs1[i]).end(); ++iter)
 	{
-	    assignCompressedObjectNumbers((*iter).getObjectID());
+	    assignCompressedObjectNumbers((*iter).getObjGen());
 	}
     }
     int first_half_end = this->next_objid - 1;
@@ -2660,7 +2731,7 @@ QPDFWriter::writeLinearized()
 	if (pass == 2)
 	{
 	    std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
-	    int first_page_object = obj_renumber[pages[0].getObjectID()];
+	    int first_page_object = obj_renumber[pages[0].getObjGen()];
 	    int npages = pages.size();
 
 	    writeString(" /Linearized 1 /L ");
@@ -2834,6 +2905,8 @@ QPDFWriter::writeLinearized()
 	writeString(QUtil::int_to_string(first_xref_offset));
 	writeString("\n%%EOF\n");
 
+        discardGeneration(this->obj_renumber, this->obj_renumber_no_gen);
+
 	if (pass == 1)
 	{
 	    // Close first pass pipeline
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 31e15495..16650f73 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -262,3 +262,4 @@ qpdf-c called qpdf_set_r6_encryption_parameters 0
 QPDFObjectHandle EOF in inline image 0
 QPDFObjectHandle inline image token 0
 QPDF not caching overridden objstm object 0
+QPDFWriter original obj non-zero gen 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 5ae2ae3c..30200ca1 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
 show_ntests();
 # ----------
 $td->notify("--- Miscellaneous Tests ---");
-$n_tests += 62;
+$n_tests += 64;
 
 $td->runtest("qpdf version",
 	     {$td->COMMAND => "qpdf --version"},
@@ -501,6 +501,14 @@ $td->runtest("overridden compressed objects",
               $td->EXIT_STATUS => 0},
              $td->NORMALIZE_NEWLINES);
 
+$td->runtest("generate object streams for gen > 0",
+             {$td->COMMAND => "qpdf --qdf --static-id" .
+		  " --object-streams=generate gen1.pdf a.pdf"},
+             {$td->STRING => "", $td->EXIT_STATUS => 0});
+$td->runtest("check file",
+             {$td->FILE => "a.pdf"},
+             {$td->FILE => "gen1.qdf"});
+
 show_ntests();
 # ----------
 $td->notify("--- Numeric range parsing tests ---");
@@ -1183,6 +1191,7 @@ my @to_linearize =
      'lin-delete-and-reuse',	# linearized, then delete and reuse
      'object-stream',		# contains object streams
      'hybrid-xref',	        # contains both xref tables and streams
+     'gen1',                    # has objects with generation > 0
      @linearized_files,		# we should be able to relinearize
      );
 
diff --git a/qpdf/qtest/qpdf/gen1.pdf b/qpdf/qtest/qpdf/gen1.pdf
new file mode 100644
index 00000000..6bfbbefe
--- /dev/null
+++ b/qpdf/qtest/qpdf/gen1.pdf
@@ -0,0 +1,79 @@
+%PDF-1.3
+1 1 obj
+<<
+  /Type /Catalog
+  /Pages 2 1 R
+>>
+endobj
+
+2 1 obj
+<<
+  /Type /Pages
+  /Kids [
+    3 1 R
+  ]
+  /Count 1
+>>
+endobj
+
+3 1 obj
+<<
+  /Type /Page
+  /Parent 2 1 R
+  /MediaBox [0 0 612 792]
+  /Contents 4 1 R
+  /Resources <<
+    /ProcSet 5 1 R
+    /Font <<
+      /F1 6 1 R
+    >>
+  >>
+>>
+endobj
+
+4 1 obj
+<<
+  /Length 44
+>>
+stream
+BT
+  /F1 24 Tf
+  72 720 Td
+  (Potato) Tj
+ET
+endstream
+endobj
+
+5 1 obj
+[
+  /PDF
+  /Text
+]
+endobj
+
+6 1 obj
+<<
+  /Type /Font
+  /Subtype /Type1
+  /Name /F1
+  /BaseFont /Helvetica
+  /Encoding /WinAnsiEncoding
+>>
+endobj
+
+xref
+0 7
+0000000000 65535 f 
+0000000009 00001 n 
+0000000063 00001 n 
+0000000135 00001 n 
+0000000307 00001 n 
+0000000403 00001 n 
+0000000438 00001 n 
+trailer <<
+  /Size 7
+  /Root 1 1 R
+>>
+startxref
+556
+%%EOF
diff --git a/qpdf/qtest/qpdf/gen1.qdf b/qpdf/qtest/qpdf/gen1.qdf
new file mode 100644
index 00000000..802bf2bc
--- /dev/null
+++ b/qpdf/qtest/qpdf/gen1.qdf
author	Jay Berkenbilt <ejb@ql.org>	2013-06-14 20:53:47 +0200
committer	Jay Berkenbilt <ejb@ql.org>	2013-06-14 20:58:09 +0200
commit	a3576a73593987b26cd3eff346f8f7c11f713cbd (patch)
tree	3db5b39d339cddacf2fc1b13ecf4c028bc0589b5
parent	96eb96511557469d50bbd55d211ced16acc31e13 (diff)
download	qpdf-a3576a73593987b26cd3eff346f8f7c11f713cbd.tar.zst