Security: avoid pre-allocating vectors based on file data

In places where std::vector<T>(size_t) was used, either validate that the size parameter is sane or refactor code to avoid the need to pre-allocate the vector.
author: Jay Berkenbilt <ejb@ql.org> 2013-10-05 17:30:27 +0200
committer: Jay Berkenbilt <ejb@ql.org> 2013-10-10 02:57:14 +0200
commit: 0bfe9024893ebb1f62108fe6c24410e6ba589c3e (patch)
tree: fdfce5d27bfb482042105e1ba61d46e56294905e
parent: 10bceb552f1cfd2ddae3c8bfd7d9b38a66e710c4 (diff)
download: qpdf-0bfe9024893ebb1f62108fe6c24410e6ba589c3e.tar.zst
5 files changed, 55 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog
index 124a086d..f87c4418 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2013-10-05  Jay Berkenbilt  <ejb@ql.org>
 
+	* Security fix: In places where std::vector<T>(size_t) was used,
+	either validate that the size parameter is sane or refactor code
+	to avoid the need to pre-allocate the vector.  This reduces the
+	likelihood of allocating a lot of memory in response to invalid
+	data in linearization hint streams.
+
 	* Security fix: sanitize /W array in cross reference stream to
 	avoid a potential integer overflow in a multiplication.  It is
 	unlikely that any exploits were possible from this bug as
diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc
index dd09b1c0..7d2560d3 100644
--- a/libqpdf/QPDF_linearization.cc
+++ b/libqpdf/QPDF_linearization.cc
@@ -23,13 +23,22 @@ static void
 load_vector_int(BitStream& bit_stream, int nitems, std::vector<T>& vec,
 		int bits_wanted, int_type T::*field)
 {
+    bool append = vec.empty();
     // nitems times, read bits_wanted from the given bit stream,
     // storing results in the ith vector entry.
 
     for (int i = 0; i < nitems; ++i)
     {
+        if (append)
+        {
+            vec.push_back(T());
+        }
 	vec[i].*field = bit_stream.getBits(bits_wanted);
     }
+    if (static_cast<int>(vec.size()) != nitems)
+    {
+        throw std::logic_error("vector has wrong size in load_vector_int");
+    }
     // The PDF spec says that each hint table starts at a byte
     // boundary.  Each "row" actually must start on a byte boundary.
     bit_stream.skipToNextByte();
@@ -255,6 +264,17 @@ QPDF::readLinearizationData()
 
     // Store linearization parameter data
 
+    // Various places in the code use linp.npages, which is
+    // initialized from N, to pre-allocate memory, so make sure it's
+    // accurate and bail right now if it's not.
+    if (N.getIntValue() != static_cast<long long>(getAllPages().size()))
+    {
+        throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
+                      "linearization hint table",
+                      this->file->getLastOffset(),
+                      "/N does not match number of pages");
+    }
+
     // file_size initialized by isLinearized()
     this->linp.first_page_object = O.getIntValue();
     this->linp.first_page_end = E.getIntValue();
@@ -396,10 +416,9 @@ QPDF::readHPageOffset(BitStream h)
     t.nbits_shared_numerator = h.getBits(16);		    // 12
     t.shared_denominator = h.getBits(16);		    // 13
 
-    unsigned int nitems = this->linp.npages;
     std::vector<HPageOffsetEntry>& entries = t.entries;
-    entries = std::vector<HPageOffsetEntry>(nitems);
-
+    entries.clear();
+    unsigned int nitems = this->linp.npages;
     load_vector_int(h, nitems, entries,
 		    t.nbits_delta_nobjects,
 		    &HPageOffsetEntry::delta_nobjects);
@@ -441,10 +460,9 @@ QPDF::readHSharedObject(BitStream h)
     QTC::TC("qpdf", "QPDF lin nshared_total > nshared_first_page",
 	    (t.nshared_total > t.nshared_first_page) ? 1 : 0);
 
-    int nitems = t.nshared_total;
     std::vector<HSharedObjectEntry>& entries = t.entries;
-    entries = std::vector<HSharedObjectEntry>(nitems);
-
+    entries.clear();
+    int nitems = t.nshared_total;
     load_vector_int(h, nitems, entries,
 		    t.nbits_delta_group_length,
 		    &HSharedObjectEntry::delta_group_length);
@@ -1466,8 +1484,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
     // validation code can compute them relatively easily given the
     // rest of the information.
 
+    // npages is the size of the existing pages vector, which has been
+    // created by traversing the pages tree, and as such is a
+    // reasonable size.
     this->c_linp.npages = npages;
-    this->c_page_offset_data.entries = 	std::vector<CHPageOffsetEntry>(npages);
+    this->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(npages);
 
     // Part 4: open document objects.  We don't care about the order.
 
@@ -1861,6 +1882,7 @@ QPDF::calculateHPageOffset(
 
     HPageOffset& ph = this->page_offset_hints;
     std::vector<HPageOffsetEntry>& phe = ph.entries;
+    // npages is the size of the existing pages array.
     phe = std::vector<HPageOffsetEntry>(npages);
 
     for (unsigned int i = 0; i < npages; ++i)
@@ -1935,7 +1957,7 @@ QPDF::calculateHSharedObject(
     std::vector<CHSharedObjectEntry>& csoe = cso.entries;
     HSharedObject& so = this->shared_object_hints;
     std::vector<HSharedObjectEntry>& soe = so.entries;
-    soe = std::vector<HSharedObjectEntry>(cso.nshared_total);
+    soe.clear();
 
     int min_length = outputLengthNextN(
 	csoe[0].object, 1, lengths, obj_renumber);
@@ -1948,8 +1970,13 @@ QPDF::calculateHSharedObject(
 	    csoe[i].object, 1, lengths, obj_renumber);
 	min_length = std::min(min_length, length);
 	max_length = std::max(max_length, length);
+        soe.push_back(HSharedObjectEntry());
 	soe[i].delta_group_length = length;
     }
+    if (soe.size() != static_cast<size_t>(cso.nshared_total))
+    {
+        throw std::logic_error("soe has wrong size after initialization");
+    }
 
     so.nshared_total = cso.nshared_total;
     so.nshared_first_page = cso.nshared_first_page;
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 87c9d8c1..d07a54c5 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
 show_ntests();
 # ----------
 $td->notify("--- Miscellaneous Tests ---");
-$n_tests += 69;
+$n_tests += 70;
 
 $td->runtest("qpdf version",
 	     {$td->COMMAND => "qpdf --version"},
@@ -537,6 +537,13 @@ $td->runtest("bounds check linearization data 2",
              {$td->FILE => "linearization-bounds-2.out",
               $td->EXIT_STATUS => 2},
              $td->NORMALIZE_NEWLINES);
+# Throws logic error, not bad_alloc
+$td->runtest("sanity check array size",
+             {$td->COMMAND =>
+                  "qpdf --check linearization-large-vector-alloc.pdf"},
+             {$td->FILE => "linearization-large-vector-alloc.out",
+              $td->EXIT_STATUS => 2},
+             $td->NORMALIZE_NEWLINES);
 
 show_ntests();
 # ----------
diff --git a/qpdf/qtest/qpdf/linearization-large-vector-alloc.out b/qpdf/qtest/qpdf/linearization-large-vector-alloc.out
new file mode 100644
index 00000000..2c807d3c
--- /dev/null
+++ b/qpdf/qtest/qpdf/linearization-large-vector-alloc.out
@@ -0,0 +1,6 @@
+checking linearization-large-vector-alloc.pdf
+PDF Version: 1.3
+File is not encrypted
+File is linearized
+WARNING: linearization-large-vector-alloc.pdf (linearization hint stream: object 62 0, file position 1183): attempting to recover stream length
+overflow reading bit stream
diff --git a/qpdf/qtest/qpdf/linearization-large-vector-alloc.pdf b/qpdf/qtest/qpdf/linearization-large-vector-alloc.pdf
new file mode 100644
index 00000000..1807b08f
--- /dev/null
+++ b/qpdf/qtest/qpdf/linearization-large-vector-alloc.pdf
author	Jay Berkenbilt <ejb@ql.org>	2013-10-05 17:30:27 +0200
committer	Jay Berkenbilt <ejb@ql.org>	2013-10-10 02:57:14 +0200
commit	0bfe9024893ebb1f62108fe6c24410e6ba589c3e (patch)
tree	fdfce5d27bfb482042105e1ba61d46e56294905e
parent	10bceb552f1cfd2ddae3c8bfd7d9b38a66e710c4 (diff)
download	qpdf-0bfe9024893ebb1f62108fe6c24410e6ba589c3e.tar.zst