aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2018-08-12 23:26:40 +0200
committer Jay Berkenbilt <ejb@ql.org> 2018-08-12 23:45:43 +0200
commit 3d6615b2764da759c5b9354c40cd1a32b84ee039 (patch)
tree 379048c173cf170100f7d88099dd2758be76f124
parent 48331b4bdc5b63465604a1201a3b66ea8a793d51 (diff)
download qpdf-3d6615b2764da759c5b9354c40cd1a32b84ee039.tar.zst
Pl_Buffer: reduce memory growth (fixes #228)
Rather than keeping a list of buffers for every write, accumulate bytes in a single buffer, doubling the size of the buffer when needed to accommodate new data. This is not the best possible implementation, but the change was implemented in this way to avoid changing the shape of Pl_Buffer and thus breaking backward compatibility.
-rw-r--r-- ChangeLog 8
-rw-r--r-- TODO 12
-rw-r--r-- libqpdf/Pl_Buffer.cc 43
3 files changed, 50 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 0fa9d10b..7b1f63c0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2018-08-12 Jay Berkenbilt <ejb@ql.org>
+
+ * Rewrite the internals of Pl_Buffer to be much more efficient in
+ use of memory at a very slight performance cost. The old
+ implementation could cause memory usage to go out of control for
+ files with large images compressed using the TIFF predictor.
+ Fixes #228.
+
2018-08-05 Jay Berkenbilt <ejb@ql.org>
* Bug fix: end of line characters were not properly handled inside
diff --git a/TODO b/TODO
index 3c17bafb..2c8f779a 100644
--- a/TODO
+++ b/TODO
@@ -31,6 +31,16 @@ Soon
- See ../misc/broken-files
+Next ABI
+========
+
+Do these things next time we have to break binary compatibility.
+
+ * Pl_Buffer's internal structure is not right for what it does. It
+ was modified for greater efficiency, but it was done in a way that
+ preserved binary compatibility, so the implementation is a bit
+ convoluted.
+
Lexical
=======
@@ -72,6 +82,8 @@ directory or that are otherwise not publicly accessible. This includes
things sent to me by email that are specifically not public. Even so,
I find it useful to make reference to them in this list
+ * Pl_TIFFPredictor is pretty slow.
+
* Some test cases on bad files fail because qpdf is unable to find
the root dictionary when it fails to read the trailer. Recovery
could find the root dictionary and even the info dictionary in
diff --git a/libqpdf/Pl_Buffer.cc b/libqpdf/Pl_Buffer.cc
index 45c0c862..1ca9292d 100644
--- a/libqpdf/Pl_Buffer.cc
+++ b/libqpdf/Pl_Buffer.cc
@@ -17,11 +17,32 @@ Pl_Buffer::~Pl_Buffer()
void
Pl_Buffer::write(unsigned char* buf, size_t len)
{
- Buffer* b = new Buffer(len);
- memcpy(b->getBuffer(), buf, len);
- this->data.push_back(b);
+ PointerHolder<Buffer> cur_buf;
+ size_t cur_size = 0;
+ if (! this->data.empty())
+ {
+ cur_buf = this->data.back();
+ cur_size = cur_buf->getSize();
+ }
+ size_t left = cur_size - this->total_size;
+ if (left < len)
+ {
+ size_t new_size = std::max(this->total_size + len, 2 * cur_size);
+ Buffer* b = new Buffer(new_size);
+ if (cur_buf.getPointer())
+ {
+ memcpy(b->getBuffer(), cur_buf->getBuffer(), this->total_size);
+ }
+ this->data.clear();
+ cur_buf = b;
+ this->data.push_back(cur_buf);
+ }
+ if (len)
+ {
+ memcpy(cur_buf->getBuffer() + this->total_size, buf, len);
+ this->total_size += len;
+ }
this->ready = false;
- this->total_size += len;
if (getNext(true))
{
@@ -49,17 +70,13 @@ Pl_Buffer::getBuffer()
Buffer* b = new Buffer(this->total_size);
unsigned char* p = b->getBuffer();
- while (! this->data.empty())
+ if (! this->data.empty())
{
- PointerHolder<Buffer> bp = this->data.front();
- this->data.pop_front();
- size_t bytes = bp->getSize();
- memcpy(p, bp->getBuffer(), bytes);
- p += bytes;
- this->total_size -= bytes;
+ PointerHolder<Buffer> bp = this->data.back();
+ this->data.clear();
+ memcpy(p, bp->getBuffer(), this->total_size);
}
-
- assert(this->total_size == 0);
+ this->total_size = 0;
this->ready = false;
return b;