diff options
author | James R. Barlow <james@purplerock.ca> | 2020-06-25 09:50:49 +0200 |
---|---|---|
committer | Jay Berkenbilt <jberkenbilt@users.noreply.github.com> | 2020-10-17 01:57:29 +0200 |
commit | 3fc7c99d02f9ba3045a7f085bf8f74132c174b59 (patch) | |
tree | 3e6171c0bffed9ccfc49d5dc87a11744cc6e84a2 | |
parent | 3221022fc9f10a3dd28a46e333485cfe3597c5c4 (diff) | |
download | qpdf-3fc7c99d02f9ba3045a7f085bf8f74132c174b59.tar.zst |
Replace memchr with manual memory search
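On large files with predominantly \n line endings, memchr(..'\r'..)
seems to waste a considerable amount of time searching for a line
ending candidate that we don't need: each call is given the entire
remaining length, so when no '\r' is nearby the search runs far past
the next '\n' before giving up. A single manual scan that stops at the
first '\r' or '\n' avoids that extra work.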
On the Adobe PDF Reference Manual 1.7, this commit is 8x faster at
QPDF::processMemoryFile().
-rw-r--r-- | libqpdf/BufferInputSource.cc | 13 |
1 file changed, 7 insertions, 6 deletions
diff --git a/libqpdf/BufferInputSource.cc b/libqpdf/BufferInputSource.cc index cbb2e7f0..9e141510 100644 --- a/libqpdf/BufferInputSource.cc +++ b/libqpdf/BufferInputSource.cc @@ -61,14 +61,15 @@ BufferInputSource::findAndSkipNextEOL() } qpdf_offset_t result = 0; - size_t len = QIntC::to_size(end_pos - this->m->cur_offset); unsigned char const* buffer = this->m->buf->getBuffer(); + unsigned char const* end = buffer + end_pos; + unsigned char const* p = buffer + this->m->cur_offset; - void* start = const_cast<unsigned char*>(buffer) + this->m->cur_offset; - unsigned char* p1 = static_cast<unsigned char*>(memchr(start, '\r', len)); - unsigned char* p2 = static_cast<unsigned char*>(memchr(start, '\n', len)); - unsigned char* p = (p1 && p2) ? std::min(p1, p2) : p1 ? p1 : p2; - if (p) + while ((p < end) && !((*p == '\r') || (*p == '\n'))) + { + ++p; + } + if (p < end) { result = p - buffer; this->m->cur_offset = result + 1; |