From eb49e07c0afc1b30b53d3ae2849c824c9407f00d Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 31 Jan 2019 15:51:52 -0500 Subject: Make inline image token exactly contain the image data Do not include the trailing EI, and handle cases where EI is not preceded by a delimiter. Such cases have been seen in the wild. --- libqpdf/QPDFObjectHandle.cc | 11 +---------- libqpdf/QPDFPageObjectHelper.cc | 10 +++++----- libqpdf/QPDFTokenizer.cc | 11 +---------- 3 files changed, 7 insertions(+), 25 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index de5d56b3..a3a4d61d 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1570,16 +1570,7 @@ QPDFObjectHandle::parseContentStream_data( } else { - // Skip back over EI - input->seek(-2, SEEK_CUR); - std::string inline_image = t.getRawValue(); - for (int i = 0; i < 3; ++i) - { - if (inline_image.length() > 0) - { - inline_image.erase(inline_image.length() - 1); - } - } + std::string inline_image = t.getValue(); QTC::TC("qpdf", "QPDFObjectHandle inline image token"); callbacks->handleObject( QPDFObjectHandle::newInlineImage(inline_image)); diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index caaff853..e72b4c70 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -228,10 +228,6 @@ InlineImageTracker::handleToken(QPDFTokenizer::Token const& token) { std::string image_data(token.getValue()); size_t len = image_data.length(); - // The token ends with delimiter followed by EI, so it - // will always be at least 3 bytes long. We want to - // exclude the EI and preceding delimiter. - len = (len >= 3 ? len - 3 : 0); if (len >= this->min_size) { QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image"); @@ -256,14 +252,18 @@ InlineImageTracker::handleToken(QPDFTokenizer::Token const& token) QTC::TC("qpdf", "QPDFPageObjectHelper keep inline image"); write(bi_str); writeToken(token); + state = st_top; } - state = st_top; } else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")) { bi_str += token.getValue(); dict_str += " >>"; } + else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "EI")) + { + state = st_top; + } else { bi_str += token.getValue(); diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 2671fcbb..80fcf347 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -73,15 +73,6 @@ QPDFWordTokenFinder::check() // beginning of the input. return false; } - is->seek(token_start - 1, SEEK_SET); - char prev; - bool prev_okay = ((is->read(&prev, 1) == 1) && is_delimiter(prev)); - is->seek(pos, SEEK_SET); - if (! prev_okay) - { - QTC::TC("qpdf", "QPDFTokenizer finder word not preceded by delimiter"); - return false; - } return true; } @@ -724,7 +715,7 @@ QPDFTokenizer::findEI(PointerHolder input) { break; } - this->m->inline_image_bytes = input->tell() - pos; + this->m->inline_image_bytes = input->tell() - pos - 2; QPDFTokenizer check; bool found_bad = false; -- cgit v1.2.3-54-g00ecf