From 42d396f1dd8d38294e45b14021cd72c13850a53b Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sun, 18 Aug 2019 21:26:19 -0400 Subject: Handle invalid name tokens symmetrically for PDF < 1.2 (fixes #332) --- libqpdf/QPDF.cc | 4 ---- libqpdf/QPDFObjectHandle.cc | 31 ++++++++++++++++++++----------- libqpdf/QPDFTokenizer.cc | 35 +++++++++++++++-------------------- libqpdf/QPDF_Name.cc | 10 ++++++++-- 4 files changed, 43 insertions(+), 37 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 5a03facc..23545d1a 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -374,10 +374,6 @@ QPDF::parse(char const* password) // version 1.2 for /FlateDecode this->m->pdf_version = "1.2"; } - if (atof(this->m->pdf_version.c_str()) < 1.2) - { - this->m->tokenizer.allowPoundAnywhereInName(); - } // PDF spec says %%EOF must be found within the last 1024 bytes of // the file. We add an extra 30 characters to leave room for the diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 3fa0266c..1b3b64b0 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1616,7 +1616,8 @@ QPDFObjectHandle::parseContentStream_internal( PointerHolder stream_data = buf.getBuffer(); try { - parseContentStream_data(stream_data, all_description, callbacks); + parseContentStream_data(stream_data, all_description, + callbacks, getOwningQPDF()); } catch (TerminateParsing&) { @@ -1629,7 +1630,8 @@ void QPDFObjectHandle::parseContentStream_data( PointerHolder stream_data, std::string const& description, - ParserCallbacks* callbacks) + ParserCallbacks* callbacks, + QPDF* context) { size_t length = stream_data->getSize(); PointerHolder input = @@ -1640,7 +1642,8 @@ QPDFObjectHandle::parseContentStream_data( while (QIntC::to_size(input->tell()) < length) { QPDFObjectHandle obj = - parseInternal(input, "content", tokenizer, empty, 0, 0, true); + parseInternal(input, "content", tokenizer, + empty, 0, context, true); if (! 
obj.isInitialized()) { // EOF @@ -1660,9 +1663,10 @@ QPDFObjectHandle::parseContentStream_data( if (t.getType() == QPDFTokenizer::tt_bad) { QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image"); - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), - "stream data", input->tell(), - "EOF found while reading inline image"); + warn(context, + QPDFExc(qpdf_e_damaged_pdf, input->getName(), + "stream data", input->tell(), + "EOF found while reading inline image")); } else { @@ -1737,6 +1741,16 @@ QPDFObjectHandle::parseInternal(PointerHolder input, QPDFTokenizer::Token token = tokenizer.readToken(input, object_description, true); + std::string const& token_error_message = token.getErrorMessage(); + if (! token_error_message.empty()) + { + // Tokens other than tt_bad can still generate warnings. + warn(context, + QPDFExc(qpdf_e_damaged_pdf, input->getName(), + object_description, + input->getLastOffset(), + token_error_message)); + } switch (token.getType()) { @@ -1756,11 +1770,6 @@ QPDFObjectHandle::parseInternal(PointerHolder input, case QPDFTokenizer::tt_bad: QTC::TC("qpdf", "QPDFObjectHandle bad token in parse"); - warn(context, - QPDFExc(qpdf_e_damaged_pdf, input->getName(), - object_description, - input->getLastOffset(), - token.getErrorMessage())); bad = true; object = newNull(); break; diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 54c9825e..18af12e0 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -78,7 +78,6 @@ QPDFWordTokenFinder::check() } QPDFTokenizer::Members::Members() : - pound_special_in_name(true), allow_eof(false), include_ignorable(false) { @@ -128,13 +127,6 @@ QPDFTokenizer::QPDFTokenizer() : { } -void -QPDFTokenizer::allowPoundAnywhereInName() -{ - QTC::TC("qpdf", "QPDFTokenizer allow pound anywhere in name"); - this->m->pound_special_in_name = false; -} - void QPDFTokenizer::allowEOF() { @@ -169,17 +161,19 @@ QPDFTokenizer::resolveLiteral() // valid name, so don't strip leading /. 
That way we // don't have to deal with the empty string as a name. std::string nval = "/"; - char const* valstr = this->m->val.c_str() + 1; - for (char const* p = valstr; *p; ++p) + size_t len = this->m->val.length(); + for (size_t i = 1; i < len; ++i) { - if ((*p == '#') && this->m->pound_special_in_name) + char ch = this->m->val.at(i); + if (ch == '#') { - if (p[1] && p[2] && - QUtil::is_hex_digit(p[1]) && QUtil::is_hex_digit(p[2])) + if ((i + 2 < len) && + QUtil::is_hex_digit(this->m->val.at(i+1)) && + QUtil::is_hex_digit(this->m->val.at(i+2))) { char num[3]; - num[0] = p[1]; - num[1] = p[2]; + num[0] = this->m->val.at(i+1); + num[1] = this->m->val.at(i+2); num[2] = '\0'; char ch = static_cast<char>(strtol(num, 0, 16)); if (ch == '\0') @@ -192,21 +186,22 @@ QPDFTokenizer::resolveLiteral() } else { - nval += ch; + nval.append(1, ch); } - p += 2; + i += 2; } else { QTC::TC("qpdf", "QPDFTokenizer bad name"); - this->m->type = tt_bad; this->m->error_message = "invalid name token"; - nval += *p; + // Use null to encode a bad # -- this is reversed + // in QPDF_Name::normalizeName. + nval += '\0'; } } else { - nval += *p; + nval.append(1, ch); } } this->m->val = nval; diff --git a/libqpdf/QPDF_Name.cc b/libqpdf/QPDF_Name.cc index 290fb067..ffd21af2 100644 --- a/libqpdf/QPDF_Name.cc +++ b/libqpdf/QPDF_Name.cc @@ -22,11 +22,17 @@ QPDF_Name::normalizeName(std::string const& name) } std::string result; result += name.at(0); - for (unsigned int i = 1; i < name.length(); ++i) + for (size_t i = 1; i < name.length(); ++i) { char ch = name.at(i); // Don't use locale/ctype here; follow PDF spec guidelines. - if (strchr("#()<>[]{}/%", ch) || (ch < 33) || (ch > 126)) + if (ch == '\0') + { + // QPDFTokenizer embeds a null character to encode an + // invalid #. + result += "#"; + } + else if (strchr("#()<>[]{}/%", ch) || (ch < 33) || (ch > 126)) { result += "#" + QUtil::hex_encode(std::string(&ch, 1)); } -- cgit v1.2.3-54-g00ecf