From a85007cb0d9bb8af8f0a32bda3ace19aaff97816 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 15 Jun 2013 12:38:25 -0400 Subject: Handle more broken files Space rather than newline after xref, missing /ID in trailer for encrypted file. This enables qpdf to handle some files that xpdf can handle. Adobe reader can't necessarily handle them. --- libqpdf/QPDF.cc | 17 ++++++++++++++--- libqpdf/QPDF_encryption.cc | 21 ++++++++++++++------- 2 files changed, 28 insertions(+), 10 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 00e13ca0..ad742daa 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -430,11 +430,22 @@ QPDF::read_xref(qpdf_offset_t xref_offset) std::map free_table; while (xref_offset) { + char buf[7]; + memset(buf, 0, sizeof(buf)); this->file->seek(xref_offset, SEEK_SET); - std::string line = this->file->readLine(50); - if (line == "xref") + this->file->read(buf, sizeof(buf) - 1); + // The PDF spec says xref must be followed by a line + // terminator, but files exist in the wild where it is + // terminated by arbitrary whitespace. + PCRE xref_re("^xref\\s+"); + PCRE::Match m = xref_re.match(buf); + if (m) { - xref_offset = read_xrefTable(this->file->tell()); + QTC::TC("qpdf", "QPDF xref space", + ((buf[4] == '\n') ? 0 : + (buf[4] == '\r') ? 1 : + (buf[4] == ' ') ? 2 : 9999)); + xref_offset = read_xrefTable(xref_offset + m.getMatch(0).length()); } else { diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 51ed3072..88cd707f 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -791,17 +791,24 @@ QPDF::initializeEncryption() // encryption dictionary. this->encrypted = true; + std::string id1; QPDFObjectHandle id_obj = this->trailer.getKey("/ID"); - if (! (id_obj.isArray() && - (id_obj.getArrayNItems() == 2) && - id_obj.getArrayItem(0).isString())) + if ((id_obj.isArray() && + (id_obj.getArrayNItems() == 2) && + id_obj.getArrayItem(0).isString())) { - throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), - "trailer", this->file->getLastOffset(), - "invalid /ID in trailer dictionary"); + id1 = id_obj.getArrayItem(0).getStringValue(); + } + else + { + // Treating a missing ID as the empty string enables qpdf to + // decrypt some invalid encrypted files with no /ID that + // poppler can read but Adobe Reader can't. + warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), + "trailer", this->file->getLastOffset(), + "invalid /ID in trailer dictionary")); } - std::string id1 = id_obj.getArrayItem(0).getStringValue(); QPDFObjectHandle encryption_dict = this->trailer.getKey("/Encrypt"); if (! encryption_dict.isDictionary()) { -- cgit v1.2.3-70-g09d2