diff options
author | Jay Berkenbilt <ejb@ql.org> | 2013-06-15 18:38:25 +0200 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2013-06-15 18:40:01 +0200 |
commit | a85007cb0d9bb8af8f0a32bda3ace19aaff97816 (patch) | |
tree | 83bd0cbe40e6f793d954c1351e97830ab966203f /libqpdf | |
parent | a1d5a3e916a23ae22ee65b05df1789435a72d67c (diff) | |
download | qpdf-a85007cb0d9bb8af8f0a32bda3ace19aaff97816.tar.zst |
Handle more broken files
Space rather than newline after xref, missing /ID in trailer for
encrypted file. This enables qpdf to handle some files that xpdf can
handle. Adobe reader can't necessarily handle them.
Diffstat (limited to 'libqpdf')
-rw-r--r-- | libqpdf/QPDF.cc | 17 | ||||
-rw-r--r-- | libqpdf/QPDF_encryption.cc | 21 |
2 files changed, 28 insertions, 10 deletions
```diff
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 00e13ca0..ad742daa 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -430,11 +430,22 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
     std::map<int, int> free_table;
     while (xref_offset)
     {
+        char buf[7];
+        memset(buf, 0, sizeof(buf));
         this->file->seek(xref_offset, SEEK_SET);
-        std::string line = this->file->readLine(50);
-        if (line == "xref")
+        this->file->read(buf, sizeof(buf) - 1);
+        // The PDF spec says xref must be followed by a line
+        // terminator, but files exist in the wild where it is
+        // terminated by arbitrary whitespace.
+        PCRE xref_re("^xref\\s+");
+        PCRE::Match m = xref_re.match(buf);
+        if (m)
         {
-            xref_offset = read_xrefTable(this->file->tell());
+            QTC::TC("qpdf", "QPDF xref space",
+                    ((buf[4] == '\n') ? 0 :
+                     (buf[4] == '\r') ? 1 :
+                     (buf[4] == ' ') ? 2 : 9999));
+            xref_offset = read_xrefTable(xref_offset + m.getMatch(0).length());
         }
         else
         {
diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc
index 51ed3072..88cd707f 100644
--- a/libqpdf/QPDF_encryption.cc
+++ b/libqpdf/QPDF_encryption.cc
@@ -791,17 +791,24 @@ QPDF::initializeEncryption()
     // encryption dictionary.
     this->encrypted = true;
 
+    std::string id1;
     QPDFObjectHandle id_obj = this->trailer.getKey("/ID");
-    if (! (id_obj.isArray() &&
-           (id_obj.getArrayNItems() == 2) &&
-           id_obj.getArrayItem(0).isString()))
+    if ((id_obj.isArray() &&
+         (id_obj.getArrayNItems() == 2) &&
+         id_obj.getArrayItem(0).isString()))
     {
-        throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
-                      "trailer", this->file->getLastOffset(),
-                      "invalid /ID in trailer dictionary");
+        id1 = id_obj.getArrayItem(0).getStringValue();
+    }
+    else
+    {
+        // Treating a missing ID as the empty string enables qpdf to
+        // decrypt some invalid encrypted files with no /ID that
+        // poppler can read but Adobe Reader can't.
+        warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
+                     "trailer", this->file->getLastOffset(),
+                     "invalid /ID in trailer dictionary"));
     }
 
-    std::string id1 = id_obj.getArrayItem(0).getStringValue();
     QPDFObjectHandle encryption_dict = this->trailer.getKey("/Encrypt");
     if (! encryption_dict.isDictionary())
     {
```