From 7f84239cad2ec58166245394e56a4647085e025e Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 25 Dec 2012 14:38:18 -0500 Subject: Find PDF header anywhere in the first 1024 bytes --- libqpdf/QPDF.cc | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'libqpdf/QPDF.cc') diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index ccbfaf7c..ba96cb64 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -213,7 +214,7 @@ QPDF::getWarnings() void QPDF::parse(char const* password) { - PCRE header_re("^%PDF-(1.\\d+)\\b"); + PCRE header_re("\\A((?s).*?)%PDF-(1.\\d+)\\b"); PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); if (password) @@ -221,11 +222,24 @@ QPDF::parse(char const* password) this->provided_password = password; } - std::string line = this->file->readLine(20); + // Find the header anywhere in the first 1024 bytes of the file. + char buffer[1044]; + this->file->read(buffer, sizeof(buffer)); + std::string line(buffer); PCRE::Match m1 = header_re.match(line.c_str()); if (m1) { - this->pdf_version = m1.getMatch(1); + size_t global_offset = m1.getMatch(1).length(); + if (global_offset != 0) + { + // Empirical evidence strongly suggests that when there is + // leading material prior to the PDF header, all explicit + // offsets in the file are such that 0 points to the + // beginning of the header. + QTC::TC("qpdf", "QPDF global offset"); + this->file = new OffsetInputSource(this->file, global_offset); + } + this->pdf_version = m1.getMatch(2); if (atof(this->pdf_version.c_str()) < 1.2) { this->tokenizer.allowPoundAnywhereInName(); -- cgit v1.2.3-70-g09d2