From 1765c6ec20625b99451acceb1ffcaaca812f379e Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 5 Aug 2017 22:34:25 -0400 Subject: Find header without PCRE --- include/qpdf/QPDF.hh | 3 ++ libqpdf/QPDF.cc | 72 +++++++++++++++++++++++++++++++------------ qpdf/qtest/qpdf/issue-118.out | 1 + qpdf/qtest/qpdf/issue-51.out | 1 + 4 files changed, 57 insertions(+), 20 deletions(-) diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index c9d120b4..072f4991 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1027,6 +1027,9 @@ class QPDF bool (QPDF::*checker)(); }; + // Methods to support pattern finding + bool findHeader(); + // methods to support linearization checking -- implemented in // QPDF_linearization.cc void readLinearizationData(); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 3a8dc875..90ac749b 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -202,27 +202,45 @@ QPDF::getWarnings() return result; } -void -QPDF::parse(char const* password) +bool +QPDF::findHeader() { - PCRE header_re("\\A((?s).*?)%PDF-(\\d+.\\d+)\\b"); - PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); - - if (password) - { - this->provided_password = password; + qpdf_offset_t global_offset = this->file->tell(); + std::string line = this->file->readLine(1024); + char const* p = line.c_str(); + if (strncmp(p, "%PDF-", 5) != 0) + { + throw std::logic_error("findHeader is not looking at %PDF-"); + } + p += 5; + std::string version; + // Note: The string returned by line.c_str() is always + // null-terminated. The code below never overruns the buffer + // because a null character always short-circuits further + // advancement. + bool valid = QUtil::is_digit(*p); + if (valid) + { + while (QUtil::is_digit(*p)) + { + version.append(1, *p++); + } + if ((*p == '.') && QUtil::is_digit(*(p+1))) + { + version.append(1, *p++); + while (QUtil::is_digit(*p)) + { + version.append(1, *p++); + } + } + else + { + valid = false; + } } - - // Find the header anywhere in the first 1024 bytes of the file, - // plus add a little extra space for the header itself. - char buffer[1045]; - memset(buffer, '\0', sizeof(buffer)); - this->file->read(buffer, sizeof(buffer) - 1); - std::string line(buffer); - PCRE::Match m1 = header_re.match(line.c_str()); - if (m1) + if (valid) { - size_t global_offset = m1.getMatch(1).length(); + this->pdf_version = version; if (global_offset != 0) { // Empirical evidence strongly suggests that when there is @@ -232,9 +250,23 @@ QPDF::parse(char const* password) QTC::TC("qpdf", "QPDF global offset"); this->file = new OffsetInputSource(this->file, global_offset); } - this->pdf_version = m1.getMatch(2); } - else + return valid; +} + +void +QPDF::parse(char const* password) +{ + PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); + + if (password) + { + this->provided_password = password; + } + + // Find the header anywhere in the first 1024 bytes of the file. + PatternFinder hf(*this, &QPDF::findHeader); + if (! this->file->findFirst("%PDF-", 0, 1024, hf)) { QTC::TC("qpdf", "QPDF not a pdf file"); warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), diff --git a/qpdf/qtest/qpdf/issue-118.out b/qpdf/qtest/qpdf/issue-118.out index 58b0778c..18c20b20 100644 --- a/qpdf/qtest/qpdf/issue-118.out +++ b/qpdf/qtest/qpdf/issue-118.out @@ -1,3 +1,4 @@ +WARNING: issue-118.pdf: can't find PDF header WARNING: issue-118.pdf (file position 732): loop detected resolving object 2 0 WARNING: issue-118.pdf (xref stream: object 8 0, file position 732): supposed object stream 2 is not a stream issue-118.pdf (file position 732): unable to find /Root dictionary diff --git a/qpdf/qtest/qpdf/issue-51.out b/qpdf/qtest/qpdf/issue-51.out index 528c2189..d291fee3 100644 --- a/qpdf/qtest/qpdf/issue-51.out +++ b/qpdf/qtest/qpdf/issue-51.out @@ -1,3 +1,4 @@ +WARNING: issue-51.pdf: can't find PDF header WARNING: issue-51.pdf: reported number of objects (0) inconsistent with actual number of objects (9) WARNING: issue-51.pdf (object 7 0, file position 553): expected endobj WARNING: issue-51.pdf (object 1 0, file position 359): expected endobj -- cgit v1.2.3-54-g00ecf