From 7f84239cad2ec58166245394e56a4647085e025e Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 25 Dec 2012 14:38:18 -0500 Subject: Find PDF header anywhere in the first 1024 bytes --- libqpdf/OffsetInputSource.cc | 61 +++++++++++++++++++++++++++++++++++++++ libqpdf/QPDF.cc | 20 +++++++++++-- libqpdf/build.mk | 1 + libqpdf/qpdf/OffsetInputSource.hh | 29 +++++++++++++++++++ 4 files changed, 108 insertions(+), 3 deletions(-) create mode 100644 libqpdf/OffsetInputSource.cc create mode 100644 libqpdf/qpdf/OffsetInputSource.hh (limited to 'libqpdf') diff --git a/libqpdf/OffsetInputSource.cc b/libqpdf/OffsetInputSource.cc new file mode 100644 index 00000000..c1ec4102 --- /dev/null +++ b/libqpdf/OffsetInputSource.cc @@ -0,0 +1,61 @@ +#include + +OffsetInputSource::OffsetInputSource(PointerHolder proxied, + qpdf_offset_t global_offset) : + proxied(proxied), + global_offset(global_offset) /* keeps the wrapped source and the offset used by tell(), seek() and findAndSkipNextEOL() */ +{ +} + +OffsetInputSource::~OffsetInputSource() +{ +} + +qpdf_offset_t +OffsetInputSource::findAndSkipNextEOL() +{ + return this->proxied->findAndSkipNextEOL() - this->global_offset; /* proxied result expressed relative to global_offset */ +} + +std::string const& +OffsetInputSource::getName() const +{ + return this->proxied->getName(); /* forwarded unchanged */ +} + +qpdf_offset_t +OffsetInputSource::tell() +{ + return this->proxied->tell() - this->global_offset; /* proxied position expressed relative to global_offset */ +} + +void +OffsetInputSource::seek(qpdf_offset_t offset, int whence) +{ + if (whence == SEEK_SET) + { + this->proxied->seek(offset + global_offset, whence); /* absolute target: shift by global_offset before forwarding */ + } + else + { + this->proxied->seek(offset, whence); /* any other whence: offset forwarded unchanged. NOTE(review): nothing here stops the position from ending up before global_offset, in which case tell() returns a negative value -- confirm callers never do that */ + } +} + +void +OffsetInputSource::rewind() +{ + seek(0, SEEK_SET); /* goes through seek() above, so the proxied source is asked for position global_offset */ +} + +size_t +OffsetInputSource::read(char* buffer, size_t length) +{ + return this->proxied->read(buffer, length); /* forwarded unchanged */ +} + +void +OffsetInputSource::unreadCh(char ch) +{ + this->proxied->unreadCh(ch); /* forwarded unchanged */ +} diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index ccbfaf7c..ba96cb64 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -213,7 +214,7 @@ 
QPDF::getWarnings() void QPDF::parse(char const* password) { - PCRE header_re("^%PDF-(1.\\d+)\\b"); + PCRE header_re("\\A((?s).*?)%PDF-(1.\\d+)\\b"); /* group 1 = leading material before the header, group 2 = version. NOTE(review): the '.' after "1" is unescaped and matches any character -- confirm that leniency is intended */ PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); if (password) @@ -221,11 +222,24 @@ QPDF::parse(char const* password) this->provided_password = password; } - std::string line = this->file->readLine(20); + // Find the header anywhere in the first 1024 bytes of the file. + char buffer[1044]; + size_t header_len = this->file->read(buffer, sizeof(buffer)); /* keep the byte count: a full read leaves no room for a terminating NUL and a short read leaves the tail of buffer unset */ + std::string line(buffer, header_len); /* bounded copy; never scans past the bytes actually read */ PCRE::Match m1 = header_re.match(line.c_str()); if (m1) { - this->pdf_version = m1.getMatch(1); + size_t global_offset = m1.getMatch(1).length(); + if (global_offset != 0) + { + // Empirical evidence strongly suggests that when there is + // leading material prior to the PDF header, all explicit + // offsets in the file are such that 0 points to the + // beginning of the header. + QTC::TC("qpdf", "QPDF global offset"); + this->file = new OffsetInputSource(this->file, global_offset); + } + this->pdf_version = m1.getMatch(2); if (atof(this->pdf_version.c_str()) < 1.2) { this->tokenizer.allowPoundAnywhereInName(); diff --git a/libqpdf/build.mk b/libqpdf/build.mk index 6debf107..0ad96a2d 100644 --- a/libqpdf/build.mk +++ b/libqpdf/build.mk @@ -12,6 +12,7 @@ SRCS_libqpdf = \ libqpdf/FileInputSource.cc \ libqpdf/InputSource.cc \ libqpdf/MD5.cc \ + libqpdf/OffsetInputSource.cc \ libqpdf/PCRE.cc \ libqpdf/Pipeline.cc \ libqpdf/Pl_AES_PDF.cc \ diff --git a/libqpdf/qpdf/OffsetInputSource.hh b/libqpdf/qpdf/OffsetInputSource.hh new file mode 100644 index 00000000..aedc574a --- /dev/null +++ b/libqpdf/qpdf/OffsetInputSource.hh @@ -0,0 +1,29 @@ +#ifndef __QPDF_OFFSETINPUTSOURCE_HH__ +#define __QPDF_OFFSETINPUTSOURCE_HH__ + +// This class implements an InputSource that proxies for an underlying +// input source but offset by a specific number of bytes. 
+ +#include +#include + +class OffsetInputSource: public InputSource +{ + public: + OffsetInputSource(PointerHolder, qpdf_offset_t global_offset); /* wraps the given source; global_offset is the shift applied between caller-visible and proxied positions */ + virtual ~OffsetInputSource(); + + virtual qpdf_offset_t findAndSkipNextEOL(); /* proxied result minus global_offset */ + virtual std::string const& getName() const; /* forwarded to the proxied source */ + virtual qpdf_offset_t tell(); /* proxied position minus global_offset */ + virtual void seek(qpdf_offset_t offset, int whence); /* SEEK_SET targets are shifted by global_offset; other whence values are forwarded unchanged */ + virtual void rewind(); /* equivalent to seek(0, SEEK_SET) */ + virtual size_t read(char* buffer, size_t length); /* forwarded to the proxied source */ + virtual void unreadCh(char ch); /* forwarded to the proxied source */ + + private: + PointerHolder proxied; /* the underlying input source every call is delegated to */ + qpdf_offset_t global_offset; /* subtracted from reported positions, added to SEEK_SET targets */ +}; + +#endif // __QPDF_OFFSETINPUTSOURCE_HH__ -- cgit v1.2.3-54-g00ecf