aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2012-12-25 20:38:18 +0100
committerJay Berkenbilt <ejb@ql.org>2012-12-25 20:43:37 +0100
commit7f84239cad2ec58166245394e56a4647085e025e (patch)
treede91917df48f54d645c90f67a1cb1a49214b5d5f /libqpdf
parentbcfc9847beb0f059a98ef5c8c02646b43fab4272 (diff)
downloadqpdf-7f84239cad2ec58166245394e56a4647085e025e.tar.zst
Find PDF header anywhere in the first 1024 bytes
Diffstat (limited to 'libqpdf')
-rw-r--r--libqpdf/OffsetInputSource.cc61
-rw-r--r--libqpdf/QPDF.cc20
-rw-r--r--libqpdf/build.mk1
-rw-r--r--libqpdf/qpdf/OffsetInputSource.hh29
4 files changed, 108 insertions, 3 deletions
diff --git a/libqpdf/OffsetInputSource.cc b/libqpdf/OffsetInputSource.cc
new file mode 100644
index 00000000..c1ec4102
--- /dev/null
+++ b/libqpdf/OffsetInputSource.cc
@@ -0,0 +1,61 @@
+#include <qpdf/OffsetInputSource.hh>
+
+#include <stdexcept>
+
+// Construct an input source that forwards to `proxied` while treating
+// `global_offset` as the shift between this source's positions and the
+// proxied source's positions. Both arguments are simply stored; no I/O
+// is performed here.
+OffsetInputSource::OffsetInputSource(PointerHolder<InputSource> proxied,
+ qpdf_offset_t global_offset) :
+ proxied(proxied),
+ global_offset(global_offset)
+{
+}
+
+// Intentionally empty: this class performs no explicit cleanup of its
+// own in the destructor body.
+OffsetInputSource::~OffsetInputSource()
+{
+}
+
+// Forward to the proxied source, then translate the position it
+// reports into this source's coordinates by subtracting the global
+// offset.
+qpdf_offset_t
+OffsetInputSource::findAndSkipNextEOL()
+{
+    qpdf_offset_t const proxied_result =
+        this->proxied->findAndSkipNextEOL();
+    return proxied_result - this->global_offset;
+}
+
+// The name is not affected by the offset, so report whatever the
+// proxied source calls itself.
+std::string const&
+OffsetInputSource::getName() const
+{
+    std::string const& proxied_name = this->proxied->getName();
+    return proxied_name;
+}
+
+// Report the current position in this source's coordinates: the
+// proxied source's position minus the global offset.
+qpdf_offset_t
+OffsetInputSource::tell()
+{
+    qpdf_offset_t const proxied_pos = this->proxied->tell();
+    return proxied_pos - this->global_offset;
+}
+
+// Reposition the input source.
+//
+// For SEEK_SET, `offset` is expressed in this source's coordinates, so
+// the global offset is added before the request is handed to the
+// proxied source. For any other `whence`, the request is forwarded
+// unchanged.
+//
+// Throws std::runtime_error if, after the seek, the position lies
+// before the start of this offset view (that is, tell() is negative).
+// Without this check a caller could silently end up inside the leading
+// material that this class is meant to hide.
+void
+OffsetInputSource::seek(qpdf_offset_t offset, int whence)
+{
+    if (whence == SEEK_SET)
+    {
+        this->proxied->seek(offset + this->global_offset, whence);
+    }
+    else
+    {
+        this->proxied->seek(offset, whence);
+    }
+    if (tell() < 0)
+    {
+        throw std::runtime_error(
+            "offset input source: seek before beginning of file");
+    }
+}
+
+// Return to position 0 of this source. This goes through this class's
+// own seek(), so the global offset is applied to the proxied source.
+void
+OffsetInputSource::rewind()
+{
+    this->seek(0, SEEK_SET);
+}
+
+// Read up to `length` bytes into `buffer` from the proxied source and
+// return the count it reports. No position is passed in, so no offset
+// translation is applied.
+size_t
+OffsetInputSource::read(char* buffer, size_t length)
+{
+    size_t const bytes_read = this->proxied->read(buffer, length);
+    return bytes_read;
+}
+
+// Forward the unread request for `ch` to the proxied source unchanged;
+// no offset is involved in this call.
+void
+OffsetInputSource::unreadCh(char ch)
+{
+ this->proxied->unreadCh(ch);
+}
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index ccbfaf7c..ba96cb64 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -13,6 +13,7 @@
#include <qpdf/Pl_Discard.hh>
#include <qpdf/FileInputSource.hh>
#include <qpdf/BufferInputSource.hh>
+#include <qpdf/OffsetInputSource.hh>
#include <qpdf/QPDFExc.hh>
#include <qpdf/QPDF_Null.hh>
@@ -213,7 +214,7 @@ QPDF::getWarnings()
void
QPDF::parse(char const* password)
{
- PCRE header_re("^%PDF-(1.\\d+)\\b");
+ PCRE header_re("\\A((?s).*?)%PDF-(1.\\d+)\\b");
PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
if (password)
@@ -221,11 +222,24 @@ QPDF::parse(char const* password)
this->provided_password = password;
}
- std::string line = this->file->readLine(20);
+ // Find the header anywhere in the first 1024 bytes of the file.
+ char buffer[1044];
+ this->file->read(buffer, sizeof(buffer));
+ std::string line(buffer);
PCRE::Match m1 = header_re.match(line.c_str());
if (m1)
{
- this->pdf_version = m1.getMatch(1);
+ size_t global_offset = m1.getMatch(1).length();
+ if (global_offset != 0)
+ {
+ // Empirical evidence strongly suggests that when there is
+ // leading material prior to the PDF header, all explicit
+ // offsets in the file are such that 0 points to the
+ // beginning of the header.
+ QTC::TC("qpdf", "QPDF global offset");
+ this->file = new OffsetInputSource(this->file, global_offset);
+ }
+ this->pdf_version = m1.getMatch(2);
if (atof(this->pdf_version.c_str()) < 1.2)
{
this->tokenizer.allowPoundAnywhereInName();
diff --git a/libqpdf/build.mk b/libqpdf/build.mk
index 6debf107..0ad96a2d 100644
--- a/libqpdf/build.mk
+++ b/libqpdf/build.mk
@@ -12,6 +12,7 @@ SRCS_libqpdf = \
libqpdf/FileInputSource.cc \
libqpdf/InputSource.cc \
libqpdf/MD5.cc \
+ libqpdf/OffsetInputSource.cc \
libqpdf/PCRE.cc \
libqpdf/Pipeline.cc \
libqpdf/Pl_AES_PDF.cc \
diff --git a/libqpdf/qpdf/OffsetInputSource.hh b/libqpdf/qpdf/OffsetInputSource.hh
new file mode 100644
index 00000000..aedc574a
--- /dev/null
+++ b/libqpdf/qpdf/OffsetInputSource.hh
@@ -0,0 +1,29 @@
+#ifndef __QPDF_OFFSETINPUTSOURCE_HH__
+#define __QPDF_OFFSETINPUTSOURCE_HH__
+
+// This class implements an InputSource that proxies for an underlying
+// input source, with all positions shifted by a fixed number of bytes.
+
+#include <qpdf/InputSource.hh>
+#include <qpdf/PointerHolder.hh>
+
+class OffsetInputSource: public InputSource
+{
+  public:
+    // proxied is the input source to forward to; global_offset is the
+    // number of bytes by which positions in this source differ from
+    // positions in the proxied source.
+    OffsetInputSource(PointerHolder<InputSource> proxied,
+                      qpdf_offset_t global_offset);
+    virtual ~OffsetInputSource();
+
+    // Positions returned by findAndSkipNextEOL() and tell() are the
+    // proxied source's positions minus global_offset.
+    virtual qpdf_offset_t findAndSkipNextEOL();
+    // Returns the proxied source's name.
+    virtual std::string const& getName() const;
+    virtual qpdf_offset_t tell();
+    // For SEEK_SET, global_offset is added to offset before the
+    // request is forwarded; other whence values are forwarded as is.
+    virtual void seek(qpdf_offset_t offset, int whence);
+    // Equivalent to seek(0, SEEK_SET).
+    virtual void rewind();
+    // read() and unreadCh() are forwarded to the proxied source
+    // unchanged.
+    virtual size_t read(char* buffer, size_t length);
+    virtual void unreadCh(char ch);
+
+  private:
+    // The input source all operations are forwarded to.
+    PointerHolder<InputSource> proxied;
+    // Shift between this source's positions and the proxied source's.
+    qpdf_offset_t global_offset;
+};
+
+#endif // __QPDF_OFFSETINPUTSOURCE_HH__