aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/qpdf/BufferInputSource.hh10
-rw-r--r--include/qpdf/FileInputSource.hh11
-rw-r--r--include/qpdf/InputSource.hh35
-rw-r--r--include/qpdf/QPDF.hh21
-rw-r--r--libqpdf/InputSource.cc167
-rw-r--r--libqpdf/QPDF_linearization.cc2
-rw-r--r--libtests/build.mk1
-rw-r--r--libtests/input_source.cc108
-rw-r--r--libtests/libtests.testcov8
-rw-r--r--libtests/qtest/input_source.test26
-rw-r--r--libtests/qtest/input_source/input_source.out14
11 files changed, 402 insertions, 1 deletions
diff --git a/include/qpdf/BufferInputSource.hh b/include/qpdf/BufferInputSource.hh
index 64ee4605..db055783 100644
--- a/include/qpdf/BufferInputSource.hh
+++ b/include/qpdf/BufferInputSource.hh
@@ -15,17 +15,27 @@
class BufferInputSource: public InputSource
{
public:
+ QPDF_DLL
BufferInputSource(std::string const& description, Buffer* buf,
bool own_memory = false);
+ QPDF_DLL
BufferInputSource(std::string const& description,
std::string const& contents);
+ QPDF_DLL
virtual ~BufferInputSource();
+ QPDF_DLL
virtual qpdf_offset_t findAndSkipNextEOL();
+ QPDF_DLL
virtual std::string const& getName() const;
+ QPDF_DLL
virtual qpdf_offset_t tell();
+ QPDF_DLL
virtual void seek(qpdf_offset_t offset, int whence);
+ QPDF_DLL
virtual void rewind();
+ QPDF_DLL
virtual size_t read(char* buffer, size_t length);
+ QPDF_DLL
virtual void unreadCh(char ch);
private:
diff --git a/include/qpdf/FileInputSource.hh b/include/qpdf/FileInputSource.hh
index 64457365..3f0c05a9 100644
--- a/include/qpdf/FileInputSource.hh
+++ b/include/qpdf/FileInputSource.hh
@@ -14,16 +14,27 @@
class FileInputSource: public InputSource
{
public:
+ QPDF_DLL
FileInputSource();
+ QPDF_DLL
void setFilename(char const* filename);
+ QPDF_DLL
void setFile(char const* description, FILE* filep, bool close_file);
+ QPDF_DLL
virtual ~FileInputSource();
+ QPDF_DLL
virtual qpdf_offset_t findAndSkipNextEOL();
+ QPDF_DLL
virtual std::string const& getName() const;
+ QPDF_DLL
virtual qpdf_offset_t tell();
+ QPDF_DLL
virtual void seek(qpdf_offset_t offset, int whence);
+ QPDF_DLL
virtual void rewind();
+ QPDF_DLL
virtual size_t read(char* buffer, size_t length);
+ QPDF_DLL
virtual void unreadCh(char ch);
private:
diff --git a/include/qpdf/InputSource.hh b/include/qpdf/InputSource.hh
index a731918e..c20e5076 100644
--- a/include/qpdf/InputSource.hh
+++ b/include/qpdf/InputSource.hh
@@ -9,6 +9,7 @@
#ifndef __QPDF_INPUTSOURCE_HH__
#define __QPDF_INPUTSOURCE_HH__
+#include <qpdf/DLL.h>
#include <qpdf/Types.h>
#include <stdio.h>
#include <string>
@@ -16,18 +17,52 @@
class InputSource
{
public:
+ QPDF_DLL
InputSource() :
last_offset(0)
{
}
+ QPDF_DLL
virtual ~InputSource()
{
}
+    // Callback interface used by findFirst() and findLast(). Callers
+    // derive from Finder and implement check() to decide whether a
+    // candidate match is acceptable.
+    class Finder
+    {
+      public:
+        Finder() {}
+        virtual ~Finder() {}
+
+        // Invoked with the input source positioned at the beginning
+        // of a sequence matching start_chars. Return true to accept
+        // the match or false to make the search continue.
+        virtual bool check() = 0;
+    };
+
+ QPDF_DLL
void setLastOffset(qpdf_offset_t);
+ QPDF_DLL
qpdf_offset_t getLastOffset() const;
+ QPDF_DLL
std::string readLine(size_t max_line_length);
+ // Find first or last occurrence of a sequence of characters
+ // starting within the range defined by offset and len such that,
+ // when the input source is positioned at the beginning of that
+ // sequence, finder.check() returns true. If len is 0, the search
+ // proceeds until EOF. If a qualifying pattern is found, these
+ // methods return true and leave the input source positioned
+ // wherever check() left it at the end of the matching pattern.
+ QPDF_DLL
+ bool findFirst(char const* start_chars,
+ qpdf_offset_t offset, size_t len,
+ Finder& finder);
+ QPDF_DLL
+ bool findLast(char const* start_chars,
+ qpdf_offset_t offset, size_t len,
+ Finder& finder);
+
virtual qpdf_offset_t findAndSkipNextEOL() = 0;
virtual std::string const& getName() const = 0;
virtual qpdf_offset_t tell() = 0;
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index f57789a0..c9d120b4 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -1006,6 +1006,27 @@ class QPDF
std::string key; // if ou_trailer_key or ou_root_key
};
+    // Adapter that lets a QPDF member function serve as the check()
+    // callback of an InputSource::Finder: check() simply forwards to
+    // the stored member function on the stored QPDF object.
+    class PatternFinder: public InputSource::Finder
+    {
+      public:
+        // owner: object on which the member function is invoked.
+        // check_method: member function whose result check() returns.
+        PatternFinder(QPDF& owner, bool (QPDF::*check_method)()) :
+            qpdf(owner),
+            checker(check_method)
+        {
+        }
+        virtual ~PatternFinder()
+        {
+        }
+        virtual bool check()
+        {
+            return (qpdf.*checker)();
+        }
+
+      private:
+        QPDF& qpdf;
+        bool (QPDF::*checker)();
+    };
+
// methods to support linearization checking -- implemented in
// QPDF_linearization.cc
void readLinearizationData();
diff --git a/libqpdf/InputSource.cc b/libqpdf/InputSource.cc
index 79c889bf..69cafeb8 100644
--- a/libqpdf/InputSource.cc
+++ b/libqpdf/InputSource.cc
@@ -1,7 +1,10 @@
#include <qpdf/InputSource.hh>
#include <string.h>
+#include <stdexcept>
+#include <qpdf/QTC.hh>
#include <qpdf/PointerHolder.hh>
+
void
InputSource::setLastOffset(qpdf_offset_t offset)
{
@@ -39,3 +42,167 @@ InputSource::readLine(size_t max_line_length)
}
return std::string(buf);
}
+
+// Search forward from offset for the first sequence of bytes equal to
+// start_chars for which finder.check() returns true when the input
+// source is positioned at the beginning of that sequence.
+//
+// start_chars: null-terminated sequence to look for; must contain at
+//   least one character and no more than the internal block size
+//   (1024 bytes).
+// offset, len: a match must begin within len bytes of offset. If len
+//   is 0, the search continues until EOF.
+// finder: supplies the caller-specific check() for each candidate.
+//
+// Returns true if a candidate was accepted, in which case the input
+// source is left wherever check() left it. Returns false if EOF or
+// the end of the range is reached first. Throws std::logic_error if
+// start_chars is empty or too long.
+bool
+InputSource::findFirst(char const* start_chars,
+                       qpdf_offset_t offset, size_t len,
+                       Finder& finder)
+{
+    // Basic approach: search for the first character of start_chars
+    // starting from offset but not going past len (if len != 0). Once
+    // the first character is found, see if it is the beginning of a
+    // sequence of characters matching start_chars. If so, call
+    // finder.check() to do caller-specific additional checks. If not,
+    // keep searching.
+
+    // This code is tricky and highly subject to off-by-one or other
+    // edge case logic errors. See comments throughout that explain
+    // how we're not missing any edge cases. There are also tests
+    // specifically constructed to make sure we caught the edge cases
+    // in testing.
+
+    char buf[1025]; // size known to input_source.cc in libtests
+    // To enable us to guarantee null-termination, save an extra byte
+    // so that buf[size] is valid memory.
+    size_t size = sizeof(buf) - 1;
+    // start_chars never changes during the search, so measure it once
+    // rather than on every pass through the loop.
+    size_t const start_len = strlen(start_chars);
+    if ((start_len < 1) || (start_len > size))
+    {
+        throw std::logic_error(
+            "InputSource::findFirst called with"
+            " too small or too large of a character sequence");
+    }
+
+    char* p = 0;
+    qpdf_offset_t buf_offset = offset;
+    size_t bytes_read = 0;
+
+    // Guarantee that we return from this loop. Each time through, we
+    // either return, advance p, or restart the loop with a condition
+    // that will cause return on the next pass. Eventually we will
+    // either be out of range or hit EOF, either of which forces us to
+    // return.
+    while (true)
+    {
+        // Do we need to read more data? Pretend size = 5, buf starts
+        // at 0, and start_chars has 3 characters. buf[5] is valid and
+        // null. If p == 2, start_chars could be buf[2] through
+        // buf[4], so p + start_len == buf + size is okay. If p points
+        // to buf[size], since start_len is always >= 1, this overflow
+        // test will be correct for that case regardless of
+        // start_chars.
+        if ((p == 0) || ((p + start_len) > (buf + bytes_read)))
+        {
+            if (p)
+            {
+                QTC::TC("libtests", "InputSource read next block",
+                        ((p == buf + bytes_read) ? 0 : 1));
+                // Resume reading at p so that a candidate straddling
+                // the end of this block starts the next block.
+                buf_offset += (p - buf);
+            }
+            this->seek(buf_offset, SEEK_SET);
+            // Read into buffer and zero out the rest of the buffer
+            // including buf[size]. We allocated an extra byte so that
+            // we could guarantee null termination as an extra
+            // protection against overrun when using string functions.
+            bytes_read = this->read(buf, size);
+            if (bytes_read < start_len)
+            {
+                QTC::TC("libtests", "InputSource find EOF",
+                        bytes_read == 0 ? 0 : 1);
+                return false;
+            }
+            memset(buf + bytes_read, '\0', 1 + (size - bytes_read));
+            p = buf;
+        }
+
+        // Search for the first character.
+        if ((p = static_cast<char*>(
+                 memchr(p, start_chars[0], bytes_read - (p - buf)))) != 0)
+        {
+            if (p == buf)
+            {
+                QTC::TC("libtests", "InputSource found match at buf[0]");
+            }
+            // Found first letter.
+            if (len != 0)
+            {
+                // Make sure it's in range.
+                size_t p_relative_offset = (p - buf) + (buf_offset - offset);
+                if (p_relative_offset >= len)
+                {
+                    // out of range
+                    QTC::TC("libtests", "InputSource out of range");
+                    return false;
+                }
+            }
+            if ((p + start_len) > (buf + bytes_read))
+            {
+                // If there are not enough bytes left in the file for
+                // start_chars, we will detect this on the next pass
+                // as EOF and return.
+                QTC::TC("libtests", "InputSource not enough bytes");
+                continue;
+            }
+
+            // See if p points to a sequence matching start_chars. We
+            // already checked above to make sure we are not going to
+            // overrun memory.
+            if (strncmp(p, start_chars, start_len) == 0)
+            {
+                // Call finder.check() with the input source
+                // positioned to the point of the match.
+                this->seek(buf_offset + (p - buf), SEEK_SET);
+                if (finder.check())
+                {
+                    return true;
+                }
+                else
+                {
+                    QTC::TC("libtests", "InputSource start_chars matched but not check");
+                }
+            }
+            else
+            {
+                QTC::TC("libtests", "InputSource first char matched but not string");
+            }
+            // This occurrence of the first character wasn't a match.
+            // Skip over it and keep searching.
+            ++p;
+        }
+        else
+        {
+            // Trigger reading the next block
+            p = buf + bytes_read;
+        }
+    }
+    throw std::logic_error("InputSource after while (true)");
+}
+
+// Find the last sequence matching start_chars that begins within the
+// range defined by offset and len (len == 0 means "until EOF") and
+// for which finder.check() returns true. This works by calling
+// findFirst() repeatedly, each time resuming from wherever check()
+// left the input source after the previously accepted match.
+//
+// Returns true if at least one match was accepted, leaving the input
+// source where check() left it for the last accepted match. Returns
+// false if nothing was accepted. Any exception thrown by findFirst()
+// propagates.
+bool
+InputSource::findLast(char const* start_chars,
+                      qpdf_offset_t offset, size_t len,
+                      Finder& finder)
+{
+    bool found = false;
+    qpdf_offset_t after_found_offset = 0;
+    qpdf_offset_t cur_offset = offset;
+    size_t cur_len = len;
+    while (this->findFirst(start_chars, cur_offset, cur_len, finder))
+    {
+        if (found)
+        {
+            QTC::TC("libtests", "InputSource findLast found more than one");
+        }
+        else
+        {
+            found = true;
+        }
+        after_found_offset = this->tell();
+        cur_offset = after_found_offset;
+        if (len != 0)
+        {
+            // Shrink the remaining range. If check() consumed the
+            // whole range (or more), stop here: a remaining length
+            // of 0, or one that wrapped around, would be treated by
+            // findFirst() as "search until EOF" and could accept a
+            // match that lies outside the requested range.
+            qpdf_offset_t consumed = cur_offset - offset;
+            if ((consumed > 0) &&
+                (static_cast<unsigned long long>(consumed) >= len))
+            {
+                break;
+            }
+            cur_len = len - consumed;
+        }
+        // When len == 0, cur_len stays 0 so that every subsequent
+        // search also runs until EOF.
+    }
+    if (found)
+    {
+        // The final, unsuccessful findFirst() moved the input source;
+        // put it back just after the last accepted match.
+        this->seek(after_found_offset, SEEK_SET);
+    }
+    return found;
+}
diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc
index 4e71f8d2..86a61081 100644
--- a/libqpdf/QPDF_linearization.cc
+++ b/libqpdf/QPDF_linearization.cc
@@ -118,7 +118,7 @@ QPDF::isLinearized()
}
else
{
- p = reinterpret_cast<char*>(memchr(p, '\0', tbuf_size - (p - buf)));
+ p = static_cast<char*>(memchr(p, '\0', tbuf_size - (p - buf)));
assert(p != 0);
while ((p - buf < tbuf_size) && (*p == 0))
{
diff --git a/libtests/build.mk b/libtests/build.mk
index 22d9299e..2a272799 100644
--- a/libtests/build.mk
+++ b/libtests/build.mk
@@ -6,6 +6,7 @@ BINS_libtests = \
concatenate \
flate \
hex \
+ input_source \
lzw \
md5 \
pcre \
diff --git a/libtests/input_source.cc b/libtests/input_source.cc
new file mode 100644
index 00000000..091a1ea1
--- /dev/null
+++ b/libtests/input_source.cc
@@ -0,0 +1,108 @@
+#include <cstring>
+#include <iostream>
+#include <qpdf/BufferInputSource.hh>
+#include <qpdf/PointerHolder.hh>
+#include <qpdf/Buffer.hh>
+#include <qpdf/QPDFTokenizer.hh>
+
+// Build the 3172-byte test buffer. The byte at index i is set to the
+// low eight bits of i, so the contents repeat 0x00..0xff.
+static PointerHolder<Buffer>
+get_buffer()
+{
+    size_t const total = 3172;
+    PointerHolder<Buffer> holder(new Buffer(total));
+    unsigned char* bytes = holder->getBuffer();
+    size_t pos = 0;
+    while (pos < total)
+    {
+        bytes[pos] = static_cast<unsigned char>(pos & 0xff);
+        ++pos;
+    }
+    return holder;
+}
+
+// Finder used by these tests. It holds the input source being
+// searched and the word that check() requires after "potato".
+class Finder: public InputSource::Finder
+{
+  public:
+    // source: input source that check() reads tokens from.
+    // expected_word: word that must follow "potato" for a match.
+    Finder(PointerHolder<InputSource> source,
+           std::string const& expected_word) :
+        is(source),
+        after(expected_word)
+    {
+    }
+    virtual ~Finder()
+    {
+    }
+    virtual bool check();
+
+  private:
+    PointerHolder<InputSource> is;
+    std::string after;
+};
+
+// Accept the current position only if the next two tokens read from
+// the input source are the word "potato" followed by the word stored
+// in this->after. The second token is read only when the first one
+// is "potato".
+bool
+Finder::check()
+{
+    QPDFTokenizer tokenizer;
+    QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
+    if (! (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "potato")))
+    {
+        return false;
+    }
+    t = tokenizer.readToken(is, "finder", true);
+    QPDFTokenizer::Token wanted(QPDFTokenizer::tt_word, after);
+    return (t == wanted);
+}
+
+// Print "<description>: PASS" when actual equals expected and
+// "<description>: FAIL" otherwise, followed by a newline.
+void check(char const* description, bool expected, bool actual)
+{
+    char const* verdict = "FAIL";
+    if (expected == actual)
+    {
+        verdict = "PASS";
+    }
+    std::cout << description << ": " << verdict << std::endl;
+}
+
+// Exercise InputSource::findFirst() and findLast() against an
+// in-memory buffer. The word "potato" is planted at offsets chosen
+// relative to the 1024-byte blocks that findFirst() reads, and each
+// result is reported as a PASS/FAIL line by check(). The order of the
+// steps matters: later steps overwrite parts of the same buffer.
+int main()
+{
+    PointerHolder<Buffer> b1 = get_buffer();
+    unsigned char* b = b1->getBuffer();
+    // Straddle block boundaries
+    std::memcpy(b + 1022, "potato", 6);
+    // Overlap so that the first check() would advance past the start
+    // of the next match
+    std::memcpy(b + 2037, "potato potato salad ", 20);
+    PointerHolder<InputSource> is =
+        new BufferInputSource("test buffer input source", b1.getPointer());
+    Finder f1(is, "salad");
+    check("find potato salad", true,
+          is->findFirst("potato", 0, 0, f1));
+    // The accepted "potato" starts at offset 2044, so a range must
+    // extend at least one byte past it for the match to be in range.
+    check("barely find potato salad", true,
+          is->findFirst("potato", 1100, 945, f1));
+    check("barely find potato salad", true,
+          is->findFirst("potato", 2000, 45, f1));
+    check("potato salad is too late", false,
+          is->findFirst("potato", 1100, 944, f1));
+    check("potato salad is too late", false,
+          is->findFirst("potato", 2000, 44, f1));
+    check("potato salad not found", false,
+          is->findFirst("potato", 2045, 0, f1));
+    check("potato salad not found", false,
+          is->findFirst("potato", 0, 1, f1));
+
+    // Put one more right at EOF
+    std::memcpy(b + b1->getSize() - 12, "potato salad", 12);
+    check("potato salad at EOF", true,
+          is->findFirst("potato", 3000, 0, f1));
+
+    // Result intentionally unused: this call only positions the
+    // input source so that tell() can be checked.
+    is->findFirst("potato", 0, 0, f1);
+    check("findFirst found first", true,
+          is->tell() == 2056);
+    check("findLast found potato salad", true,
+          is->findLast("potato", 0, 0, f1));
+    check("findLast found at EOF", true,
+          is->tell() == 3172);
+
+    // Make check() bump into EOF
+    std::memcpy(b + b1->getSize() - 6, "potato", 6);
+    check("potato but not salad salad at EOF", false,
+          is->findFirst("potato", 3000, 0, f1));
+    check("findLast found potato salad", true,
+          is->findLast("potato", 0, 0, f1));
+    check("findLast found first one", true,
+          is->tell() == 2056);
+
+    return 0;
+}
diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov
index ddbccd24..a5fe625f 100644
--- a/libtests/libtests.testcov
+++ b/libtests/libtests.testcov
@@ -16,3 +16,11 @@ bits write zero bits 0
Pl_ASCIIHexDecoder ignore space 0
Pl_ASCIIHexDecoder no-op flush 0
Pl_ASCIIHexDecoder partial flush 1
+InputSource read next block 1
+InputSource find EOF 1
+InputSource out of range 0
+InputSource first char matched but not string 0
+InputSource start_chars matched but not check 0
+InputSource not enough bytes 0
+InputSource findLast found more than one 0
+InputSource found match at buf[0] 0
diff --git a/libtests/qtest/input_source.test b/libtests/qtest/input_source.test
new file mode 100644
index 00000000..89a1c21a
--- /dev/null
+++ b/libtests/qtest/input_source.test
@@ -0,0 +1,26 @@
+#!/usr/bin/env perl
+# Test driver script for the libtests "input_source" program: it runs
+# the program once and compares the result with input_source.out.
+require 5.008;
+use warnings;
+use strict;
+
+# Work from the input_source directory, where the expected output
+# file lives.
+chdir("input_source") or die "chdir testdir failed: $!\n";
+
+require TestDriver;
+
+my $td = new TestDriver('InputSource');
+
+cleanup();
+
+# NOTE(review): presumably this requires the command's output to
+# match input_source.out (with newlines normalized) and its exit
+# status to be 0 -- confirm against the TestDriver documentation.
+$td->runtest("input source tests",
+ {$td->COMMAND => "input_source"},
+ {$td->FILE => "input_source.out",
+ $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+
+cleanup();
+
+# NOTE(review): the argument is presumably the expected number of
+# test cases (one runtest call above) -- confirm.
+$td->report(1);
+
+# Currently empty: called before and after the test run but removes
+# nothing.
+sub cleanup
+{
+}
diff --git a/libtests/qtest/input_source/input_source.out b/libtests/qtest/input_source/input_source.out
new file mode 100644
index 00000000..6af379c6
--- /dev/null
+++ b/libtests/qtest/input_source/input_source.out
@@ -0,0 +1,14 @@
+find potato salad: PASS
+barely find potato salad: PASS
+barely find potato salad: PASS
+potato salad is too late: PASS
+potato salad is too late: PASS
+potato salad not found: PASS
+potato salad not found: PASS
+potato salad at EOF: PASS
+findFirst found first: PASS
+findLast found potato salad: PASS
+findLast found at EOF: PASS
+potato but not salad salad at EOF: PASS
+findLast found potato salad: PASS
+findLast found first one: PASS