summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2017-08-05 20:54:07 +0200
committerJay Berkenbilt <ejb@ql.org>2017-08-11 03:30:32 +0200
commit03aa9679ac16be44348f29a97c2c36145ae5a35a (patch)
tree2795c5ee95ea63de7d26a7fd2a716d830f073802
parent1765c6ec20625b99451acceb1ffcaaca812f379e (diff)
downloadqpdf-03aa9679ac16be44348f29a97c2c36145ae5a35a.tar.zst
Find startxref without PCRE
-rw-r--r--include/qpdf/QPDF.hh1
-rw-r--r--libqpdf/QPDF.cc57
-rw-r--r--libqpdf/QPDFTokenizer.cc2
-rw-r--r--qpdf/qpdf.testcov1
-rw-r--r--qpdf/qtest/qpdf/issue-117.out2
5 files changed, 29 insertions, 34 deletions
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 072f4991..68525eba 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -1029,6 +1029,7 @@ class QPDF
// Methods to support pattern finding
bool findHeader();
+ bool findStartxref();
// methods to support linearization checking -- implemented in
// QPDF_linearization.cc
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 90ac749b..295787b1 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -254,11 +254,26 @@ QPDF::findHeader()
return valid;
}
+bool
+QPDF::findStartxref()
+{
+ QPDFTokenizer::Token t = readToken(this->file, true);
+ if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "startxref"))
+ {
+ t = readToken(this->file, true);
+ if (t.getType() == QPDFTokenizer::tt_integer)
+ {
+ // Seek back to the last recorded offset, in front of the integer offset token (presumably so the caller can re-read it)
+ this->file->seek(this->file->getLastOffset(), SEEK_SET);
+ return true;
+ }
+ }
+ return false;
+}
+
void
QPDF::parse(char const* password)
{
- PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
-
if (password)
{
this->provided_password = password;
@@ -283,47 +298,25 @@ QPDF::parse(char const* password)
// PDF spec says %%EOF must be found within the last 1024 bytes of
// the file. We add an extra 30 characters to leave room for the
// startxref stuff.
- static int const tbuf_size = 1054;
this->file->seek(0, SEEK_END);
- if (this->file->tell() > tbuf_size)
+ qpdf_offset_t end_offset = this->file->tell();
+ qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
+ PatternFinder sf(*this, &QPDF::findStartxref);
+ qpdf_offset_t xref_offset = 0;
+ if (this->file->findLast("startxref", start_offset, 0, sf))
{
- this->file->seek(-tbuf_size, SEEK_END);
- }
- else
- {
- this->file->rewind();
- }
- char* buf = new char[tbuf_size + 1];
- // Put buf in an array-style PointerHolder to guarantee deletion
- // of buf.
- PointerHolder<char> b(true, buf);
- memset(buf, '\0', tbuf_size + 1);
- this->file->read(buf, tbuf_size);
-
- // Since buf may contain null characters, we can't do a regexp
- // search on buf directly. Find the last occurrence within buf
- // where the regexp matches.
- char* p = buf;
- char const* candidate = "";
- while ((p = static_cast<char*>(memchr(p, 's', tbuf_size - (p - buf)))) != 0)
- {
- if (eof_re.match(p))
- {
- candidate = p;
- }
- ++p;
+ xref_offset = QUtil::string_to_ll(
+ readToken(this->file).getValue().c_str());
}
try
{
- PCRE::Match m2 = eof_re.match(candidate);
- if (! m2)
+ if (xref_offset == 0)
{
QTC::TC("qpdf", "QPDF can't find startxref");
throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0,
"can't find startxref");
}
- qpdf_offset_t xref_offset = QUtil::string_to_ll(m2.getMatch(1).c_str());
read_xref(xref_offset);
}
catch (QPDFExc& e)
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index c0e09279..6447b93b 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -521,7 +521,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
{
if (allow_bad)
{
-// QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
+ QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
}
else
{
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index f5690fd6..eb5ef1a2 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -290,3 +290,4 @@ qpdf read args from file 0
qpdf single-pages %d 0
qpdf single-pages .pdf 0
qpdf single-pages other 0
+QPDFTokenizer allowing bad token 0
diff --git a/qpdf/qtest/qpdf/issue-117.out b/qpdf/qtest/qpdf/issue-117.out
index 8aa05072..45625631 100644
--- a/qpdf/qtest/qpdf/issue-117.out
+++ b/qpdf/qtest/qpdf/issue-117.out
@@ -1,5 +1,5 @@
WARNING: issue-117.pdf: file is damaged
-WARNING: issue-117.pdf: can't find startxref
+WARNING: issue-117.pdf (file position 3526): xref not found
WARNING: issue-117.pdf: Attempting to reconstruct cross-reference table
WARNING: issue-117.pdf (file position 66): loop detected resolving object 2 0
WARNING: issue-117.pdf (object 2 0, file position 22): /Length key in stream dictionary is not an integer