aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2017-08-05 20:54:07 +0200
committer: Jay Berkenbilt <ejb@ql.org> 2017-08-11 03:30:32 +0200
commit: 03aa9679ac16be44348f29a97c2c36145ae5a35a (patch)
tree: 2795c5ee95ea63de7d26a7fd2a716d830f073802 /libqpdf
parent: 1765c6ec20625b99451acceb1ffcaaca812f379e (diff)
download: qpdf-03aa9679ac16be44348f29a97c2c36145ae5a35a.tar.zst
Find startxref without PCRE
Diffstat (limited to 'libqpdf')
-rw-r--r-- libqpdf/QPDF.cc 57
-rw-r--r-- libqpdf/QPDFTokenizer.cc 2
2 files changed, 26 insertions, 33 deletions
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 90ac749b..295787b1 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -254,11 +254,26 @@ QPDF::findHeader()
return valid;
}
+bool
+QPDF::findStartxref() // match callback handed to PatternFinder in QPDF::parse()
+{
+ QPDFTokenizer::Token t = readToken(this->file, true);
+ if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "startxref"))
+ {
+ t = readToken(this->file, true); // a match needs an integer token right after the keyword
+ if (t.getType() == QPDFTokenizer::tt_integer)
+ {
+ // Seek back in front of the offset token so the caller can read it again
+ this->file->seek(this->file->getLastOffset(), SEEK_SET);
+ return true;
+ }
+ }
+ return false;
+}
+
void
QPDF::parse(char const* password)
{
- PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
-
if (password)
{
this->provided_password = password;
@@ -283,47 +298,25 @@ QPDF::parse(char const* password)
// PDF spec says %%EOF must be found within the last 1024 bytes of
// the file. We add an extra 30 characters to leave room for the
// startxref stuff.
- static int const tbuf_size = 1054;
this->file->seek(0, SEEK_END);
- if (this->file->tell() > tbuf_size)
+ qpdf_offset_t end_offset = this->file->tell();
+ qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
+ PatternFinder sf(*this, &QPDF::findStartxref);
+ qpdf_offset_t xref_offset = 0;
+ if (this->file->findLast("startxref", start_offset, 0, sf))
{
- this->file->seek(-tbuf_size, SEEK_END);
- }
- else
- {
- this->file->rewind();
- }
- char* buf = new char[tbuf_size + 1];
- // Put buf in an array-style PointerHolder to guarantee deletion
- // of buf.
- PointerHolder<char> b(true, buf);
- memset(buf, '\0', tbuf_size + 1);
- this->file->read(buf, tbuf_size);
-
- // Since buf may contain null characters, we can't do a regexp
- // search on buf directly. Find the last occurrence within buf
- // where the regexp matches.
- char* p = buf;
- char const* candidate = "";
- while ((p = static_cast<char*>(memchr(p, 's', tbuf_size - (p - buf)))) != 0)
- {
- if (eof_re.match(p))
- {
- candidate = p;
- }
- ++p;
+ xref_offset = QUtil::string_to_ll(
+ readToken(this->file).getValue().c_str());
}
try
{
- PCRE::Match m2 = eof_re.match(candidate);
- if (! m2)
+ if (xref_offset == 0)
{
QTC::TC("qpdf", "QPDF can't find startxref");
throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0,
"can't find startxref");
}
- qpdf_offset_t xref_offset = QUtil::string_to_ll(m2.getMatch(1).c_str());
read_xref(xref_offset);
}
catch (QPDFExc& e)
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index c0e09279..6447b93b 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -521,7 +521,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
{
if (allow_bad)
{
-// QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
+ QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
}
else
{