aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--include/qpdf/QPDF.hh1
-rw-r--r--libqpdf/QPDF.cc81
-rw-r--r--qpdf/qtest/qpdf/bad24-recover.out27
-rw-r--r--qpdf/qtest/qpdf/issue-101.out40
5 files changed, 87 insertions, 68 deletions
diff --git a/ChangeLog b/ChangeLog
index e04bdd8d..6b7454eb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -11,6 +11,12 @@
the (bool, T*) version of the constructor instead. If not, just
remove the second parameter.
+2017-08-09 Jay Berkenbilt <ejb@ql.org>
+
+ * When recovering stream length, find endobj without endstream as
+ well as just looking for endstream. Be a little more lax about
+ where we allow it to be found.
+
2017-08-05 Jay Berkenbilt <ejb@ql.org>
* Add --single-pages option to cause output to be written to a
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 68525eba..92a66a34 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -1030,6 +1030,7 @@ class QPDF
// Methods to support pattern finding
bool findHeader();
bool findStartxref();
+ bool findEndstream();
// methods to support linearization checking -- implemented in
// QPDF_linearization.cc
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index fc3120bb..4cda1545 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -1231,76 +1231,43 @@ QPDF::readObject(PointerHolder<InputSource> input,
return object;
}
+bool
+QPDF::findEndstream()
+{
+    // PatternFinder callback: accept only when the next token is the word
+    // "endobj" or "endstream", and position the input at that token.
+    QPDFTokenizer::Token t = readToken(this->file, true);
+    if ((t.getType() == QPDFTokenizer::tt_word) &&
+        ((t.getValue() == "endobj") || (t.getValue() == "endstream")))
+    {
+        this->file->seek(this->file->getLastOffset(), SEEK_SET);
+        return true;
+    }
+    return false;
+}
+
size_t
QPDF::recoverStreamLength(PointerHolder<InputSource> input,
int objid, int generation,
qpdf_offset_t stream_offset)
{
- PCRE endobj_re("^\\s*endobj\\b");
-
// Try to reconstruct stream length by looking for
- // endstream(\r\n?|\n)endobj
+ // endstream or endobj
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description, stream_offset,
"attempting to recover stream length"));
- input->seek(0, SEEK_END);
- qpdf_offset_t eof = input->tell();
- input->seek(stream_offset, SEEK_SET);
- qpdf_offset_t last_line_offset = 0;
+ PatternFinder ef(*this, &QPDF::findEndstream);
size_t length = 0;
- static int const line_end_length = 12; // room for endstream\r\n\0
- char last_line_end[line_end_length];
- while (input->tell() < eof)
+ if (this->file->findFirst("end", stream_offset, 0, ef))
{
- std::string line = input->readLine(50);
- qpdf_offset_t line_offset = input->getLastOffset();
- if (endobj_re.match(line.c_str()))
+ length = this->file->tell() - stream_offset;
+ // Reread endstream but, if it was endobj, don't skip that.
+ QPDFTokenizer::Token t = readToken(this->file);
+ if (t.getValue() == "endobj")
{
- qpdf_offset_t endstream_offset = 0;
- if (last_line_offset >= line_end_length)
- {
- qpdf_offset_t cur_offset = input->tell();
- // Read from the end of the last line, guaranteeing
- // null termination
- qpdf_offset_t search_offset =
- line_offset - (line_end_length - 1);
- input->seek(search_offset, SEEK_SET);
- memset(last_line_end, '\0', line_end_length);
- input->read(last_line_end, line_end_length - 1);
- input->seek(cur_offset, SEEK_SET);
- // if endstream[\r\n] will fit in last_line_end, the
- // 'e' has to be in one of the first three spots.
- // Check explicitly rather than using strstr directly
- // in case there are nulls right before endstream.
- char* p = ((last_line_end[0] == 'e') ? last_line_end :
- (last_line_end[1] == 'e') ? last_line_end + 1 :
- (last_line_end[2] == 'e') ? last_line_end + 2 :
- 0);
- char* endstream_p = 0;
- if (p)
- {
- char* p1 = strstr(p, "endstream\n");
- char* p2 = strstr(p, "endstream\r");
- endstream_p = (p1 ? p1 : p2);
- }
- if (endstream_p)
- {
- endstream_offset =
- search_offset + (endstream_p - last_line_end);
- }
- }
- if (endstream_offset > 0)
- {
- // Stream probably ends right before "endstream"
- length = endstream_offset - stream_offset;
- // Go back to where we would have been if we had just
- // read the endstream.
- input->seek(line_offset, SEEK_SET);
- break;
- }
- }
- last_line_offset = line_offset;
+ this->file->seek(this->file->getLastOffset(), SEEK_SET);
+ }
}
if (length)
diff --git a/qpdf/qtest/qpdf/bad24-recover.out b/qpdf/qtest/qpdf/bad24-recover.out
index d1bbe736..8d7fd87c 100644
--- a/qpdf/qtest/qpdf/bad24-recover.out
+++ b/qpdf/qtest/qpdf/bad24-recover.out
@@ -1,10 +1,25 @@
WARNING: bad24.pdf (object 4 0, file position 385): expected endstream
WARNING: bad24.pdf (object 4 0, file position 341): attempting to recover stream length
-WARNING: bad24.pdf (object 4 0, file position 341): unable to recover stream data; treating stream as empty
-WARNING: bad24.pdf (object 4 0, file position 778): EOF while reading token
-/QTest is implicit
-/QTest is indirect and has type null (2)
-/QTest is null
+WARNING: bad24.pdf (object 4 0, file position 341): recovered stream length: 54
+/QTest is indirect and has type stream (10)
+/QTest is a stream. Dictionary: << /Length 44 >>
+Raw stream data:
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+enxstream
+
+Uncompressed stream data:
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+enxstream
+
+End of stream data
unparse: 4 0 R
-unparseResolved: null
+unparseResolved: 4 0 R
test 1 done
diff --git a/qpdf/qtest/qpdf/issue-101.out b/qpdf/qtest/qpdf/issue-101.out
index 7010cdda..bd457c3a 100644
--- a/qpdf/qtest/qpdf/issue-101.out
+++ b/qpdf/qtest/qpdf/issue-101.out
@@ -5,10 +5,16 @@ WARNING: issue-101.pdf (file position 1242): expected dictionary key but found n
WARNING: issue-101.pdf (file position 1242): dictionary ended prematurely; using null as value for last key
WARNING: issue-101.pdf (object 5 0, file position 1438): /Length key in stream dictionary is not an integer
WARNING: issue-101.pdf (object 5 0, file position 1509): attempting to recover stream length
-WARNING: issue-101.pdf (object 5 0, file position 1509): recovered stream length: 205
+WARNING: issue-101.pdf (object 5 0, file position 1509): recovered stream length: 8
+WARNING: issue-101.pdf (trailer, file position 1631): /Length key in stream dictionary is not an integer
+WARNING: issue-101.pdf (trailer, file position 1702): attempting to recover stream length
+WARNING: issue-101.pdf (trailer, file position 1702): recovered stream length: 12
WARNING: issue-101.pdf (trailer, file position 2026): /Length key in stream dictionary is not an integer
WARNING: issue-101.pdf (trailer, file position 2097): attempting to recover stream length
-WARNING: issue-101.pdf (trailer, file position 2097): recovered stream length: 709
+WARNING: issue-101.pdf (trailer, file position 2097): recovered stream length: 12
+WARNING: issue-101.pdf (trailer, file position 2613): /Length key in stream dictionary is not an integer
+WARNING: issue-101.pdf (trailer, file position 2684): attempting to recover stream length
+WARNING: issue-101.pdf (trailer, file position 2684): recovered stream length: 74
WARNING: issue-101.pdf (trailer, file position 2928): unknown token while reading object; treating as string
WARNING: issue-101.pdf (trailer, file position 2929): unknown token while reading object; treating as string
WARNING: issue-101.pdf (trailer, file position 2928): expected dictionary key but found non-name object; inserting key /QPDFFake1
@@ -22,8 +28,32 @@ WARNING: issue-101.pdf (trailer, file position 3410): attempting to recover stre
WARNING: issue-101.pdf (trailer, file position 3410): recovered stream length: 12
WARNING: issue-101.pdf (trailer, file position 3560): /Length key in stream dictionary is not an integer
WARNING: issue-101.pdf (trailer, file position 3631): attempting to recover stream length
-WARNING: issue-101.pdf (trailer, file position 3631): recovered stream length: 167
+WARNING: issue-101.pdf (trailer, file position 3631): recovered stream length: 8
WARNING: issue-101.pdf (trailer, file position 4113): /Length key in stream dictionary is not an integer
WARNING: issue-101.pdf (trailer, file position 4184): attempting to recover stream length
-WARNING: issue-101.pdf (trailer, file position 4184): unable to recover stream data; treating stream as empty
-issue-101.pdf: unable to find trailer dictionary while recovering damaged file
+WARNING: issue-101.pdf (trailer, file position 4184): recovered stream length: 8
+WARNING: issue-101.pdf (file position 591): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 625): treating unexpected brace token as null
+WARNING: issue-101.pdf (file position 626): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 637): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 639): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 644): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 647): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 687): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 691): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 696): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 698): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 701): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 711): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 742): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 745): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 747): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 777): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 790): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 800): treating unexpected brace token as null
+WARNING: issue-101.pdf (file position 801): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 811): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 819): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 832): unknown token while reading object; treating as string
+WARNING: issue-101.pdf (file position 856): unexpected >
+issue-101.pdf (file position 856): unable to find /Root dictionary