aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf/QPDF.cc
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2021-11-07 21:23:29 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2021-11-07 21:27:22 +0100
commit: f45dacf4cbfab73ce470d0a61d4acee14206ab2b (patch)
tree: e1e744fcb132b371c43bfe8d2b2b325137d251d3 /libqpdf/QPDF.cc
parent: 0a71750ee808a23017315da1c04fb826791aeb74 (diff)
download: qpdf-f45dacf4cbfab73ce470d0a61d4acee14206ab2b.tar.zst
Make recovery logic flexible about where objects end (fixes #573)
Don't assume `endobj` is at the beginning of the line. This means we are looking at tokens for every line, but the odds of `n n obj` appearing in the middle of an object are likely much lower than the odds of `endobj` not being at the beginning of a line or being missing entirely. This will probably have a negative impact on recovery time for very large files. Hopefully it will be worth it.
Diffstat (limited to 'libqpdf/QPDF.cc')
-rw-r--r-- libqpdf/QPDF.cc 61
1 files changed, 25 insertions, 36 deletions
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 94161c08..4ce60ea2 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -590,7 +590,6 @@ QPDF::reconstruct_xref(QPDFExc& e)
this->m->file->seek(0, SEEK_END);
qpdf_offset_t eof = this->m->file->tell();
this->m->file->seek(0, SEEK_SET);
- bool in_obj = false;
qpdf_offset_t line_start = 0;
// Don't allow very long tokens here during recovery.
static size_t const MAX_LEN = 100;
@@ -604,46 +603,36 @@ QPDF::reconstruct_xref(QPDFExc& e)
this->m->file->tell() - toO(t1.getValue().length());
if (token_start >= next_line_start)
{
- // don't process yet
+ // don't process yet -- wait until we get to the line
+ // containing this token
}
- else if (in_obj)
- {
- if (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))
- {
- in_obj = false;
- }
- }
- else
+ else if (t1.getType() == QPDFTokenizer::tt_integer)
{
- if (t1.getType() == QPDFTokenizer::tt_integer)
+ QPDFTokenizer::Token t2 =
+ readToken(this->m->file, MAX_LEN);
+ QPDFTokenizer::Token t3 =
+ readToken(this->m->file, MAX_LEN);
+ if ((t2.getType() == QPDFTokenizer::tt_integer) &&
+ (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")))
{
- QPDFTokenizer::Token t2 =
- readToken(this->m->file, MAX_LEN);
- QPDFTokenizer::Token t3 =
- readToken(this->m->file, MAX_LEN);
- if ((t2.getType() == QPDFTokenizer::tt_integer) &&
- (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")))
- {
- in_obj = true;
- int obj = QUtil::string_to_int(t1.getValue().c_str());
- int gen = QUtil::string_to_int(t2.getValue().c_str());
- insertXrefEntry(obj, 1, token_start, gen, true);
- }
+ int obj = QUtil::string_to_int(t1.getValue().c_str());
+ int gen = QUtil::string_to_int(t2.getValue().c_str());
+ insertXrefEntry(obj, 1, token_start, gen, true);
}
- else if ((! this->m->trailer.isInitialized()) &&
- (t1 == QPDFTokenizer::Token(
- QPDFTokenizer::tt_word, "trailer")))
- {
- QPDFObjectHandle t =
+ }
+ else if ((! this->m->trailer.isInitialized()) &&
+ (t1 == QPDFTokenizer::Token(
+ QPDFTokenizer::tt_word, "trailer")))
+ {
+ QPDFObjectHandle t =
readObject(this->m->file, "trailer", 0, 0, false);
- if (! t.isDictionary())
- {
- // Oh well. It was worth a try.
- }
- else
- {
- setTrailer(t);
- }
+ if (! t.isDictionary())
+ {
+ // Oh well. It was worth a try.
+ }
+ else
+ {
+ setTrailer(t);
}
}
this->m->file->seek(next_line_start, SEEK_SET);