aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 10
-rw-r--r-- qpdf/qpdf.cc 189
-rw-r--r-- qpdf/qtest/qpdf.test 15
-rw-r--r-- qpdf/qtest/qpdf/bad-xref-entry-corrected.out 14
-rw-r--r-- qpdf/qtest/qpdf/bad-xref-entry.out 7
-rw-r--r-- qpdf/qtest/qpdf/bad-xref-entry.pdf 101
6 files changed, 243 insertions, 93 deletions
diff --git a/ChangeLog b/ChangeLog
index 8732f22e..ac32ea25 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2017-07-29 Jay Berkenbilt <ejb@ql.org>
+ * When passing multiple inspection arguments, run --check first,
+ and defer exit until after all the checks have been run. This
+ makes it possible to force operations such as --show-xref to be
+ delayed until after recovery attempts have been made. For example,
+ if you have a file with a syntactically valid xref table that has
+ some offsets that are incorrect, running qpdf --check --show-xref
+ on that file will first recover the xref and then dump the
+ recovered xref, while just running qpdf --show-xref will show the
+ xref table as present in the file. Fixes #42.
+
* When recovering stream length, indicate the recovered length.
Fixes #44.
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index 1882f7c9..ae365c70 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -1383,6 +1383,97 @@ int main(int argc, char* argv[])
}
if (outfilename == 0)
{
+ int exit_code = 0;
+ if (check)
+ {
+ // Code below may set okay to false but not to true.
+ // We assume okay until we prove otherwise but may
+ // continue to perform additional checks after finding
+ // errors.
+ bool okay = true;
+ std::cout << "checking " << infilename << std::endl;
+ try
+ {
+ int extension_level = pdf.getExtensionLevel();
+ std::cout << "PDF Version: " << pdf.getPDFVersion();
+ if (extension_level > 0)
+ {
+ std::cout << " extension level "
+ << pdf.getExtensionLevel();
+ }
+ std::cout << std::endl;
+ ::show_encryption(pdf);
+ if (pdf.isLinearized())
+ {
+ std::cout << "File is linearized\n";
+ if (! pdf.checkLinearization())
+ {
+ // any errors are reported by checkLinearization()
+ okay = false;
+ }
+ }
+ else
+ {
+ std::cout << "File is not linearized\n";
+ }
+
+ // Write the file to nowhere, uncompressing
+ // streams. This causes full file traversal and
+ // decoding of all streams we can decode.
+ QPDFWriter w(pdf);
+ Pl_Discard discard;
+ w.setOutputPipeline(&discard);
+ w.setStreamDataMode(qpdf_s_uncompress);
+ w.write();
+
+ // Parse all content streams
+ std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
+ DiscardContents discard_contents;
+ int pageno = 0;
+ for (std::vector<QPDFObjectHandle>::iterator iter =
+ pages.begin();
+ iter != pages.end(); ++iter)
+ {
+ ++pageno;
+ try
+ {
+ QPDFObjectHandle::parseContentStream(
+ (*iter).getKey("/Contents"),
+ &discard_contents);
+ }
+ catch (QPDFExc& e)
+ {
+ okay = false;
+ std::cout << "page " << pageno << ": "
+ << e.what() << std::endl;
+ }
+ }
+ }
+ catch (std::exception& e)
+ {
+ std::cout << e.what() << std::endl;
+ okay = false;
+ }
+ if (okay)
+ {
+ if (! pdf.getWarnings().empty())
+ {
+ exit_code = EXIT_WARNING;
+ }
+ else
+ {
+ std::cout << "No syntax or stream encoding errors"
+ << " found; the file may still contain"
+ << std::endl
+ << "errors that qpdf cannot detect"
+ << std::endl;
+ }
+ }
+ else
+ {
+ exit_code = EXIT_ERROR;
+ }
+ }
if (show_npages)
{
QTC::TC("qpdf", "qpdf npages");
@@ -1402,7 +1493,7 @@ int main(int argc, char* argv[])
}
else
{
- exit(EXIT_ERROR);
+ exit_code = EXIT_ERROR;
}
}
if (show_linearization)
@@ -1435,7 +1526,7 @@ int main(int argc, char* argv[])
QTC::TC("qpdf", "qpdf unable to filter");
std::cerr << "Unable to filter stream data."
<< std::endl;
- exit(EXIT_ERROR);
+ exit_code = EXIT_ERROR;
}
else
{
@@ -1512,96 +1603,10 @@ int main(int argc, char* argv[])
}
}
}
- if (check)
- {
- // Code below may set okay to false but not to true.
- // We assume okay until we prove otherwise but may
- // continue to perform additional checks after finding
- // errors.
- bool okay = true;
- std::cout << "checking " << infilename << std::endl;
- try
- {
- int extension_level = pdf.getExtensionLevel();
- std::cout << "PDF Version: " << pdf.getPDFVersion();
- if (extension_level > 0)
- {
- std::cout << " extension level "
- << pdf.getExtensionLevel();
- }
- std::cout << std::endl;
- ::show_encryption(pdf);
- if (pdf.isLinearized())
- {
- std::cout << "File is linearized\n";
- if (! pdf.checkLinearization())
- {
- // any errors are reported by checkLinearization()
- okay = false;
- }
- }
- else
- {
- std::cout << "File is not linearized\n";
- }
-
- // Write the file no nowhere, uncompressing
- // streams. This causes full file traversal and
- // decoding of all streams we can decode.
- QPDFWriter w(pdf);
- Pl_Discard discard;
- w.setOutputPipeline(&discard);
- w.setStreamDataMode(qpdf_s_uncompress);
- w.write();
-
- // Parse all content streams
- std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
- DiscardContents discard_contents;
- int pageno = 0;
- for (std::vector<QPDFObjectHandle>::iterator iter =
- pages.begin();
- iter != pages.end(); ++iter)
- {
- ++pageno;
- try
- {
- QPDFObjectHandle::parseContentStream(
- (*iter).getKey("/Contents"),
- &discard_contents);
- }
- catch (QPDFExc& e)
- {
- okay = false;
- std::cout << "page " << pageno << ": "
- << e.what() << std::endl;
- }
- }
- }
- catch (std::exception& e)
- {
- std::cout << e.what() << std::endl;
- okay = false;
- }
- if (okay)
- {
- if (! pdf.getWarnings().empty())
- {
- exit(EXIT_WARNING);
- }
- else
- {
- std::cout << "No syntax or stream encoding errors"
- << " found; the file may still contain"
- << std::endl
- << "errors that qpdf cannot detect"
- << std::endl;
- }
- }
- else
- {
- exit(EXIT_ERROR);
- }
- }
+ if (exit_code)
+ {
+ exit(exit_code);
+ }
}
else
{
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 81c69025..031c33a9 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -206,7 +206,7 @@ $td->runtest("remove page we don't have",
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
-$n_tests += 91;
+$n_tests += 93;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@@ -628,6 +628,19 @@ $td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "newline-before-endstream.pdf"});
+# Demonstrate show-xref after check and not after check to illustrate
+# that it can dump the real xref or the recovered xref.
+$td->runtest("dump bad xref",
+ {$td->COMMAND => "qpdf --show-xref bad-xref-entry.pdf"},
+ {$td->FILE => "bad-xref-entry.out",
+ $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("dump corrected bad xref",
+ {$td->COMMAND => "qpdf --check --show-xref bad-xref-entry.pdf"},
+ {$td->FILE => "bad-xref-entry-corrected.out",
+ $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+
show_ntests();
# ----------
diff --git a/qpdf/qtest/qpdf/bad-xref-entry-corrected.out b/qpdf/qtest/qpdf/bad-xref-entry-corrected.out
new file mode 100644
index 00000000..258c18a7
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad-xref-entry-corrected.out
@@ -0,0 +1,14 @@
+checking bad-xref-entry.pdf
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+WARNING: bad-xref-entry.pdf: file is damaged
+WARNING: bad-xref-entry.pdf (object 5 0, file position 580): expected n n obj
+WARNING: bad-xref-entry.pdf: Attempting to reconstruct cross-reference table
+1/0: uncompressed; offset = 52
+2/0: uncompressed; offset = 133
+3/0: uncompressed; offset = 242
+4/0: uncompressed; offset = 484
+5/0: uncompressed; offset = 583
+6/0: uncompressed; offset = 629
+7/0: uncompressed; offset = 774
diff --git a/qpdf/qtest/qpdf/bad-xref-entry.out b/qpdf/qtest/qpdf/bad-xref-entry.out
new file mode 100644
index 00000000..3690e02c
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad-xref-entry.out
@@ -0,0 +1,7 @@
+1/0: uncompressed; offset = 52
+2/0: uncompressed; offset = 133
+3/0: uncompressed; offset = 242
+4/0: uncompressed; offset = 484
+5/0: uncompressed; offset = 580
+6/0: uncompressed; offset = 629
+7/0: uncompressed; offset = 774
diff --git a/qpdf/qtest/qpdf/bad-xref-entry.pdf b/qpdf/qtest/qpdf/bad-xref-entry.pdf
new file mode 100644
index 00000000..32df6674
--- /dev/null
+++ b/qpdf/qtest/qpdf/bad-xref-entry.pdf
@@ -0,0 +1,101 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+ /Count 1
+ /Kids [
+ 3 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+ /Contents 4 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 6 0 R
+ >>
+ /ProcSet 7 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+%% Original object ID: 4 0
+4 0 obj
+<<
+ /Length 5 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+44
+endobj
+
+%% Original object ID: 6 0
+6 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 5 0
+7 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+xref
+0 8
+0000000000 65535 f
+0000000052 00000 n
+0000000133 00000 n
+0000000242 00000 n
+0000000484 00000 n
+0000000580 00000 n
+0000000629 00000 n
+0000000774 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 8
+ /ID [<2e68fbddcf3742fa64db89e66acd25d9><2e68fbddcf3742fa64db89e66acd25d9>]
+>>
+startxref
+809
+%%EOF