aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--TODO8
-rw-r--r--cSpell.json3
-rw-r--r--include/qpdf/QPDF.hh20
-rw-r--r--include/qpdf/QUtil.hh17
-rw-r--r--libqpdf/JSON.cc8
-rw-r--r--libqpdf/QPDF.cc7
-rw-r--r--libqpdf/QPDFJob.cc50
-rw-r--r--libqpdf/QPDFTokenizer.cc48
-rw-r--r--libqpdf/QPDFWriter.cc39
-rw-r--r--libqpdf/QUtil.cc31
-rw-r--r--manual/release-notes.rst6
-rw-r--r--qpdf/qtest/image-optimization.test2
-rw-r--r--qpdf/qtest/invalid-objects.test7
-rw-r--r--qpdf/qtest/qpdf/catalgg.out6
-rw-r--r--qpdf/qtest/qpdf/catalgg.pdf79
-rw-r--r--qpdf/qtest/qpdf/nested-images.pdfbin0 -> 5744 bytes
-rw-r--r--qpdf/qtest/qpdf/optimize-images-nested-images-json.out18
-rw-r--r--qpdf/qtest/qpdf/optimize-images-nested-images.out2
19 files changed, 238 insertions, 119 deletions
diff --git a/ChangeLog b/ChangeLog
index cc1a6608..4c8ddfee 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2023-03-18 Jay Berkenbilt <ejb@ql.org>
+
+ * Enhance --optimize-images to support images nested inside of
+ form XObjects. Thanks to Connor Osborne (github user cdosborn) for
+ the contribution. Fixes #923.
+
2023-02-25 Jay Berkenbilt <ejb@ql.org>
* 11.3.0: release
diff --git a/TODO b/TODO
index fce507ab..4eba6f6b 100644
--- a/TODO
+++ b/TODO
@@ -263,6 +263,14 @@ Always:
For qpdf 12, see https://github.com/qpdf/qpdf/discussions/785
+C++ Version Changes
+===================
+
+Use
+// C++NN: ...
+to mark places in the code that should be updated when we require at
+least that version of C++.
+
Page splitting/merging
======================
diff --git a/cSpell.json b/cSpell.json
index 2e33690e..8b88eb32 100644
--- a/cSpell.json
+++ b/cSpell.json
@@ -27,8 +27,6 @@
"autobuilder",
"autobuilders",
"autofiles",
- "autofiles",
- "autogen",
"autogen",
"autolabel",
"autopkgtest",
@@ -119,7 +117,6 @@
"encodable",
"encp",
"endforeach",
- "endforeach",
"endfunction",
"endianness",
"endl",
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 13b66977..4541db64 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -910,8 +910,7 @@ class QPDF
}
};
- // The ParseGuard class allows QPDFObjectHandle to detect
- // re-entrant parsing.
+ // The ParseGuard class allows QPDFParser to detect re-entrant parsing.
class ParseGuard
{
friend class QPDFParser;
@@ -933,7 +932,7 @@ class QPDF
QPDF* qpdf;
};
- // Pipe class is restricted to QPDF_Stream
+ // Pipe class is restricted to QPDF_Stream.
class Pipe
{
friend class QPDF_Stream;
@@ -961,6 +960,20 @@ class QPDF
}
};
+ // JobSetter class is restricted to QPDFJob.
+ class JobSetter
+ {
+ friend class QPDFJob;
+
+ private:
+ // Enable enhanced warnings for pdf file checking.
+ static void
+ setCheckMode(QPDF& qpdf, bool val)
+ {
+ qpdf.m->check_mode = val;
+ }
+ };
+
// For testing only -- do not add to DLL
static bool test_json_validators();
@@ -1698,6 +1711,7 @@ class QPDF
bool ignore_xref_streams{false};
bool suppress_warnings{false};
bool attempt_recovery{true};
+ bool check_mode{false};
std::shared_ptr<EncryptionParameters> encp;
std::string pdf_version;
std::map<QPDFObjGen, QPDFXRefEntry> xref_table;
diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
index b42fe195..4d46f630 100644
--- a/include/qpdf/QUtil.hh
+++ b/include/qpdf/QUtil.hh
@@ -223,6 +223,11 @@ namespace QUtil
QPDF_DLL
std::string hex_decode(std::string const&);
+ // Decode a single hex digit into a char in the range 0 <= char < 16. Return
+ // a char >= 16 if digit is not a valid hex digit.
+ QPDF_DLL
+ inline constexpr char hex_decode_char(char digit) noexcept;
+
// Set stdin, stdout to binary mode
QPDF_DLL
void binary_stdout();
@@ -550,8 +555,7 @@ namespace QUtil
inline bool
QUtil::is_hex_digit(char ch)
{
- return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') ||
- ('A' <= ch && ch <= 'F');
+ return hex_decode_char(ch) < '\20';
}
inline bool
@@ -603,4 +607,13 @@ QUtil::hex_encode_char(char c)
'#', hexchars[static_cast<unsigned char>(c) >> 4], hexchars[c & 0x0f]};
}
+inline constexpr char
+QUtil::hex_decode_char(char digit) noexcept
+{
+ return digit <= '9' && digit >= '0'
+ ? char(digit - '0')
+ : (digit >= 'a' ? char(digit - 'a' + 10)
+ : (digit >= 'A' ? char(digit - 'A' + 10) : '\20'));
+}
+
#endif // QUTIL_HH
diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc
index cb60eabc..fbf06f88 100644
--- a/libqpdf/JSON.cc
+++ b/libqpdf/JSON.cc
@@ -1121,12 +1121,8 @@ JSONParser::getToken()
case ls_u4:
using ui = unsigned int;
- if ('0' <= *p && *p <= '9') {
- u_value = 16 * u_value + (ui(*p) - ui('0'));
- } else if ('a' <= *p && *p <= 'f') {
- u_value = 16 * u_value + (10 + ui(*p) - ui('a'));
- } else if ('A' <= *p && *p <= 'F') {
- u_value = 16 * u_value + (10 + ui(*p) - ui('A'));
+ if (ui val = ui(QUtil::hex_decode_char(*p)); val < 16) {
+ u_value = 16 * u_value + val;
} else {
tokenError();
}
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 063c0f55..83944be4 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -2461,6 +2461,13 @@ QPDF::getRoot()
QPDFObjectHandle root = this->m->trailer.getKey("/Root");
if (!root.isDictionary()) {
throw damagedPDF("", 0, "unable to find /Root dictionary");
+ } else if (
+ // Check_mode is an interim solution to request #810 pending a more
+ // comprehensive review of the approach to more extensive checks and
+ // warning levels.
+ m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
+ warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));
+ root.replaceKey("/Type", "/Catalog"_qpdf);
}
return root;
}
diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc
index a4b1a026..fbaa53e5 100644
--- a/libqpdf/QPDFJob.cc
+++ b/libqpdf/QPDFJob.cc
@@ -798,6 +798,7 @@ QPDFJob::doCheck(QPDF& pdf)
bool okay = true;
auto& cout = *this->m->log->getInfo();
cout << "checking " << m->infilename.get() << "\n";
+ QPDF::JobSetter::setCheckMode(pdf, true);
try {
int extension_level = pdf.getExtensionLevel();
cout << "PDF Version: " << pdf.getPDFVersion();
@@ -2363,31 +2364,30 @@ QPDFJob::handleTransformations(QPDF& pdf)
int pageno = 0;
for (auto& ph: dh.getAllPages()) {
++pageno;
- QPDFObjectHandle page = ph.getObjectHandle();
- for (auto& iter2: ph.getImages()) {
- std::string name = iter2.first;
- QPDFObjectHandle& image = iter2.second;
- ImageOptimizer* io = new ImageOptimizer(
- *this,
- m->oi_min_width,
- m->oi_min_height,
- m->oi_min_area,
- image);
- std::shared_ptr<QPDFObjectHandle::StreamDataProvider> sdp(io);
- if (io->evaluate(
- "image " + name + " on page " +
- std::to_string(pageno))) {
- QPDFObjectHandle new_image = pdf.newStream();
- new_image.replaceDict(image.getDict().shallowCopy());
- new_image.replaceStreamData(
- sdp,
- QPDFObjectHandle::newName("/DCTDecode"),
- QPDFObjectHandle::newNull());
- ph.getAttribute("/Resources", true)
- .getKey("/XObject")
- .replaceKey(name, new_image);
- }
- }
+ ph.forEachImage(
+ true,
+ [this, pageno, &pdf](
+ QPDFObjectHandle& obj,
+ QPDFObjectHandle& xobj_dict,
+ std::string const& key) {
+ auto io = std::make_unique<ImageOptimizer>(
+ *this,
+ m->oi_min_width,
+ m->oi_min_height,
+ m->oi_min_area,
+ obj);
+ if (io->evaluate(
+ "image " + key + " on page " +
+ std::to_string(pageno))) {
+ QPDFObjectHandle new_image = pdf.newStream();
+ new_image.replaceDict(obj.getDict().shallowCopy());
+ new_image.replaceStreamData(
+ std::move(io),
+ QPDFObjectHandle::newName("/DCTDecode"),
+ QPDFObjectHandle::newNull());
+ xobj_dict.replaceKey(key, new_image);
+ }
+ });
}
}
if (m->generate_appearances) {
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 990d5b65..d8d457ab 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -449,18 +449,9 @@ QPDFTokenizer::inNameHex1(char ch)
{
this->hex_char = ch;
- if ('0' <= ch && ch <= '9') {
- this->char_code = 16 * (int(ch) - int('0'));
+ if (char hval = QUtil::hex_decode_char(ch); hval < '\20') {
+ this->char_code = int(hval) << 4;
this->state = st_name_hex2;
-
- } else if ('A' <= ch && ch <= 'F') {
- this->char_code = 16 * (10 + int(ch) - int('A'));
- this->state = st_name_hex2;
-
- } else if ('a' <= ch && ch <= 'f') {
- this->char_code = 16 * (10 + int(ch) - int('a'));
- this->state = st_name_hex2;
-
} else {
QTC::TC("qpdf", "QPDFTokenizer bad name 1");
this->error_message = "name with stray # will not work with PDF >= 1.2";
@@ -475,15 +466,8 @@ QPDFTokenizer::inNameHex1(char ch)
void
QPDFTokenizer::inNameHex2(char ch)
{
- if ('0' <= ch && ch <= '9') {
- this->char_code += int(ch) - int('0');
-
- } else if ('A' <= ch && ch <= 'F') {
- this->char_code += 10 + int(ch) - int('A');
-
- } else if ('a' <= ch && ch <= 'f') {
- this->char_code += 10 + int(ch) - int('a');
-
+ if (char hval = QUtil::hex_decode_char(ch); hval < '\20') {
+ this->char_code |= int(hval);
} else {
QTC::TC("qpdf", "QPDFTokenizer bad name 2");
this->error_message = "name with stray # will not work with PDF >= 1.2";
@@ -675,16 +659,8 @@ QPDFTokenizer::inLiteral(char ch)
void
QPDFTokenizer::inHexstring(char ch)
{
- if ('0' <= ch && ch <= '9') {
- this->char_code = 16 * (int(ch) - int('0'));
- this->state = st_in_hexstring_2nd;
-
- } else if ('A' <= ch && ch <= 'F') {
- this->char_code = 16 * (10 + int(ch) - int('A'));
- this->state = st_in_hexstring_2nd;
-
- } else if ('a' <= ch && ch <= 'f') {
- this->char_code = 16 * (10 + int(ch) - int('a'));
+ if (char hval = QUtil::hex_decode_char(ch); hval < '\20') {
+ this->char_code = int(hval) << 4;
this->state = st_in_hexstring_2nd;
} else if (ch == '>') {
@@ -706,16 +682,8 @@ QPDFTokenizer::inHexstring(char ch)
void
QPDFTokenizer::inHexstring2nd(char ch)
{
- if ('0' <= ch && ch <= '9') {
- this->val += char(this->char_code + int(ch) - int('0'));
- this->state = st_in_hexstring;
-
- } else if ('A' <= ch && ch <= 'F') {
- this->val += char(this->char_code + 10 + int(ch) - int('A'));
- this->state = st_in_hexstring;
-
- } else if ('a' <= ch && ch <= 'f') {
- this->val += char(this->char_code + 10 + int(ch) - int('a'));
+ if (char hval = QUtil::hex_decode_char(ch); hval < '\20') {
+ this->val += char(this->char_code) | hval;
this->state = st_in_hexstring;
} else if (ch == '>') {
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index 8287412c..de1aa45b 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -1441,8 +1441,13 @@ QPDFWriter::unparseObject(
if (level < 0) {
throw std::logic_error("invalid level in QPDFWriter::unparseObject");
}
-
- std::string const indent(static_cast<size_t>(2 * level), ' ');
+ // For non-qdf, "indent" is a single space between tokens.
+ // For qdf, indent includes the preceding newline.
+ std::string indent = " ";
+ if (m->qdf_mode) {
+ indent.append(static_cast<size_t>(2 * level), ' ');
+ indent[0] = '\n';
+ }
if (auto const tc = object.getTypeCode(); tc == ::ot_array) {
// Note: PDF spec 1.4 implementation note 121 states that
@@ -1451,16 +1456,12 @@ QPDFWriter::unparseObject(
// unconditionally for all arrays because it looks nicer and
// doesn't make the files that much bigger.
writeString("[");
- writeStringQDF("\n");
for (auto const& item: object.getArrayAsVector()) {
- writeStringQDF(indent);
+ writeString(indent);
writeStringQDF(" ");
- writeStringNoQDF(" ");
unparseChild(item, level + 1, child_flags);
- writeStringQDF("\n");
}
- writeStringQDF(indent);
- writeStringNoQDF(" ");
+ writeString(indent);
writeString("]");
} else if (tc == ::ot_dictionary) {
// Make a shallow copy of this object so we can modify it
@@ -1619,14 +1620,12 @@ QPDFWriter::unparseObject(
}
writeString("<<");
- writeStringQDF("\n");
for (auto& item: object.getDictAsMap()) {
if (!item.second.isNull()) {
auto const& key = item.first;
- writeStringQDF(indent);
+ writeString(indent);
writeStringQDF(" ");
- writeStringNoQDF(" ");
writeString(QPDF_Name::normalizeName(key));
writeString(" ");
if (key == "/Contents" && object.isDictionaryOfType("/Sig") &&
@@ -1639,14 +1638,13 @@ QPDFWriter::unparseObject(
} else {
unparseChild(item.second, level + 1, child_flags);
}
- writeStringQDF("\n");
}
}
if (flags & f_stream) {
- writeStringQDF(indent);
- writeStringQDF(" ");
- writeString(" /Length ");
+ writeString(indent);
+ writeStringQDF(" ");
+ writeString("/Length ");
if (this->m->direct_stream_lengths) {
writeString(std::to_string(stream_length));
@@ -1654,17 +1652,14 @@ QPDFWriter::unparseObject(
writeString(std::to_string(this->m->cur_stream_length_id));
writeString(" 0 R");
}
- writeStringQDF("\n");
if (compress && (flags & f_filtered)) {
- writeStringQDF(indent);
- writeStringQDF(" ");
- writeString(" /Filter /FlateDecode");
- writeStringQDF("\n");
+ writeString(indent);
+ writeStringQDF(" ");
+ writeString("/Filter /FlateDecode");
}
}
- writeStringQDF(indent);
- writeStringNoQDF(" ");
+ writeString(indent);
writeString(">>");
} else if (tc == ::ot_stream) {
// Write stream data to a buffer.
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index bae067b6..03301d9d 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -783,28 +783,25 @@ std::string
QUtil::hex_decode(std::string const& input)
{
std::string result;
- size_t pos = 0;
+ // We know result.size() <= 0.5 * input.size() + 1. However, reserving
+ // string space for this upper bound has a negative impact.
+ bool first = true;
+ char decoded;
for (auto ch: input) {
- bool skip = false;
- if ((ch >= 'A') && (ch <= 'F')) {
- ch = QIntC::to_char(ch - 'A' + 10);
- } else if ((ch >= 'a') && (ch <= 'f')) {
- ch = QIntC::to_char(ch - 'a' + 10);
- } else if ((ch >= '0') && (ch <= '9')) {
- ch = QIntC::to_char(ch - '0');
- } else {
- skip = true;
- }
- if (!skip) {
- if (pos == 0) {
- result.push_back(static_cast<char>(ch << 4));
- pos = 1;
+ ch = hex_decode_char(ch);
+ if (ch < '\20') {
+ if (first) {
+ decoded = static_cast<char>(ch << 4);
+ first = false;
} else {
- result[result.length() - 1] |= ch;
- pos = 0;
+ result.push_back(decoded | ch);
+ first = true;
}
}
}
+ if (!first) {
+ result.push_back(decoded);
+ }
return result;
}
diff --git a/manual/release-notes.rst b/manual/release-notes.rst
index 904c3e12..39b40ab4 100644
--- a/manual/release-notes.rst
+++ b/manual/release-notes.rst
@@ -8,6 +8,12 @@ For a detailed list of changes, please see the file
.. x.y.z: not yet released
+11.4.0: not yet released
+ - CLI Enhancements
+
+ - The :qpdf:ref:`--optimize-images` option now optimizes images
+ inside of form XObjects.
+
11.3.0: February 25, 2023
- CLI Enhancements
diff --git a/qpdf/qtest/image-optimization.test b/qpdf/qtest/image-optimization.test
index 1b3901e6..10ffd526 100644
--- a/qpdf/qtest/image-optimization.test
+++ b/qpdf/qtest/image-optimization.test
@@ -33,6 +33,8 @@ my @image_opt = (
['large-inline-image', 'inline-images-keep-all', '--keep-inline-images'],
['unsupported-optimization', 'unsupported',
'--oi-min-width=0 --oi-min-height=0 --oi-min-area=0'],
+ ['nested-images', 'nested-images',
+ '--oi-min-width=0 --oi-min-height=0 --oi-min-area=0']
);
my $n_tests = 2 * scalar(@image_opt);
diff --git a/qpdf/qtest/invalid-objects.test b/qpdf/qtest/invalid-objects.test
index 6491ccdb..1ece3810 100644
--- a/qpdf/qtest/invalid-objects.test
+++ b/qpdf/qtest/invalid-objects.test
@@ -14,7 +14,7 @@ cleanup();
my $td = new TestDriver('invalid-objects');
-my $n_tests = 3;
+my $n_tests = 4;
$td->runtest("closed input source",
{$td->COMMAND => "test_driver 73 minimal.pdf"},
@@ -33,5 +33,10 @@ $td->runtest("object with zero offset",
{$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
+$td->runtest("catalog with invalid type entry",
+ {$td->COMMAND => "qpdf --check catalgg.pdf"},
+ {$td->FILE => "catalgg.out", $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+
cleanup();
$td->report($n_tests);
diff --git a/qpdf/qtest/qpdf/catalgg.out b/qpdf/qtest/qpdf/catalgg.out
new file mode 100644
index 00000000..58fb244c
--- /dev/null
+++ b/qpdf/qtest/qpdf/catalgg.out
@@ -0,0 +1,6 @@
+checking catalgg.pdf
+WARNING: catalgg.pdf: catalog /Type entry missing or invalid
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+qpdf: operation succeeded with warnings
diff --git a/qpdf/qtest/qpdf/catalgg.pdf b/qpdf/qtest/qpdf/catalgg.pdf
new file mode 100644
index 00000000..7208c4b6
--- /dev/null
+++ b/qpdf/qtest/qpdf/catalgg.pdf
@@ -0,0 +1,79 @@
+%PDF-1.3
+1 0 obj
+<<
+ /Type /Catalgg
+ /Pages 2 0 R
+>>
+endobj
+
+2 0 obj
+<<
+ /Type /Pages
+ /Kids [
+ 3 0 R
+ ]
+ /Count 1
+>>
+endobj
+
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [0 0 612 792]
+ /Contents 4 0 R
+ /Resources <<
+ /ProcSet 5 0 R
+ /Font <<
+ /F1 6 0 R
+ >>
+ >>
+>>
+endobj
+
+4 0 obj
+<<
+ /Length 44
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+6 0 obj
+<<
+ /Type /Font
+ /Subtype /Type1
+ /Name /F1
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+>>
+endobj
+
+xref
+0 7
+0000000000 65535 f
+0000000009 00000 n
+0000000063 00000 n
+0000000135 00000 n
+0000000307 00000 n
+0000000403 00000 n
+0000000438 00000 n
+trailer <<
+ /Size 7
+ /Root 1 0 R
+>>
+startxref
+556
+%%EOF
diff --git a/qpdf/qtest/qpdf/nested-images.pdf b/qpdf/qtest/qpdf/nested-images.pdf
new file mode 100644
index 00000000..cb2b4d87
--- /dev/null
+++ b/qpdf/qtest/qpdf/nested-images.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/optimize-images-nested-images-json.out b/qpdf/qtest/qpdf/optimize-images-nested-images-json.out
new file mode 100644
index 00000000..9f713aa7
--- /dev/null
+++ b/qpdf/qtest/qpdf/optimize-images-nested-images-json.out
@@ -0,0 +1,18 @@
+{
+ "version": 2,
+ "parameters": {
+ "decodelevel": "generalized"
+ },
+ "pages": [
+ {
+ "contents": [
+ "4 0 R"
+ ],
+ "images": [],
+ "label": null,
+ "object": "3 0 R",
+ "outlines": [],
+ "pageposfrom1": 1
+ }
+ ]
+}
diff --git a/qpdf/qtest/qpdf/optimize-images-nested-images.out b/qpdf/qtest/qpdf/optimize-images-nested-images.out
new file mode 100644
index 00000000..253a9208
--- /dev/null
+++ b/qpdf/qtest/qpdf/optimize-images-nested-images.out
@@ -0,0 +1,2 @@
+qpdf: image /X1 on page 1: optimizing image reduces size from 2628 to ...
+qpdf: wrote file a.pdf