diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | TODO | 8 | ||||
-rw-r--r-- | cSpell.json | 3 | ||||
-rw-r--r-- | include/qpdf/QPDF.hh | 20 | ||||
-rw-r--r-- | include/qpdf/QUtil.hh | 17 | ||||
-rw-r--r-- | libqpdf/JSON.cc | 8 | ||||
-rw-r--r-- | libqpdf/QPDF.cc | 7 | ||||
-rw-r--r-- | libqpdf/QPDFJob.cc | 50 | ||||
-rw-r--r-- | libqpdf/QPDFTokenizer.cc | 48 | ||||
-rw-r--r-- | libqpdf/QPDFWriter.cc | 39 | ||||
-rw-r--r-- | libqpdf/QUtil.cc | 31 | ||||
-rw-r--r-- | manual/release-notes.rst | 6 | ||||
-rw-r--r-- | qpdf/qtest/image-optimization.test | 2 | ||||
-rw-r--r-- | qpdf/qtest/invalid-objects.test | 7 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/catalgg.out | 6 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/catalgg.pdf | 79 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/nested-images.pdf | bin | 0 -> 5744 bytes | |||
-rw-r--r-- | qpdf/qtest/qpdf/optimize-images-nested-images-json.out | 18 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/optimize-images-nested-images.out | 2 |
19 files changed, 238 insertions, 119 deletions
@@ -1,3 +1,9 @@ +2023-03-18 Jay Berkenbilt <ejb@ql.org> + + * Enhance --optimize-images to support images nested inside of + form XObjects. Thanks to Connor Osborne (github user cdosborn) for + the contribution. Fixes #923. + 2023-02-25 Jay Berkenbilt <ejb@ql.org> * 11.3.0: release @@ -263,6 +263,14 @@ Always: For qpdf 12, see https://github.com/qpdf/qpdf/discussions/785 +C++ Version Changes +=================== + +Use +// C++NN: ... +to mark places in the code that should be updated when we require at +least that version of C++. + Page splitting/merging ====================== diff --git a/cSpell.json b/cSpell.json index 2e33690e..8b88eb32 100644 --- a/cSpell.json +++ b/cSpell.json @@ -27,8 +27,6 @@ "autobuilder", "autobuilders", "autofiles", - "autofiles", - "autogen", "autogen", "autolabel", "autopkgtest", @@ -119,7 +117,6 @@ "encodable", "encp", "endforeach", - "endforeach", "endfunction", "endianness", "endl", diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 13b66977..4541db64 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -910,8 +910,7 @@ class QPDF } }; - // The ParseGuard class allows QPDFObjectHandle to detect - // re-entrant parsing. + // The ParseGuard class allows QPDFParser to detect re-entrant parsing. class ParseGuard { friend class QPDFParser; @@ -933,7 +932,7 @@ class QPDF QPDF* qpdf; }; - // Pipe class is restricted to QPDF_Stream + // Pipe class is restricted to QPDF_Stream. class Pipe { friend class QPDF_Stream; @@ -961,6 +960,20 @@ class QPDF } }; + // JobSetter class is restricted to QPDFJob. + class JobSetter + { + friend class QPDFJob; + + private: + // Enable enhanced warnings for pdf file checking. + static void + setCheckMode(QPDF& qpdf, bool val) + { + qpdf.m->check_mode = val; + } + }; + // For testing only -- do not add to DLL static bool test_json_validators(); @@ -1698,6 +1711,7 @@ class QPDF bool ignore_xref_streams{false}; bool suppress_warnings{false}; bool attempt_recovery{true}; + bool check_mode{false}; std::shared_ptr<EncryptionParameters> encp; std::string pdf_version; std::map<QPDFObjGen, QPDFXRefEntry> xref_table; diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh index b42fe195..4d46f630 100644 --- a/include/qpdf/QUtil.hh +++ b/include/qpdf/QUtil.hh @@ -223,6 +223,11 @@ namespace QUtil QPDF_DLL std::string hex_decode(std::string const&); + // Decode a single hex digit into a char in the range 0 <= char < 16. Return + // a char >= 16 if digit is not a valid hex digit. + QPDF_DLL + inline constexpr char hex_decode_char(char digit) noexcept; + // Set stdin, stdout to binary mode QPDF_DLL void binary_stdout(); @@ -550,8 +555,7 @@ namespace QUtil inline bool QUtil::is_hex_digit(char ch) { - return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || - ('A' <= ch && ch <= 'F'); + return hex_decode_char(ch) < '\20'; } inline bool @@ -603,4 +607,13 @@ QUtil::hex_encode_char(char c) '#', hexchars[static_cast<unsigned char>(c) >> 4], hexchars[c & 0x0f]}; } +inline constexpr char +QUtil::hex_decode_char(char digit) noexcept +{ + return digit <= '9' && digit >= '0' + ? char(digit - '0') + : (digit >= 'a' ? char(digit - 'a' + 10) + : (digit >= 'A' ? char(digit - 'A' + 10) : '\20')); +} + #endif // QUTIL_HH diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index cb60eabc..fbf06f88 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1121,12 +1121,8 @@ JSONParser::getToken() case ls_u4: using ui = unsigned int; - if ('0' <= *p && *p <= '9') { - u_value = 16 * u_value + (ui(*p) - ui('0')); - } else if ('a' <= *p && *p <= 'f') { - u_value = 16 * u_value + (10 + ui(*p) - ui('a')); - } else if ('A' <= *p && *p <= 'F') { - u_value = 16 * u_value + (10 + ui(*p) - ui('A')); + if (ui val = ui(QUtil::hex_decode_char(*p)); val < 16) { + u_value = 16 * u_value + val; } else { tokenError(); } diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 063c0f55..83944be4 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -2461,6 +2461,13 @@ QPDF::getRoot() QPDFObjectHandle root = this->m->trailer.getKey("/Root"); if (!root.isDictionary()) { throw damagedPDF("", 0, "unable to find /Root dictionary"); + } else if ( + // Check_mode is an interim solution to request #810 pending a more + // comprehensive review of the approach to more extensive checks and + // warning levels. + m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { + warn(damagedPDF("", 0, "catalog /Type entry missing or invalid")); + root.replaceKey("/Type", "/Catalog"_qpdf); } return root; } diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index a4b1a026..fbaa53e5 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -798,6 +798,7 @@ QPDFJob::doCheck(QPDF& pdf) bool okay = true; auto& cout = *this->m->log->getInfo(); cout << "checking " << m->infilename.get() << "\n"; + QPDF::JobSetter::setCheckMode(pdf, true); try { int extension_level = pdf.getExtensionLevel(); cout << "PDF Version: " << pdf.getPDFVersion(); @@ -2363,31 +2364,30 @@ QPDFJob::handleTransformations(QPDF& pdf) int pageno = 0; for (auto& ph: dh.getAllPages()) { ++pageno; - QPDFObjectHandle page = ph.getObjectHandle(); - for (auto& iter2: ph.getImages()) { - std::string name = iter2.first; - QPDFObjectHandle& image = iter2.second; - ImageOptimizer* io = new ImageOptimizer( - *this, - m->oi_min_width, - m->oi_min_height, - m->oi_min_area, - image); - std::shared_ptr<QPDFObjectHandle::StreamDataProvider> sdp(io); - if (io->evaluate( - "image " + name + " on page " + - std::to_string(pageno))) { - QPDFObjectHandle new_image = pdf.newStream(); - new_image.replaceDict(image.getDict().shallowCopy()); - new_image.replaceStreamData( - sdp, - QPDFObjectHandle::newName("/DCTDecode"), - QPDFObjectHandle::newNull()); - ph.getAttribute("/Resources", true) - .getKey("/XObject") - .replaceKey(name, new_image); - } - } + ph.forEachImage( + true, + [this, pageno, &pdf]( + QPDFObjectHandle& obj, + QPDFObjectHandle& xobj_dict, + std::string const& key) { + auto io = std::make_unique<ImageOptimizer>( + *this, + m->oi_min_width, + m->oi_min_height, + m->oi_min_area, + obj); + if (io->evaluate( + "image " + key + " on page " + + std::to_string(pageno))) { + QPDFObjectHandle new_image = pdf.newStream(); + new_image.replaceDict(obj.getDict().shallowCopy()); + new_image.replaceStreamData( + std::move(io), + QPDFObjectHandle::newName("/DCTDecode"), + QPDFObjectHandle::newNull()); + xobj_dict.replaceKey(key, new_image); + } + }); } } if (m->generate_appearances) { diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 990d5b65..d8d457ab 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -449,18 +449,9 @@ QPDFTokenizer::inNameHex1(char ch) { this->hex_char = ch; - if ('0' <= ch && ch <= '9') { - this->char_code = 16 * (int(ch) - int('0')); + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->char_code = int(hval) << 4; this->state = st_name_hex2; - - } else if ('A' <= ch && ch <= 'F') { - this->char_code = 16 * (10 + int(ch) - int('A')); - this->state = st_name_hex2; - - } else if ('a' <= ch && ch <= 'f') { - this->char_code = 16 * (10 + int(ch) - int('a')); - this->state = st_name_hex2; - } else { QTC::TC("qpdf", "QPDFTokenizer bad name 1"); this->error_message = "name with stray # will not work with PDF >= 1.2"; @@ -475,15 +466,8 @@ QPDFTokenizer::inNameHex1(char ch) void QPDFTokenizer::inNameHex2(char ch) { - if ('0' <= ch && ch <= '9') { - this->char_code += int(ch) - int('0'); - - } else if ('A' <= ch && ch <= 'F') { - this->char_code += 10 + int(ch) - int('A'); - - } else if ('a' <= ch && ch <= 'f') { - this->char_code += 10 + int(ch) - int('a'); - + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->char_code |= int(hval); } else { QTC::TC("qpdf", "QPDFTokenizer bad name 2"); this->error_message = "name with stray # will not work with PDF >= 1.2"; @@ -675,16 +659,8 @@ QPDFTokenizer::inLiteral(char ch) void QPDFTokenizer::inHexstring(char ch) { - if ('0' <= ch && ch <= '9') { - this->char_code = 16 * (int(ch) - int('0')); - this->state = st_in_hexstring_2nd; - - } else if ('A' <= ch && ch <= 'F') { - this->char_code = 16 * (10 + int(ch) - int('A')); - this->state = st_in_hexstring_2nd; - - } else if ('a' <= ch && ch <= 'f') { - this->char_code = 16 * (10 + int(ch) - int('a')); + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->char_code = int(hval) << 4; this->state = st_in_hexstring_2nd; } else if (ch == '>') { @@ -706,16 +682,8 @@ QPDFTokenizer::inHexstring(char ch) void QPDFTokenizer::inHexstring2nd(char ch) { - if ('0' <= ch && ch <= '9') { - this->val += char(this->char_code + int(ch) - int('0')); - this->state = st_in_hexstring; - - } else if ('A' <= ch && ch <= 'F') { - this->val += char(this->char_code + 10 + int(ch) - int('A')); - this->state = st_in_hexstring; - - } else if ('a' <= ch && ch <= 'f') { - this->val += char(this->char_code + 10 + int(ch) - int('a')); + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->val += char(this->char_code) | hval; this->state = st_in_hexstring; } else if (ch == '>') { diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 8287412c..de1aa45b 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1441,8 +1441,13 @@ QPDFWriter::unparseObject( if (level < 0) { throw std::logic_error("invalid level in QPDFWriter::unparseObject"); } - - std::string const indent(static_cast<size_t>(2 * level), ' '); + // For non-qdf, "indent" is a single space between tokens. + // For qdf, indent includes the preceding newline. + std::string indent = " "; + if (m->qdf_mode) { + indent.append(static_cast<size_t>(2 * level), ' '); + indent[0] = '\n'; + } if (auto const tc = object.getTypeCode(); tc == ::ot_array) { // Note: PDF spec 1.4 implementation note 121 states that @@ -1451,16 +1456,12 @@ QPDFWriter::unparseObject( // unconditionally for all arrays because it looks nicer and // doesn't make the files that much bigger. writeString("["); - writeStringQDF("\n"); for (auto const& item: object.getArrayAsVector()) { - writeStringQDF(indent); + writeString(indent); writeStringQDF(" "); - writeStringNoQDF(" "); unparseChild(item, level + 1, child_flags); - writeStringQDF("\n"); } - writeStringQDF(indent); - writeStringNoQDF(" "); + writeString(indent); writeString("]"); } else if (tc == ::ot_dictionary) { // Make a shallow copy of this object so we can modify it @@ -1619,14 +1620,12 @@ QPDFWriter::unparseObject( } writeString("<<"); - writeStringQDF("\n"); for (auto& item: object.getDictAsMap()) { if (!item.second.isNull()) { auto const& key = item.first; - writeStringQDF(indent); + writeString(indent); writeStringQDF(" "); - writeStringNoQDF(" "); writeString(QPDF_Name::normalizeName(key)); writeString(" "); if (key == "/Contents" && object.isDictionaryOfType("/Sig") && @@ -1639,14 +1638,13 @@ QPDFWriter::unparseObject( } else { unparseChild(item.second, level + 1, child_flags); } - writeStringQDF("\n"); } } if (flags & f_stream) { - writeStringQDF(indent); - writeStringQDF(" "); - writeString(" /Length "); + writeString(indent); + writeStringQDF(" "); + writeString("/Length "); if (this->m->direct_stream_lengths) { writeString(std::to_string(stream_length)); @@ -1654,17 +1652,14 @@ QPDFWriter::unparseObject( writeString(std::to_string(this->m->cur_stream_length_id)); writeString(" 0 R"); } - writeStringQDF("\n"); if (compress && (flags & f_filtered)) { - writeStringQDF(indent); - writeStringQDF(" "); - writeString(" /Filter /FlateDecode"); - writeStringQDF("\n"); + writeString(indent); + writeStringQDF(" "); + writeString("/Filter /FlateDecode"); } } - writeStringQDF(indent); - writeStringNoQDF(" "); + writeString(indent); writeString(">>"); } else if (tc == ::ot_stream) { // Write stream data to a buffer. diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc index bae067b6..03301d9d 100644 --- a/libqpdf/QUtil.cc +++ b/libqpdf/QUtil.cc @@ -783,28 +783,25 @@ std::string QUtil::hex_decode(std::string const& input) { std::string result; - size_t pos = 0; + // We know result.size() <= 0.5 * input.size() + 1. However, reserving + // string space for this upper bound has a negative impact. + bool first = true; + char decoded; for (auto ch: input) { - bool skip = false; - if ((ch >= 'A') && (ch <= 'F')) { - ch = QIntC::to_char(ch - 'A' + 10); - } else if ((ch >= 'a') && (ch <= 'f')) { - ch = QIntC::to_char(ch - 'a' + 10); - } else if ((ch >= '0') && (ch <= '9')) { - ch = QIntC::to_char(ch - '0'); - } else { - skip = true; - } - if (!skip) { - if (pos == 0) { - result.push_back(static_cast<char>(ch << 4)); - pos = 1; + ch = hex_decode_char(ch); + if (ch < '\20') { + if (first) { + decoded = static_cast<char>(ch << 4); + first = false; } else { - result[result.length() - 1] |= ch; - pos = 0; + result.push_back(decoded | ch); + first = true; } } } + if (!first) { + result.push_back(decoded); + } return result; } diff --git a/manual/release-notes.rst b/manual/release-notes.rst index 904c3e12..39b40ab4 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -8,6 +8,12 @@ For a detailed list of changes, please see the file .. x.y.z: not yet released +11.4.0: not yet released + - CLI Enhancements + + - The :qpdf:ref:`--optimize-images` option now optimizes images + inside of form XObjects. + 11.3.0: February 25, 2023 - CLI Enhancements diff --git a/qpdf/qtest/image-optimization.test b/qpdf/qtest/image-optimization.test index 1b3901e6..10ffd526 100644 --- a/qpdf/qtest/image-optimization.test +++ b/qpdf/qtest/image-optimization.test @@ -33,6 +33,8 @@ my @image_opt = ( ['large-inline-image', 'inline-images-keep-all', '--keep-inline-images'], ['unsupported-optimization', 'unsupported', '--oi-min-width=0 --oi-min-height=0 --oi-min-area=0'], + ['nested-images', 'nested-images', + '--oi-min-width=0 --oi-min-height=0 --oi-min-area=0'] ); my $n_tests = 2 * scalar(@image_opt); diff --git a/qpdf/qtest/invalid-objects.test b/qpdf/qtest/invalid-objects.test index 6491ccdb..1ece3810 100644 --- a/qpdf/qtest/invalid-objects.test +++ b/qpdf/qtest/invalid-objects.test @@ -14,7 +14,7 @@ cleanup(); my $td = new TestDriver('invalid-objects'); -my $n_tests = 3; +my $n_tests = 4; $td->runtest("closed input source", {$td->COMMAND => "test_driver 73 minimal.pdf"}, @@ -33,5 +33,10 @@ $td->runtest("object with zero offset", {$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); +$td->runtest("catalog with invalid type entry", + {$td->COMMAND => "qpdf --check catalgg.pdf"}, + {$td->FILE => "catalgg.out", $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); + cleanup(); $td->report($n_tests); diff --git a/qpdf/qtest/qpdf/catalgg.out b/qpdf/qtest/qpdf/catalgg.out new file mode 100644 index 00000000..58fb244c --- /dev/null +++ b/qpdf/qtest/qpdf/catalgg.out @@ -0,0 +1,6 @@ +checking catalgg.pdf +WARNING: catalgg.pdf: catalog /Type entry missing or invalid +PDF Version: 1.3 +File is not encrypted +File is not linearized +qpdf: operation succeeded with warnings diff --git a/qpdf/qtest/qpdf/catalgg.pdf b/qpdf/qtest/qpdf/catalgg.pdf new file mode 100644 index 00000000..7208c4b6 --- /dev/null +++ b/qpdf/qtest/qpdf/catalgg.pdf @@ -0,0 +1,79 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalgg + /Pages 2 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding +>> +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R +>> +startxref +556 +%%EOF diff --git a/qpdf/qtest/qpdf/nested-images.pdf b/qpdf/qtest/qpdf/nested-images.pdf Binary files differnew file mode 100644 index 00000000..cb2b4d87 --- /dev/null +++ b/qpdf/qtest/qpdf/nested-images.pdf diff --git a/qpdf/qtest/qpdf/optimize-images-nested-images-json.out b/qpdf/qtest/qpdf/optimize-images-nested-images-json.out new file mode 100644 index 00000000..9f713aa7 --- /dev/null +++ b/qpdf/qtest/qpdf/optimize-images-nested-images-json.out @@ -0,0 +1,18 @@ +{ + "version": 2, + "parameters": { + "decodelevel": "generalized" + }, + "pages": [ + { + "contents": [ + "4 0 R" + ], + "images": [], + "label": null, + "object": "3 0 R", + "outlines": [], + "pageposfrom1": 1 + } + ] +} diff --git a/qpdf/qtest/qpdf/optimize-images-nested-images.out b/qpdf/qtest/qpdf/optimize-images-nested-images.out new file mode 100644 index 00000000..253a9208 --- /dev/null +++ b/qpdf/qtest/qpdf/optimize-images-nested-images.out @@ -0,0 +1,2 @@ +qpdf: image /X1 on page 1: optimizing image reduces size from 2628 to ... +qpdf: wrote file a.pdf |