diff options
Diffstat (limited to 'libqpdf')
-rw-r--r-- | libqpdf/JSON.cc | 8 | ||||
-rw-r--r-- | libqpdf/QPDF.cc | 7 | ||||
-rw-r--r-- | libqpdf/QPDFJob.cc | 50 | ||||
-rw-r--r-- | libqpdf/QPDFTokenizer.cc | 48 | ||||
-rw-r--r-- | libqpdf/QPDFWriter.cc | 39 | ||||
-rw-r--r-- | libqpdf/QUtil.cc | 31 |
6 files changed, 73 insertions, 110 deletions
diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index cb60eabc..fbf06f88 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1121,12 +1121,8 @@ JSONParser::getToken() case ls_u4: using ui = unsigned int; - if ('0' <= *p && *p <= '9') { - u_value = 16 * u_value + (ui(*p) - ui('0')); - } else if ('a' <= *p && *p <= 'f') { - u_value = 16 * u_value + (10 + ui(*p) - ui('a')); - } else if ('A' <= *p && *p <= 'F') { - u_value = 16 * u_value + (10 + ui(*p) - ui('A')); + if (ui val = ui(QUtil::hex_decode_char(*p)); val < 16) { + u_value = 16 * u_value + val; } else { tokenError(); } diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 063c0f55..83944be4 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -2461,6 +2461,13 @@ QPDF::getRoot() QPDFObjectHandle root = this->m->trailer.getKey("/Root"); if (!root.isDictionary()) { throw damagedPDF("", 0, "unable to find /Root dictionary"); + } else if ( + // Check_mode is an interim solution to request #810 pending a more + // comprehensive review of the approach to more extensive checks and + // warning levels. + m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { + warn(damagedPDF("", 0, "catalog /Type entry missing or invalid")); + root.replaceKey("/Type", "/Catalog"_qpdf); } return root; } diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index a4b1a026..fbaa53e5 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -798,6 +798,7 @@ QPDFJob::doCheck(QPDF& pdf) bool okay = true; auto& cout = *this->m->log->getInfo(); cout << "checking " << m->infilename.get() << "\n"; + QPDF::JobSetter::setCheckMode(pdf, true); try { int extension_level = pdf.getExtensionLevel(); cout << "PDF Version: " << pdf.getPDFVersion(); @@ -2363,31 +2364,30 @@ QPDFJob::handleTransformations(QPDF& pdf) int pageno = 0; for (auto& ph: dh.getAllPages()) { ++pageno; - QPDFObjectHandle page = ph.getObjectHandle(); - for (auto& iter2: ph.getImages()) { - std::string name = iter2.first; - QPDFObjectHandle& image = iter2.second; - ImageOptimizer* io = new ImageOptimizer( - *this, - m->oi_min_width, - m->oi_min_height, - m->oi_min_area, - image); - std::shared_ptr<QPDFObjectHandle::StreamDataProvider> sdp(io); - if (io->evaluate( - "image " + name + " on page " + - std::to_string(pageno))) { - QPDFObjectHandle new_image = pdf.newStream(); - new_image.replaceDict(image.getDict().shallowCopy()); - new_image.replaceStreamData( - sdp, - QPDFObjectHandle::newName("/DCTDecode"), - QPDFObjectHandle::newNull()); - ph.getAttribute("/Resources", true) - .getKey("/XObject") - .replaceKey(name, new_image); - } - } + ph.forEachImage( + true, + [this, pageno, &pdf]( + QPDFObjectHandle& obj, + QPDFObjectHandle& xobj_dict, + std::string const& key) { + auto io = std::make_unique<ImageOptimizer>( + *this, + m->oi_min_width, + m->oi_min_height, + m->oi_min_area, + obj); + if (io->evaluate( + "image " + key + " on page " + + std::to_string(pageno))) { + QPDFObjectHandle new_image = pdf.newStream(); + new_image.replaceDict(obj.getDict().shallowCopy()); + new_image.replaceStreamData( + std::move(io), + QPDFObjectHandle::newName("/DCTDecode"), + QPDFObjectHandle::newNull()); + xobj_dict.replaceKey(key, new_image); + } + }); } } if (m->generate_appearances) { diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 990d5b65..d8d457ab 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -449,18 +449,9 @@ QPDFTokenizer::inNameHex1(char ch) { this->hex_char = ch; - if ('0' <= ch && ch <= '9') { - this->char_code = 16 * (int(ch) - int('0')); + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->char_code = int(hval) << 4; this->state = st_name_hex2; - - } else if ('A' <= ch && ch <= 'F') { - this->char_code = 16 * (10 + int(ch) - int('A')); - this->state = st_name_hex2; - - } else if ('a' <= ch && ch <= 'f') { - this->char_code = 16 * (10 + int(ch) - int('a')); - this->state = st_name_hex2; - } else { QTC::TC("qpdf", "QPDFTokenizer bad name 1"); this->error_message = "name with stray # will not work with PDF >= 1.2"; @@ -475,15 +466,8 @@ QPDFTokenizer::inNameHex1(char ch) void QPDFTokenizer::inNameHex2(char ch) { - if ('0' <= ch && ch <= '9') { - this->char_code += int(ch) - int('0'); - - } else if ('A' <= ch && ch <= 'F') { - this->char_code += 10 + int(ch) - int('A'); - - } else if ('a' <= ch && ch <= 'f') { - this->char_code += 10 + int(ch) - int('a'); - + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->char_code |= int(hval); } else { QTC::TC("qpdf", "QPDFTokenizer bad name 2"); this->error_message = "name with stray # will not work with PDF >= 1.2"; @@ -675,16 +659,8 @@ QPDFTokenizer::inLiteral(char ch) void QPDFTokenizer::inHexstring(char ch) { - if ('0' <= ch && ch <= '9') { - this->char_code = 16 * (int(ch) - int('0')); - this->state = st_in_hexstring_2nd; - - } else if ('A' <= ch && ch <= 'F') { - this->char_code = 16 * (10 + int(ch) - int('A')); - this->state = st_in_hexstring_2nd; - - } else if ('a' <= ch && ch <= 'f') { - this->char_code = 16 * (10 + int(ch) - int('a')); + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->char_code = int(hval) << 4; this->state = st_in_hexstring_2nd; } else if (ch == '>') { @@ -706,16 +682,8 @@ QPDFTokenizer::inHexstring(char ch) void QPDFTokenizer::inHexstring2nd(char ch) { - if ('0' <= ch && ch <= '9') { - this->val += char(this->char_code + int(ch) - int('0')); - this->state = st_in_hexstring; - - } else if ('A' <= ch && ch <= 'F') { - this->val += char(this->char_code + 10 + int(ch) - int('A')); - this->state = st_in_hexstring; - - } else if ('a' <= ch && ch <= 'f') { - this->val += char(this->char_code + 10 + int(ch) - int('a')); + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + this->val += char(this->char_code) | hval; this->state = st_in_hexstring; } else if (ch == '>') { diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 8287412c..de1aa45b 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1441,8 +1441,13 @@ QPDFWriter::unparseObject( if (level < 0) { throw std::logic_error("invalid level in QPDFWriter::unparseObject"); } - - std::string const indent(static_cast<size_t>(2 * level), ' '); + // For non-qdf, "indent" is a single space between tokens. + // For qdf, indent includes the preceding newline. + std::string indent = " "; + if (m->qdf_mode) { + indent.append(static_cast<size_t>(2 * level), ' '); + indent[0] = '\n'; + } if (auto const tc = object.getTypeCode(); tc == ::ot_array) { // Note: PDF spec 1.4 implementation note 121 states that @@ -1451,16 +1456,12 @@ QPDFWriter::unparseObject( // unconditionally for all arrays because it looks nicer and // doesn't make the files that much bigger. writeString("["); - writeStringQDF("\n"); for (auto const& item: object.getArrayAsVector()) { - writeStringQDF(indent); + writeString(indent); writeStringQDF(" "); - writeStringNoQDF(" "); unparseChild(item, level + 1, child_flags); - writeStringQDF("\n"); } - writeStringQDF(indent); - writeStringNoQDF(" "); + writeString(indent); writeString("]"); } else if (tc == ::ot_dictionary) { // Make a shallow copy of this object so we can modify it @@ -1619,14 +1620,12 @@ QPDFWriter::unparseObject( } writeString("<<"); - writeStringQDF("\n"); for (auto& item: object.getDictAsMap()) { if (!item.second.isNull()) { auto const& key = item.first; - writeStringQDF(indent); + writeString(indent); writeStringQDF(" "); - writeStringNoQDF(" "); writeString(QPDF_Name::normalizeName(key)); writeString(" "); if (key == "/Contents" && object.isDictionaryOfType("/Sig") && @@ -1639,14 +1638,13 @@ QPDFWriter::unparseObject( } else { unparseChild(item.second, level + 1, child_flags); } - writeStringQDF("\n"); } } if (flags & f_stream) { - writeStringQDF(indent); - writeStringQDF(" "); - writeString(" /Length "); + writeString(indent); + writeStringQDF(" "); + writeString("/Length "); if (this->m->direct_stream_lengths) { writeString(std::to_string(stream_length)); @@ -1654,17 +1652,14 @@ QPDFWriter::unparseObject( writeString(std::to_string(this->m->cur_stream_length_id)); writeString(" 0 R"); } - writeStringQDF("\n"); if (compress && (flags & f_filtered)) { - writeStringQDF(indent); - writeStringQDF(" "); - writeString(" /Filter /FlateDecode"); - writeStringQDF("\n"); + writeString(indent); + writeStringQDF(" "); + writeString("/Filter /FlateDecode"); } } - writeStringQDF(indent); - writeStringNoQDF(" "); + writeString(indent); writeString(">>"); } else if (tc == ::ot_stream) { // Write stream data to a buffer. diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc index bae067b6..03301d9d 100644 --- a/libqpdf/QUtil.cc +++ b/libqpdf/QUtil.cc @@ -783,28 +783,25 @@ std::string QUtil::hex_decode(std::string const& input) { std::string result; - size_t pos = 0; + // We know result.size() <= 0.5 * input.size() + 1. However, reserving + // string space for this upper bound has a negative impact. + bool first = true; + char decoded; for (auto ch: input) { - bool skip = false; - if ((ch >= 'A') && (ch <= 'F')) { - ch = QIntC::to_char(ch - 'A' + 10); - } else if ((ch >= 'a') && (ch <= 'f')) { - ch = QIntC::to_char(ch - 'a' + 10); - } else if ((ch >= '0') && (ch <= '9')) { - ch = QIntC::to_char(ch - '0'); - } else { - skip = true; - } - if (!skip) { - if (pos == 0) { - result.push_back(static_cast<char>(ch << 4)); - pos = 1; + ch = hex_decode_char(ch); + if (ch < '\20') { + if (first) { + decoded = static_cast<char>(ch << 4); + first = false; } else { - result[result.length() - 1] |= ch; - pos = 0; + result.push_back(decoded | ch); + first = true; } } } + if (!first) { + result.push_back(decoded); + } return result; } |