From 49621ef5a825fc0a600284fa2c33775b330e2007 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 19 Dec 2023 16:15:08 -0500 Subject: Add qpdf-test-compare for comparing PDFs with different zlib --- compare-for-test/qpdf-test-compare.cc | 215 ++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 compare-for-test/qpdf-test-compare.cc (limited to 'compare-for-test/qpdf-test-compare.cc') diff --git a/compare-for-test/qpdf-test-compare.cc b/compare-for-test/qpdf-test-compare.cc new file mode 100644 index 00000000..7873f4a2 --- /dev/null +++ b/compare-for-test/qpdf-test-compare.cc @@ -0,0 +1,215 @@ +#include +#include +#include +#include + +#include +#include +#include +#include + +static char const* whoami = nullptr; + +void +usage() +{ + std::cerr << "Usage: " << whoami << " actual expected" << std::endl + << R"(Where "actual" is the actual output and "expected" is the expected)" + << std::endl + << "output of a test, compare the two PDF files. The files are considered" + << std::endl + << "to match if all their objects are identical except that, if a stream is" + << std::endl + << "compressed with FlateDecode, the uncompressed data must match." << std::endl + << std::endl + << "If the files match, the output is the expected file. Otherwise, it is" + << std::endl + << "the actual file. Read comments in the test suite for rationale." << std::endl; + exit(2); +} + +void +cleanEncryption(QPDF& q) +{ + auto enc = q.getTrailer().getKey("/Encrypt"); + if (!enc.isDictionary()) { + return; + } + enc.removeKey("/O"); + enc.removeKey("/OE"); + enc.removeKey("/U"); + enc.removeKey("/UE"); + enc.removeKey("/Perms"); +} + +std::string +compareObjects(std::string const& label, QPDFObjectHandle act, QPDFObjectHandle exp) +{ + if (act.getTypeCode() != exp.getTypeCode()) { + QTC::TC("compare", "objects with different type"); + return label + ": different types"; + } + if (act.isStream()) { + auto act_dict = act.getDict(); + auto exp_dict = exp.getDict(); + act_dict.removeKey("/Length"); + exp_dict.removeKey("/Length"); + if (act_dict.unparse() != exp_dict.unparse()) { + QTC::TC("compare", "different stream dictionaries"); + return label + ": stream dictionaries differ"; + } + if (act_dict.getKey("/Type").isNameAndEquals("/XRef")) { + QTC::TC("compare", "ignore data for xref stream"); + return ""; + } + auto act_filters = act_dict.getKey("/Filter"); + bool uncompress = false; + if (act_filters.isName()) { + act_filters = act_filters.wrapInArray(); + } + if (act_filters.isArray()) { + for (auto& filter: act_filters.aitems()) { + if (filter.isNameAndEquals("/FlateDecode")) { + uncompress = true; + break; + } + } + } + std::shared_ptr act_data; + std::shared_ptr exp_data; + if (uncompress) { + QTC::TC("compare", "uncompressing"); + act_data = act.getStreamData(); + exp_data = exp.getStreamData(); + } else { + QTC::TC("compare", "not uncompressing"); + act_data = act.getRawStreamData(); + exp_data = exp.getRawStreamData(); + } + if (act_data->getSize() != exp_data->getSize()) { + QTC::TC("compare", "differing data size", uncompress ? 0 : 1); + return label + ": stream data size differs"; + } + auto act_buf = act_data->getBuffer(); + auto exp_buf = exp_data->getBuffer(); + if (memcmp(act_buf, exp_buf, act_data->getSize()) != 0) { + QTC::TC("compare", "different data", uncompress ? 0 : 1); + return label + ": stream data differs"; + } + } else if (act.unparseResolved() != exp.unparseResolved()) { + QTC::TC("compare", "different non-stream"); + return label + ": object contents differ"; + } + return ""; +} + +std::string +compare(char const* actual_filename, char const* expected_filename) +{ + QPDF actual; + actual.processFile(actual_filename); + QPDF expected; + expected.processFile(expected_filename); + // The motivation behind this program is to compare files in a way that allows for + // differences in the exact bytes of zlib compression. If all zlib implementations produced + // exactly the same output, we would just be able to use straight comparison, but since they + // don't, we use this. As such, we are enforcing a standard of "sameness" that goes beyond + // showing semantic equivalence. The only difference we are allowing is compressed data. + + auto act_trailer = actual.getTrailer(); + auto exp_trailer = expected.getTrailer(); + act_trailer.removeKey("/Length"); + exp_trailer.removeKey("/Length"); + auto trailer_diff = compareObjects("trailer", act_trailer, exp_trailer); + if (!trailer_diff.empty()) { + QTC::TC("compare", "different trailer"); + return trailer_diff; + } + + cleanEncryption(actual); + cleanEncryption(expected); + + auto actual_objects = actual.getAllObjects(); + auto expected_objects = expected.getAllObjects(); + if (actual_objects.size() != expected_objects.size()) { + // Not exercised in the test suite since the trailers will differ in this case. + return "different number of objects"; + } + for (size_t i = 0; i < actual_objects.size(); ++i) { + auto act = actual_objects[i]; + auto exp = expected_objects[i]; + auto act_og = act.getObjGen(); + auto exp_og = exp.getObjGen(); + if (act_og != exp_og) { + // not reproduced in the test suite + return "different object IDs"; + } + auto ret = compareObjects(act_og.unparse(), act, exp); + if (!ret.empty()) { + return ret; + } + } + return ""; +} + +int +main(int argc, char* argv[]) +{ + if ((whoami = strrchr(argv[0], '/')) == nullptr) { + whoami = argv[0]; + } else { + ++whoami; + } + + if ((argc == 2) && (strcmp(argv[1], "--version") == 0)) { + std::cout << whoami << " from qpdf version " << QPDF::QPDFVersion() << std::endl; + exit(0); + } + + if (argc != 3) { + usage(); + } + + bool show_why = QUtil::get_env("QPDF_COMPARE_WHY"); + try { + char const* to_output; + auto actual = argv[1]; + auto expected = argv[2]; + auto difference = compare(actual, expected); + if (difference.empty()) { + // The files are identical; write the expected file. This way, tests can be written + // that compare the output of this program to the expected file. + to_output = expected; + } else { + if (show_why) { + std::cerr << difference << std::endl; + exit(2); + } + // The files differ; write the actual file. If it is determined that the actual file + // is correct because of changes that result in intended differences, this enables + // the output of this program to replace the expected file in the test suite. + to_output = actual; + } + auto f = QUtil::safe_fopen(to_output, "rb"); + QUtil::FileCloser fc(f); + QUtil::binary_stdout(); + auto out = std::make_unique("stdout", stdout); + unsigned char buf[2048]; + bool done = false; + while (!done) { + size_t len = fread(buf, 1, sizeof(buf), f); + if (len <= 0) { + done = true; + } else { + out->write(buf, len); + } + } + if (!difference.empty()) { + exit(2); + } + } catch (std::exception& e) { + std::cerr << whoami << ": " << e.what() << std::endl; + exit(2); + } + return 0; +} -- cgit v1.2.3-70-g09d2