From c76536dd9a150adb71fdcda11ee1a93f25128cc7 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 7 May 2022 13:33:45 -0400 Subject: Implement JSON v2 output --- libqpdf/QPDFJob.cc | 156 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 141 insertions(+), 15 deletions(-) (limited to 'libqpdf/QPDFJob.cc') diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 521377f8..621e6933 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -401,6 +401,7 @@ QPDFJob::Members::Members() : flatten_rotation(false), list_attachments(false), json_version(0), + json_stream_data(qpdf_sj_none), test_json_schema(false), check(false), optimize_images(false), @@ -695,6 +696,17 @@ QPDFJob::checkConfiguration() " use --replace-input to intentionally" " overwrite the input file"); } + + if (m->json_version == 1) { + if (m->json_keys.count("qpdf")) { + usage("json key \"qpdf\" is not valid for json version 1"); + } + } else { + if (m->json_keys.count("objects") || m->json_keys.count("objectinfo")) { + usage("json keys \"objects\" and \"objectinfo\" are only valid for " + "json version 1"); + } + } } unsigned long @@ -1103,6 +1115,102 @@ QPDFJob::doJSONObjectinfo(Pipeline* p, bool& first, QPDF& pdf) JSON::writeDictionaryClose(p, first_object, 1); } +void +QPDFJob::doJSONStream( + Pipeline* p, + bool& first, + QPDF& pdf, + QPDFObjectHandle& obj, + std::string const& file_prefix) +{ + Pipeline* stream_p = nullptr; + FILE* f = nullptr; + std::shared_ptr f_pl; + std::string filename; + if (this->m->json_stream_data == qpdf_sj_file) { + filename = file_prefix + "-" + QUtil::int_to_string(obj.getObjectID()); + f = QUtil::safe_fopen(filename.c_str(), "wb"); + f_pl = std::make_shared("stream data", f); + stream_p = f_pl.get(); + } + auto j = JSON::makeDictionary(); + j.addDictionaryMember( + "stream", + obj.getStreamJSON( + this->m->json_version, + this->m->json_stream_data, + this->m->decode_level, + stream_p, + filename)); + + JSON::writeDictionaryItem(p, first, "obj:" + obj.unparse(), j, 2); + if (f) { + f_pl->finish(); + f_pl = nullptr; + fclose(f); + } +} + +void +QPDFJob::doJSONObject( + Pipeline* p, + bool& first, + QPDF& pdf, + std::string const& key, + QPDFObjectHandle& obj) +{ + auto j = JSON::makeDictionary(); + j.addDictionaryMember("value", obj.getJSON(this->m->json_version, true)); + JSON::writeDictionaryItem(p, first, key, j, 2); +} + +void +QPDFJob::doJSONQpdf(Pipeline* p, bool& first, QPDF& pdf) +{ + std::string file_prefix = this->m->json_stream_prefix; + if (this->m->json_stream_data == qpdf_sj_file) { + if (file_prefix.empty()) { + if (this->m->infilename.get()) { + file_prefix = this->m->infilename.get(); + } + if (file_prefix.empty()) { + usage( + "please specify --json-stream-prefix since the input file " + "name is unknown"); + } + } + } + + JSON::writeDictionaryKey(p, first, "qpdf", 0); + bool first_qpdf = true; + JSON::writeDictionaryOpen(p, first_qpdf, 1); + JSON::writeDictionaryItem( + p, first_qpdf, "jsonversion", JSON::makeInt(this->m->json_version), 1); + JSON::writeDictionaryItem( + p, first_qpdf, "pdfversion", JSON::makeString(pdf.getPDFVersion()), 1); + JSON::writeDictionaryKey(p, first_qpdf, "objects", 1); + bool first_object = true; + JSON::writeDictionaryOpen(p, first_object, 2); + bool all_objects = m->json_objects.empty(); + std::set wanted_og = getWantedJSONObjects(); + std::vector objects = pdf.getAllObjects(); + for (auto& obj: objects) { + if (all_objects || wanted_og.count(obj.getObjGen())) { + if (obj.isStream()) { + doJSONStream(p, first_object, pdf, obj, file_prefix); + } else { + doJSONObject(p, first_object, pdf, "obj:" + obj.unparse(), obj); + } + } + } + if (all_objects || m->json_objects.count("trailer")) { + auto trailer = pdf.getTrailer(); + doJSONObject(p, first_object, pdf, "trailer", trailer); + } + JSON::writeDictionaryClose(p, first_object, 2); + JSON::writeDictionaryClose(p, first_qpdf, 1); +} + void QPDFJob::doJSONPages(Pipeline* p, bool& first, QPDF& pdf) { @@ -1482,14 +1590,15 @@ QPDFJob::json_schema(int json_version, std::set* keys) // The list of selectable top-level keys id duplicated in the // following places: job.yml, QPDFJob::json_schema, and // QPDFJob::doJSON. - if (all_keys || keys->count("objects")) { - schema.addDictionaryMember("objects", JSON::parse(R"({ + if (json_version == 1) { + if (all_keys || keys->count("objects")) { + schema.addDictionaryMember("objects", JSON::parse(R"({ "": "json representation of object" })")); - } - if (all_keys || keys->count("objectinfo")) { - JSON objectinfo = - schema.addDictionaryMember("objectinfo", JSON::parse(R"({ + } + if (all_keys || keys->count("objectinfo")) { + JSON objectinfo = + schema.addDictionaryMember("objectinfo", JSON::parse(R"({ "": { "stream": { "filter": "if stream, its filters, otherwise null", @@ -1498,6 +1607,17 @@ QPDFJob::json_schema(int json_version, std::set* keys) } } })")); + } + } else { + if (all_keys || keys->count("qpdf")) { + schema.addDictionaryMember("qpdf", JSON::parse(R"({ + "jsonversion": "qpdf json output version", + "pdfversion": "PDF version from PDF header", + "objects": { + "": "json representation of object" + } +})")); + } } if (all_keys || keys->count("pages")) { JSON page = schema.addDictionaryMember("pages", JSON::parse(R"([ @@ -1705,15 +1825,21 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) doJSONOutlines(p, first, pdf); } - // We do objects and objectinfo last so their information is - // consistent with repairing the page tree. To see the original - // file with any page tree problems and the page tree not - // flattened, select objects/objectinfo without other keys. - if (all_keys || m->json_keys.count("objects")) { - doJSONObjects(p, first, pdf); - } - if (all_keys || m->json_keys.count("objectinfo")) { - doJSONObjectinfo(p, first, pdf); + // We do objects last so their information is consistent with + // repairing the page tree. To see the original file with any page + // tree problems and the page tree not flattened, select + // objects/objectinfo without other keys. + if (this->m->json_version == 1) { + if (all_keys || m->json_keys.count("objects")) { + doJSONObjects(p, first, pdf); + } + if (all_keys || m->json_keys.count("objectinfo")) { + doJSONObjectinfo(p, first, pdf); + } + } else { + if (all_keys || m->json_keys.count("qpdf")) { + doJSONQpdf(p, first, pdf); + } } JSON::writeDictionaryClose(p, first, 0); -- cgit v1.2.3-70-g09d2