aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2022-05-07 19:33:45 +0200
committer Jay Berkenbilt <ejb@ql.org> 2022-05-08 19:45:20 +0200
commit c76536dd9a150adb71fdcda11ee1a93f25128cc7 (patch)
tree 03f68965ad1646f643d184b0435bd6706b42fcdc /libqpdf
parent bdfc4da5105c86f0dc63ed390da240306e6b4466 (diff)
download qpdf-c76536dd9a150adb71fdcda11ee1a93f25128cc7.tar.zst
Implement JSON v2 output
Diffstat (limited to 'libqpdf')
-rw-r--r-- libqpdf/QPDFJob.cc 156
-rw-r--r-- libqpdf/QPDFJob_config.cc 23
-rw-r--r-- libqpdf/QPDFObjectHandle.cc 2
-rw-r--r-- libqpdf/QPDF_Stream.cc 16
-rw-r--r-- libqpdf/qpdf/QPDF_Stream.hh 2
-rw-r--r-- libqpdf/qpdf/auto_job_help.hh 21
-rw-r--r-- libqpdf/qpdf/auto_job_init.hh 5
-rw-r--r-- libqpdf/qpdf/auto_job_json_init.hh 9
-rw-r--r-- libqpdf/qpdf/auto_job_schema.hh 2
9 files changed, 209 insertions, 27 deletions
diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc
index 521377f8..621e6933 100644
--- a/libqpdf/QPDFJob.cc
+++ b/libqpdf/QPDFJob.cc
@@ -401,6 +401,7 @@ QPDFJob::Members::Members() :
flatten_rotation(false),
list_attachments(false),
json_version(0),
+ json_stream_data(qpdf_sj_none),
test_json_schema(false),
check(false),
optimize_images(false),
@@ -695,6 +696,17 @@ QPDFJob::checkConfiguration()
" use --replace-input to intentionally"
" overwrite the input file");
}
+
+ if (m->json_version == 1) {
+ if (m->json_keys.count("qpdf")) {
+ usage("json key \"qpdf\" is not valid for json version 1");
+ }
+ } else {
+ if (m->json_keys.count("objects") || m->json_keys.count("objectinfo")) {
+ usage("json keys \"objects\" and \"objectinfo\" are only valid for "
+ "json version 1");
+ }
+ }
}
unsigned long
@@ -1104,6 +1116,102 @@ QPDFJob::doJSONObjectinfo(Pipeline* p, bool& first, QPDF& pdf)
}
+// Emit one stream object as an entry of the enclosing JSON dictionary.
+//
+// The entry's key is "obj:" followed by obj.unparse(); its value is a
+// dictionary with the single member "stream" holding the result of
+// obj.getStreamJSON().
+//
+//   p           - pipeline that receives the JSON text
+//   first       - dictionary "first item" state, passed through to
+//                 JSON::writeDictionaryItem
+//   pdf         - not used by this function
+//   obj         - the stream object to write
+//   file_prefix - when stream data is written to files (qpdf_sj_file),
+//                 the data file is named "<file_prefix>-<object id>"
+//
+// The data file, if any, is owned by an RAII handle so that it is
+// closed even if writing the stream data or the JSON throws.
void
+QPDFJob::doJSONStream(
+    Pipeline* p,
+    bool& first,
+    QPDF& pdf,
+    QPDFObjectHandle& obj,
+    std::string const& file_prefix)
+{
+    // Declared before the pipeline on purpose: locals are destroyed in
+    // reverse order, so the pipeline that refers to the FILE goes away
+    // first and the FILE is closed last. A null handle never invokes
+    // the deleter.
+    std::unique_ptr<FILE, int (*)(FILE*)> file_closer(nullptr, &fclose);
+    std::shared_ptr<Pl_StdioFile> f_pl;
+    Pipeline* stream_p = nullptr;
+    std::string filename;
+    if (this->m->json_stream_data == qpdf_sj_file) {
+        filename = file_prefix + "-" + QUtil::int_to_string(obj.getObjectID());
+        // NOTE(review): relies on QUtil::safe_fopen reporting failure
+        // itself rather than returning null, as the original code did.
+        file_closer.reset(QUtil::safe_fopen(filename.c_str(), "wb"));
+        f_pl = std::make_shared<Pl_StdioFile>("stream data", file_closer.get());
+        stream_p = f_pl.get();
+    }
+    auto j = JSON::makeDictionary();
+    j.addDictionaryMember(
+        "stream",
+        obj.getStreamJSON(
+            this->m->json_version,
+            this->m->json_stream_data,
+            this->m->decode_level,
+            stream_p,
+            filename));
+
+    JSON::writeDictionaryItem(p, first, "obj:" + obj.unparse(), j, 2);
+    if (f_pl) {
+        // Finish the file pipeline before the file is closed on scope
+        // exit.
+        f_pl->finish();
+    }
+}
+
+// Emit one non-stream object as an entry of the enclosing JSON
+// dictionary. The entry is written under `key` and is a dictionary
+// with the single member "value" holding obj.getJSON() for the
+// configured JSON version. `pdf` is not used by this function.
+void
+QPDFJob::doJSONObject(
+    Pipeline* p,
+    bool& first,
+    QPDF& pdf,
+    std::string const& key,
+    QPDFObjectHandle& obj)
+{
+    JSON wrapper = JSON::makeDictionary();
+    JSON value = obj.getJSON(m->json_version, true);
+    wrapper.addDictionaryMember("value", value);
+    JSON::writeDictionaryItem(p, first, key, wrapper, 2);
+}
+
+// Write the top-level "qpdf" key of the JSON output: a dictionary
+// containing "jsonversion", "pdfversion", and an "objects" dictionary.
+//
+// "objects" gets one entry per object returned by pdf.getAllObjects()
+// that is either wanted (see getWantedJSONObjects()) or, when no
+// object selection was made, every object. Streams go through
+// doJSONStream and everything else through doJSONObject. The trailer
+// is added under the key "trailer" when all objects are written or
+// "trailer" was selected explicitly.
+//
+// When stream data is written to files, the file name prefix is
+// json_stream_prefix if set, otherwise the input file name; usage()
+// is called if neither is available.
+void
+QPDFJob::doJSONQpdf(Pipeline* p, bool& first, QPDF& pdf)
+{
+    std::string file_prefix = m->json_stream_prefix;
+    bool const streams_to_files = (m->json_stream_data == qpdf_sj_file);
+    if (streams_to_files && file_prefix.empty()) {
+        // Fall back to the input file name when no prefix was given.
+        if (m->infilename.get()) {
+            file_prefix = m->infilename.get();
+        }
+        if (file_prefix.empty()) {
+            usage(
+                "please specify --json-stream-prefix since the input file "
+                "name is unknown");
+        }
+    }
+
+    // "qpdf": {
+    JSON::writeDictionaryKey(p, first, "qpdf", 0);
+    bool first_qpdf = true;
+    JSON::writeDictionaryOpen(p, first_qpdf, 1);
+
+    JSON json_version_value = JSON::makeInt(m->json_version);
+    JSON::writeDictionaryItem(
+        p, first_qpdf, "jsonversion", json_version_value, 1);
+    JSON pdf_version_value = JSON::makeString(pdf.getPDFVersion());
+    JSON::writeDictionaryItem(
+        p, first_qpdf, "pdfversion", pdf_version_value, 1);
+
+    // "objects": {
+    JSON::writeDictionaryKey(p, first_qpdf, "objects", 1);
+    bool first_object = true;
+    JSON::writeDictionaryOpen(p, first_object, 2);
+
+    bool const all_objects = m->json_objects.empty();
+    std::set<QPDFObjGen> wanted_og = getWantedJSONObjects();
+    std::vector<QPDFObjectHandle> candidates = pdf.getAllObjects();
+    for (auto& oh: candidates) {
+        if (!all_objects && (wanted_og.find(oh.getObjGen()) == wanted_og.end())) {
+            // Not selected for output.
+            continue;
+        }
+        if (oh.isStream()) {
+            doJSONStream(p, first_object, pdf, oh, file_prefix);
+        } else {
+            doJSONObject(p, first_object, pdf, "obj:" + oh.unparse(), oh);
+        }
+    }
+
+    bool const want_trailer =
+        all_objects || (m->json_objects.count("trailer") != 0);
+    if (want_trailer) {
+        auto trailer = pdf.getTrailer();
+        doJSONObject(p, first_object, pdf, "trailer", trailer);
+    }
+
+    // Close "objects", then "qpdf".
+    JSON::writeDictionaryClose(p, first_object, 2);
+    JSON::writeDictionaryClose(p, first_qpdf, 1);
+}
+
+void
QPDFJob::doJSONPages(Pipeline* p, bool& first, QPDF& pdf)
{
JSON::writeDictionaryKey(p, first, "pages", 0);
@@ -1482,14 +1590,15 @@ QPDFJob::json_schema(int json_version, std::set<std::string>* keys)
// The list of selectable top-level keys id duplicated in the
// following places: job.yml, QPDFJob::json_schema, and
// QPDFJob::doJSON.
- if (all_keys || keys->count("objects")) {
- schema.addDictionaryMember("objects", JSON::parse(R"({
+ if (json_version == 1) {
+ if (all_keys || keys->count("objects")) {
+ schema.addDictionaryMember("objects", JSON::parse(R"({
"<n n R|trailer>": "json representation of object"
})"));
- }
- if (all_keys || keys->count("objectinfo")) {
- JSON objectinfo =
- schema.addDictionaryMember("objectinfo", JSON::parse(R"({
+ }
+ if (all_keys || keys->count("objectinfo")) {
+ JSON objectinfo =
+ schema.addDictionaryMember("objectinfo", JSON::parse(R"({
"<object-id>": {
"stream": {
"filter": "if stream, its filters, otherwise null",
@@ -1498,6 +1607,17 @@ QPDFJob::json_schema(int json_version, std::set<std::string>* keys)
}
}
})"));
+ }
+ } else {
+ if (all_keys || keys->count("qpdf")) {
+ schema.addDictionaryMember("qpdf", JSON::parse(R"({
+ "jsonversion": "qpdf json output version",
+ "pdfversion": "PDF version from PDF header",
+ "objects": {
+ "<obj:n n R|trailer>": "json representation of object"
+ }
+})"));
+ }
}
if (all_keys || keys->count("pages")) {
JSON page = schema.addDictionaryMember("pages", JSON::parse(R"([
@@ -1705,15 +1825,21 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
doJSONOutlines(p, first, pdf);
}
- // We do objects and objectinfo last so their information is
- // consistent with repairing the page tree. To see the original
- // file with any page tree problems and the page tree not
- // flattened, select objects/objectinfo without other keys.
- if (all_keys || m->json_keys.count("objects")) {
- doJSONObjects(p, first, pdf);
- }
- if (all_keys || m->json_keys.count("objectinfo")) {
- doJSONObjectinfo(p, first, pdf);
+ // We do objects last so their information is consistent with
+ // repairing the page tree. To see the original file with any page
+ // tree problems and the page tree not flattened, select
+ // objects/objectinfo without other keys.
+ if (this->m->json_version == 1) {
+ if (all_keys || m->json_keys.count("objects")) {
+ doJSONObjects(p, first, pdf);
+ }
+ if (all_keys || m->json_keys.count("objectinfo")) {
+ doJSONObjectinfo(p, first, pdf);
+ }
+ } else {
+ if (all_keys || m->json_keys.count("qpdf")) {
+ doJSONQpdf(p, first, pdf);
+ }
}
JSON::writeDictionaryClose(p, first, 0);
diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc
index 3f8f0840..d990de37 100644
--- a/libqpdf/QPDFJob_config.cc
+++ b/libqpdf/QPDFJob_config.cc
@@ -261,6 +261,29 @@ QPDFJob::Config::jsonObject(std::string const& parameter)
}
QPDFJob::Config*
+QPDFJob::Config::jsonStreamData(std::string const& parameter)
+{
+ if (parameter == "none") {
+ o.m->json_stream_data = qpdf_sj_none;
+ } else if (parameter == "inline") {
+ o.m->json_stream_data = qpdf_sj_inline;
+ } else if (parameter == "file") {
+ o.m->json_stream_data = qpdf_sj_file;
+ } else {
+ usage("invalid json-streams option");
+ }
+
+ return this;
+}
+
+// Record the file name prefix to use for stream data files in JSON
+// output. The value is stored as given, with no validation here.
+// Returns this Config so calls can be chained.
+QPDFJob::Config*
+QPDFJob::Config::jsonStreamPrefix(std::string const& parameter)
+{
+    auto& job_members = *o.m;
+    job_members.json_stream_prefix = parameter;
+    return this;
+}
+
+QPDFJob::Config*
QPDFJob::Config::testJsonSchema()
{
o.m->test_json_schema = true;
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 1d6a9ccf..10fb153c 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -1800,7 +1800,7 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect)
JSON
QPDFObjectHandle::getStreamJSON(
int json_version,
- qpdf_stream_data_json_e json_data,
+ qpdf_json_stream_data_e json_data,
qpdf_stream_decode_level_e decode_level,
Pipeline* p,
std::string const& data_filename)
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index 67a3ad0d..ff62df73 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -189,7 +189,7 @@ QPDF_Stream::getJSON(int json_version)
JSON
QPDF_Stream::getStreamJSON(
int json_version,
- qpdf_stream_data_json_e json_data,
+ qpdf_json_stream_data_e json_data,
qpdf_stream_decode_level_e decode_level,
Pipeline* p,
std::string const& data_filename)
@@ -231,11 +231,17 @@ QPDF_Stream::getStreamJSON(
} else {
data_pipeline = &discard;
}
- filtered = pipeStreamData(
- data_pipeline, nullptr, 0, decode_level, false, (attempt == 1));
- if (filter && (!filtered)) {
+ bool succeeded = pipeStreamData(
+ data_pipeline,
+ &filtered,
+ 0,
+ decode_level,
+ false,
+ (attempt == 1));
+ if ((!succeeded) || (filter && (!filtered))) {
// Try again
filter = false;
+ decode_level = qpdf_dl_none;
} else {
if (buf_pl.get()) {
buf = buf_pl->getBufferSharedPointer();
@@ -247,7 +253,7 @@ QPDF_Stream::getStreamJSON(
// touching top-level keys.
dict = this->stream_dict.unsafeShallowCopy();
dict.removeKey("/Length");
- if (filtered) {
+ if (filter && filtered) {
dict.removeKey("/Filter");
dict.removeKey("/DecodeParms");
}
diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh
index fcf98ffa..51b215e2 100644
--- a/libqpdf/qpdf/QPDF_Stream.hh
+++ b/libqpdf/qpdf/QPDF_Stream.hh
@@ -63,7 +63,7 @@ class QPDF_Stream: public QPDFObject
addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
JSON getStreamJSON(
int json_version,
- qpdf_stream_data_json_e json_data,
+ qpdf_json_stream_data_e json_data,
qpdf_stream_decode_level_e decode_level,
Pipeline* p,
std::string const& data_filename);
diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh
index 55d2cc63..47210371 100644
--- a/libqpdf/qpdf/auto_job_help.hh
+++ b/libqpdf/qpdf/auto_job_help.hh
@@ -817,6 +817,21 @@ objects will be shown.
ap.addOptionHelp("--job-json-help", "json", "show format of job JSON", R"(Describe the format of the QPDFJob JSON input used by
--job-json-file.
)");
+ap.addOptionHelp("--json-stream-data", "json", "how to handle streams in json output", R"(--json-stream-data={none|inline|file}
+
+Control whether streams in json output should be omitted,
+written inline (base64-encoded) or written to a file. If "file"
+is chosen, the file will be the name of the input file appended
+with -nnn where nnn is the object number. The prefix can be
+overridden with --json-stream-prefix.
+)");
+ap.addOptionHelp("--json-stream-prefix", "json", "prefix for json stream data files", R"(--json-stream-prefix=file-prefix
+
+When --json-stream-data=file is given, override the input file
+name as the prefix for stream data files. Whatever is given here
+will be appended with -nnn to create the name of the file that
+will contain the data for the stream in object nnn.
+)");
ap.addHelpTopic("testing", "options for testing or debugging", R"(The options below are useful when writing automated test code that
includes files created by qpdf or when testing qpdf itself.
)");
@@ -829,6 +844,9 @@ for testing only so that output files can be reproducible. Never
use it for production files. This option is not secure since it
significantly weakens the encryption.
)");
+}
+static void add_help_8(QPDFArgParser& ap)
+{
ap.addOptionHelp("--linearize-pass1", "testing", "save pass 1 of linearization", R"(--linearize-pass1=file
Write the first pass of linearization to the named file. The
@@ -839,9 +857,6 @@ ap.addOptionHelp("--test-json-schema", "testing", "test generated json against s
the output of qpdf --json and the output of qpdf --json-help.
)");
}
-static void add_help_8(QPDFArgParser& ap)
-{
-}
static void add_help(QPDFArgParser& ap)
{
add_help_1(ap);
diff --git a/libqpdf/qpdf/auto_job_init.hh b/libqpdf/qpdf/auto_job_init.hh
index b3191d4d..5c13275c 100644
--- a/libqpdf/qpdf/auto_job_init.hh
+++ b/libqpdf/qpdf/auto_job_init.hh
@@ -20,7 +20,8 @@ static char const* object_streams_choices[] = {"disable", "preserve", "generate"
static char const* remove_unref_choices[] = {"auto", "yes", "no", 0};
static char const* flatten_choices[] = {"all", "print", "screen", 0};
static char const* json_version_choices[] = {"1", "2", "latest", 0};
-static char const* json_key_choices[] = {"acroform", "attachments", "encrypt", "objectinfo", "objects", "outlines", "pagelabels", "pages", 0};
+static char const* json_key_choices[] = {"acroform", "attachments", "encrypt", "objectinfo", "objects", "outlines", "pagelabels", "pages", "qpdf", 0};
+static char const* json_stream_data_choices[] = {"none", "inline", "file", 0};
static char const* print128_choices[] = {"full", "low", "none", 0};
static char const* modify128_choices[] = {"all", "annotate", "form", "assembly", "none", 0};
@@ -101,6 +102,7 @@ this->ap.addRequiredParameter("remove-attachment", [this](std::string const& x){
this->ap.addRequiredParameter("rotate", [this](std::string const& x){c_main->rotate(x);}, "[+|-]angle");
this->ap.addRequiredParameter("show-attachment", [this](std::string const& x){c_main->showAttachment(x);}, "attachment");
this->ap.addRequiredParameter("show-object", [this](std::string const& x){c_main->showObject(x);}, "trailer");
+this->ap.addRequiredParameter("json-stream-prefix", [this](std::string const& x){c_main->jsonStreamPrefix(x);}, "stream-file-prefix");
this->ap.addOptionalParameter("collate", [this](std::string const& x){c_main->collate(x);});
this->ap.addOptionalParameter("split-pages", [this](std::string const& x){c_main->splitPages(x);});
this->ap.addChoices("compress-streams", [this](std::string const& x){c_main->compressStreams(x);}, true, yn_choices);
@@ -113,6 +115,7 @@ this->ap.addChoices("object-streams", [this](std::string const& x){c_main->objec
this->ap.addChoices("password-mode", [this](std::string const& x){c_main->passwordMode(x);}, true, password_mode_choices);
this->ap.addChoices("remove-unreferenced-resources", [this](std::string const& x){c_main->removeUnreferencedResources(x);}, true, remove_unref_choices);
this->ap.addChoices("stream-data", [this](std::string const& x){c_main->streamData(x);}, true, stream_data_choices);
+this->ap.addChoices("json-stream-data", [this](std::string const& x){c_main->jsonStreamData(x);}, true, json_stream_data_choices);
this->ap.addChoices("json", [this](std::string const& x){c_main->json(x);}, false, json_version_choices);
this->ap.registerOptionTable("pages", b(&ArgParser::argEndPages));
this->ap.addPositional(p(&ArgParser::argPagesPositional));
diff --git a/libqpdf/qpdf/auto_job_json_init.hh b/libqpdf/qpdf/auto_job_json_init.hh
index 92c4d65c..c73eb3a7 100644
--- a/libqpdf/qpdf/auto_job_json_init.hh
+++ b/libqpdf/qpdf/auto_job_json_init.hh
@@ -13,7 +13,8 @@ static char const* object_streams_choices[] = {"disable", "preserve", "generate"
static char const* remove_unref_choices[] = {"auto", "yes", "no", 0};
static char const* flatten_choices[] = {"all", "print", "screen", 0};
static char const* json_version_choices[] = {"1", "2", "latest", 0};
-static char const* json_key_choices[] = {"acroform", "attachments", "encrypt", "objectinfo", "objects", "outlines", "pagelabels", "pages", 0};
+static char const* json_key_choices[] = {"acroform", "attachments", "encrypt", "objectinfo", "objects", "outlines", "pagelabels", "pages", "qpdf", 0};
+static char const* json_stream_data_choices[] = {"none", "inline", "file", 0};
static char const* print128_choices[] = {"full", "low", "none", 0};
static char const* modify128_choices[] = {"all", "annotate", "form", "assembly", "none", 0};
@@ -252,6 +253,12 @@ beginArray(bindJSON(&Handlers::beginJsonObjectArray), bindBare(&Handlers::endJso
addParameter([this](std::string const& p) { c_main->jsonObject(p); });
popHandler(); // array: .jsonObject[]
popHandler(); // key: jsonObject
+pushKey("jsonStreamData");
+addChoices(json_stream_data_choices, true, [this](std::string const& p) { c_main->jsonStreamData(p); });
+popHandler(); // key: jsonStreamData
+pushKey("jsonStreamPrefix");
+addParameter([this](std::string const& p) { c_main->jsonStreamPrefix(p); });
+popHandler(); // key: jsonStreamPrefix
pushKey("allowWeakCrypto");
addBare([this]() { c_main->allowWeakCrypto(); });
popHandler(); // key: allowWeakCrypto
diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh
index 267dad23..c2310961 100644
--- a/libqpdf/qpdf/auto_job_schema.hh
+++ b/libqpdf/qpdf/auto_job_schema.hh
@@ -84,6 +84,8 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({
"jsonObject": [
"limit which objects are in JSON"
],
+ "jsonStreamData": "how to handle streams in json output",
+ "jsonStreamPrefix": "prefix for json stream data files",
"allowWeakCrypto": "allow insecure cryptographic algorithms",
"keepFilesOpen": "manage keeping multiple files open",
"keepFilesOpenThreshold": "set threshold for keepFilesOpen",