aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2022-01-22 17:25:55 +0100
committerJay Berkenbilt <ejb@ql.org>2022-01-30 19:11:03 +0100
commit1c8d53465ff4d8e732498b39e49595b16d6754af (patch)
tree4a26bbf245ead7f4b051bb8cc3311bc333154b70
parentb9cd693a5b36b8b0246822cb97386792045179ec (diff)
downloadqpdf-1c8d53465ff4d8e732498b39e49595b16d6754af.tar.zst
Incorporate job schema generation into generate_auto_job
-rw-r--r--README-maintainer5
-rwxr-xr-xgenerate_auto_job122
-rw-r--r--job.sums5
-rw-r--r--job.yml158
-rw-r--r--libqpdf/qpdf/auto_job_schema.hh176
5 files changed, 459 insertions, 7 deletions
diff --git a/README-maintainer b/README-maintainer
index d5801070..7ea049dc 100644
--- a/README-maintainer
+++ b/README-maintainer
@@ -128,7 +128,10 @@ Command-line arguments are closely coupled with QPDFJob. To add a new
command-line argument, add the option to the appropriate table in
job.yml. This will automatically declare a method in the private
ArgParser class in QPDFJob_argv.cc which you have to implement. The
-implementation should make calls to methods in QPDFJob.
+implementation should make calls to methods in QPDFJob. Then, either
+add the same option to the no-json section of job.yml if it is to be
+excluded from the job json structure, or add it under the json section
+at the place where it should appear in the json structure.
The build will fail until the new option is documented in
manual/cli.rst. To do that, create documentation for the option by
diff --git a/generate_auto_job b/generate_auto_job
index 79abc8b9..1706ff04 100755
--- a/generate_auto_job
+++ b/generate_auto_job
@@ -5,6 +5,7 @@ import argparse
import hashlib
import re
import yaml
+import json
whoami = os.path.basename(sys.argv[0])
BANNER = f'''//
@@ -29,6 +30,7 @@ class Main:
'decl': 'libqpdf/qpdf/auto_job_decl.hh',
'init': 'libqpdf/qpdf/auto_job_init.hh',
'help': 'libqpdf/qpdf/auto_job_help.hh',
+ 'schema': 'libqpdf/qpdf/auto_job_schema.hh',
}
SUMS = 'job.sums'
@@ -199,6 +201,9 @@ class Main:
raise Exception(
f'help for unknown option {option},'
f' lineno={lineno}')
+ if option not in self.help_options:
+ # QXXXQ also need to exclude help table
+ self.jdata[option[2:]]['help'] = short_text
print(f'ap.addOptionHelp("{option}", "{topic}",'
f' "{short_text}", R"({long_text})");', file=f)
help_lines += 1
@@ -226,9 +231,12 @@ class Main:
with open('job.yml', 'r') as f:
data = yaml.safe_load(f.read())
self.validate(data)
- self.options_without_help = set(
+ # Add the built-in help options to tables that we populate as
+ # we read job.yml since we won't encounter these in job.yml
+ self.help_options = set(
['--completion-bash', '--completion-zsh', '--help']
)
+ self.options_without_help = set(self.help_options)
self.prepare(data)
with open(self.DESTS['decl'], 'w') as f:
print(BANNER, file=f)
@@ -242,6 +250,11 @@ class Main:
with open('manual/cli.rst', 'r') as df:
print(BANNER, file=f)
self.generate_doc(df, f)
+ self.generate_schema(data)
+ with open(self.DESTS['schema'], 'w') as f:
+ print('static constexpr char const* JOB_SCHEMA_DATA = R"(' +
+ json.dumps(self.schema, indent=2, separators=(',', ': ')) +
+ ')";', file=f)
# Update hashes last to ensure that this will be rerun in the
# event of a failure.
@@ -251,6 +264,24 @@ class Main:
def prepare(self, data):
self.decls = []
self.init = []
+ self.jdata = {
+ # option: {
+ # tables: set(),
+ # help: string,
+ # QXXXQ something for registering handler
+ # }
+ }
+
+ def add_jdata(flag, table):
+ nonlocal self
+ if table == 'help':
+ self.help_options.add(f'--{flag}')
+ elif flag in self.jdata:
+ self.jdata[flag]['tables'].add(table)
+ else:
+ self.jdata[flag] = {
+ 'tables': set([table]),
+ }
self.init.append('auto b = [this](void (ArgParser::*f)()) {')
self.init.append(' return QPDFArgParser::bindBare(f, this);')
@@ -275,7 +306,7 @@ class Main:
self.decls.append('')
for o in data['options']:
table = o['table']
-
+ table_prefix = o.get('prefix', table)
if table == 'main':
self.init.append('this->ap.selectMainOptionTable();')
elif table == 'help':
@@ -296,12 +327,14 @@ class Main:
self.decls.append(f'void {identifier}();')
self.init.append(f'this->ap.addBare("{i}", '
f'b(&ArgParser::{identifier}));')
+ add_jdata(i, table_prefix)
for i in o.get('optional_parameter', []):
self.options_without_help.add(f'--{i}')
identifier = self.to_identifier(i, prefix, False)
self.decls.append(f'void {identifier}(char *);')
self.init.append(f'this->ap.addOptionalParameter("{i}", '
f'p(&ArgParser::{identifier}));')
+ add_jdata(i, table_prefix)
for i, v in o.get('required_parameter', {}).items():
self.options_without_help.add(f'--{i}')
identifier = self.to_identifier(i, prefix, False)
@@ -309,6 +342,7 @@ class Main:
self.init.append(f'this->ap.addRequiredParameter("{i}", '
f'p(&ArgParser::{identifier})'
f', "{v}");')
+ add_jdata(i, table_prefix)
for i, v in o.get('required_choices', {}).items():
self.options_without_help.add(f'--{i}')
identifier = self.to_identifier(i, prefix, False)
@@ -316,6 +350,7 @@ class Main:
self.init.append(f'this->ap.addChoices("{i}", '
f'p(&ArgParser::{identifier})'
f', true, {v}_choices);')
+ add_jdata(i, table_prefix)
for i, v in o.get('optional_choices', {}).items():
self.options_without_help.add(f'--{i}')
identifier = self.to_identifier(i, prefix, False)
@@ -323,11 +358,13 @@ class Main:
self.init.append(f'this->ap.addChoices("{i}", '
f'p(&ArgParser::{identifier})'
f', false, {v}_choices);')
+ add_jdata(i, table_prefix)
if table not in ('main', 'help'):
identifier = self.to_identifier(table, 'argEnd', False)
self.decls.append(f'void {identifier}();')
for o in data['options']:
table = o['table']
+ table_prefix = o.get('prefix', table)
if 'from_table' not in o:
continue
if table == 'main':
@@ -341,6 +378,79 @@ class Main:
for j in ft['options']:
self.init.append('this->ap.copyFromOtherTable'
f'("{j}", "{other_table}");')
+ add_jdata(j, table_prefix)
+
+ def generate_schema(self, data):
+ # XXX check data['json'] against what we know from jdata.
+ # Ultimately be able to generate a schema as well as
+ # JSONHandler and registering stuff.
+
+ # Check to make sure that every command-line option is
+ # represented either in data['json'] or data['no-json'].
+
+ # Build a list of options that we expect. If an option appears
+ # once, we just expect to see it once. If it appears in more
+ # than one options table, we need to see a separate version of
+ # it for each option table. It is represented prepended in
+ # job.yml with the table prefix. The table prefix is removed
+ # in the schema.
+ expected = {}
+ for k, v in self.jdata.items():
+ tables = v['tables']
+ if len(tables) == 1:
+ expected[k] = {**v}
+ else:
+ for t in sorted(tables):
+ expected[f'{t}.{k}'] = {**v}
+ for _, v in expected.items():
+ del v['tables']
+ options_seen = set(data['no-json'])
+
+ self.schema = {}
+
+ def option_to_json_key(s):
+ return self.to_identifier(s, '', False)
+
+ # Walk through the json information building the schema as we
+ # go. This verifies consistency between command-line options
+ # and the json section of the data and builds up a schema by
+ # populating with help information as available.
+ def build_schema(j, s):
+ for k, v in j.items():
+ if not (k in expected or
+ k.startswith('_') or
+ isinstance(v, str)):
+ raise Exception(f'json: unknown key {k}')
+ if k.startswith('_'):
+ schema_key = k[1:]
+ else:
+ schema_key = re.sub(r'[^\.]+\.', '', k)
+ schema_key = option_to_json_key(schema_key)
+ schema_value = v
+ if k in expected:
+ options_seen.add(re.sub('^_', '', k))
+ if v is None:
+ schema_value = re.sub(
+ r'--(\S+)',
+ lambda x: option_to_json_key(x.group(1)),
+ expected[k]['help'])
+ if (isinstance(v, dict)):
+ schema_value = {}
+ build_schema(v, schema_value)
+ elif (isinstance(v, list)):
+ if len(v) != 1:
+ raise Exception('json contains array with length != 1')
+ if isinstance(v[0], dict):
+ schema_value = [{}]
+ build_schema(v[0], schema_value[0])
+ elif schema_value is None:
+ raise Exception(f'unknown schema value for {k}')
+ s[schema_key] = schema_value
+
+ build_schema(data['json'], self.schema)
+ if options_seen != set(expected.keys()):
+ raise Exception('missing from json: ' +
+ str(set(expected.keys()) - options_seen))
def check_keys(self, what, d, exp):
if not isinstance(d, dict):
@@ -351,7 +461,8 @@ class Main:
exit(f'{what}: unknown keys = {extra}')
def validate(self, data):
- self.check_keys('top', data, set(['choices', 'options']))
+ self.check_keys('top', data, set(
+ ['choices', 'options', 'no-json', 'json']))
for o in data['options']:
self.check_keys('top', o, set(
['table', 'prefix', 'bare', 'positional',
@@ -363,7 +474,10 @@ class Main:
if const:
identifier = f'{prefix}_{identifier.upper()}'
else:
- identifier = f'{prefix}_{identifier.lower()}'
+ if prefix:
+ identifier = f'{prefix}_{identifier.lower()}'
+ else:
+ identifier = identifier.lower()
identifier = re.sub(r'_([a-z])',
lambda x: x.group(1).upper(),
identifier).replace('_', '')
diff --git a/job.sums b/job.sums
index 2387d64b..07c08c43 100644
--- a/job.sums
+++ b/job.sums
@@ -1,8 +1,9 @@
# Generated by generate_auto_job
-generate_auto_job b70f64314f1ae1f100fa6a11975dee5f7669038e2a619b6c9da1e5230db1dd1b
-job.yml 8177cadf41096efdc174f04daadfe5d98c592ad44ad10cb96537521fd79a801a
+generate_auto_job 0758b244fc4e2d3e440883072d2740bc4cdb26c5aa8de938f028afd7d83fad79
+job.yml 2856c2635d42f0a58717d3ffce3125816d8f98ff17245c4b7a0669d70cd68b84
libqpdf/qpdf/auto_job_decl.hh 97395ecbe590b23ae04d6cce2080dbd0e998917ff5eeaa5c6aafa91041d3cd6a
libqpdf/qpdf/auto_job_help.hh 2653faaf59415bec81c3a85d426239d52b609ac24faba34ec2d26f00710dd2c6
libqpdf/qpdf/auto_job_init.hh 465bf46769559ceb77110d1b9d3293ba9b3595850b49848c31aeabd10aadb4ad
+libqpdf/qpdf/auto_job_schema.hh c91a4e182e088797b70dda94af03ca32d360f3564890132da2a8bdc3c4432423
manual/_ext/qpdf.py 855fe12de5af7a10bb24be6ecc4d5dff4c84ac58cf388a13be6bbb394346a67d
manual/cli.rst b136c7f33a538c580b081a7e802c27635aad2a4229efa0eb0736466116b7aa90
diff --git a/job.yml b/job.yml
index fa15e290..dec43e90 100644
--- a/job.yml
+++ b/job.yml
@@ -217,3 +217,161 @@ options:
required_parameter:
prefix: prefix
password: password
+no-json:
+ - preserve-unreferenced-resources
+json:
+ # The structure of this section defines what the json input to
+ # QPDFJob looks like. If a key starts with underscore or has a value
+ # that is a string, it does not map to a command-line argument. If
+ # the value is null, its properties and help come from other
+ # information known by generate_auto_job. This information is used
+ # to construct a "schema" (as in JSON.hh) for the json input to
+ # QPDFJob. A leading underscore is removed from the key in the schema.
+ _input:
+ _file:
+ _name: "input filename"
+ main.password:
+ password-file:
+ empty:
+ _output:
+ _file:
+ _name: "output filename"
+ replace-input:
+ split-pages:
+ _options:
+ qdf:
+ preserve-unreferenced:
+ newline-before-endstream:
+ normalize-content:
+ stream-data:
+ compress-streams:
+ recompress-flate:
+ decode-level:
+ decrypt:
+ static-aes-iv:
+ static-id:
+ no-original-object-ids:
+ copy-encryption:
+ encryption-file-password:
+ linearize:
+ linearize-pass1:
+ object-streams:
+ min-version:
+ force-version:
+ progress:
+ encrypt:
+ user-password: "user password"
+ owner-password: "owner password"
+ key-length: "key length: 48, 128, 256"
+ _40-bit:
+ Enc40.annotate:
+ Enc40.extract:
+ Enc40.modify:
+ Enc40.print:
+ _128-bit:
+ Enc128.accessibility:
+ Enc128.annotate:
+ Enc128.assemble:
+ Enc128.cleartext-metadata:
+ Enc128.extract:
+ Enc128.form:
+ Enc128.modify-other:
+ Enc128.modify:
+ Enc128.print:
+ force-V4:
+ use-aes:
+ _256-bit:
+ Enc256.accessibility:
+ Enc256.annotate:
+ Enc256.assemble:
+ Enc256.cleartext-metadata:
+ Enc256.extract:
+ Enc256.form:
+ Enc256.modify-other:
+ Enc256.modify:
+ Enc256.print:
+ allow-insecure:
+ force-R5:
+ _options:
+ allow-weak-crypto:
+ deterministic-id:
+ keep-files-open:
+ keep-files-open-threshold:
+ no-warn:
+ verbose:
+ warning-exit-0:
+ ignore-xref-streams:
+ password-is-hex-key:
+ password-mode:
+ suppress-password-recovery:
+ suppress-recovery:
+ _inspect:
+ check:
+ check-linearization:
+ filtered-stream-data:
+ is-encrypted:
+ raw-stream-data:
+ requires-password:
+ show-encryption:
+ show-encryption-key:
+ show-linearization:
+ show-npages:
+ show-object:
+ show-pages:
+ show-xref:
+ with-images:
+ list-attachments:
+ show-attachment:
+ json:
+ json-key:
+ - null
+ json-object:
+ - null
+ _transform:
+ coalesce-contents:
+ compression-level:
+ externalize-inline-images:
+ ii-min-bytes:
+ remove-unreferenced-resources:
+ _modify:
+ add-attachment:
+ - file: "attachment to add"
+ creationdate:
+ description:
+ filename:
+ key:
+ mimetype:
+ moddate:
+ replace:
+ remove-attachment:
+ copy-attachments-from:
+ - file: "attachment source filename"
+ CopyAtt.password:
+ prefix:
+ collate:
+ flatten-annotations:
+ flatten-rotation:
+ generate-appearances:
+ keep-inline-images:
+ oi-min-area:
+ oi-min-height:
+ oi-min-width:
+ optimize-images:
+ pages:
+ - file: "source for for pages"
+ Pages.password:
+ range: "page range"
+ remove-page-labels:
+ rotate:
+ overlay:
+ file: "source file for overlay"
+ UO.password:
+ from:
+ repeat:
+ to:
+ underlay:
+ file: "source file for underlay"
+ UO.password:
+ from:
+ repeat:
+ to:
diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh
new file mode 100644
index 00000000..9ecdc507
--- /dev/null
+++ b/libqpdf/qpdf/auto_job_schema.hh
@@ -0,0 +1,176 @@
+static constexpr char const* JOB_SCHEMA_DATA = R"({
+ "input": {
+ "file": {
+ "name": "input filename",
+ "password": "specify password",
+ "passwordFile": "read password from a file"
+ },
+ "empty": "empty input file"
+ },
+ "output": {
+ "file": {
+ "name": "output filename"
+ },
+ "replaceInput": "replace input with output",
+ "splitPages": "write pages to separate files",
+ "options": {
+ "qdf": "enable viewing PDF code in a text editor",
+ "preserveUnreferenced": "preserve unreferenced objects",
+ "newlineBeforeEndstream": "force a newline before endstream",
+ "normalizeContent": "fix newlines in content streams",
+ "streamData": "control stream compression",
+ "compressStreams": "compress uncompressed streams",
+ "recompressFlate": "uncompress and recompress flate",
+ "decodeLevel": "control which streams to uncompress",
+ "decrypt": "remove encryption from input file",
+ "staticAesIv": "use a fixed AES vector",
+ "staticId": "use a fixed document ID",
+ "noOriginalObjectIds": "omit original object ID in qdf",
+ "copyEncryption": "copy another file's encryption details",
+ "encryptionFilePassword": "supply password for copyEncryption",
+ "linearize": "linearize (web-optimize) output",
+ "linearizePass1": "save pass 1 of linearization",
+ "objectStreams": "control use of object streams",
+ "minVersion": "set minimum PDF version",
+ "forceVersion": "set output PDF version",
+ "progress": "show progress when writing",
+ "encrypt": {
+ "userPassword": "user password",
+ "ownerPassword": "owner password",
+ "keyLength": "key length: 48, 128, 256",
+ "40Bit": {
+ "annotate": "restrict document annotation",
+ "extract": "restrict text/graphic extraction",
+ "modify": "restrict document modification",
+ "print": "restrict printing"
+ },
+ "128Bit": {
+ "accessibility": "restrict document accessibility",
+ "annotate": "restrict document annotation",
+ "assemble": "restrict document assembly",
+ "cleartextMetadata": "don't encrypt metadata",
+ "extract": "restrict text/graphic extraction",
+ "form": "restrict form filling",
+ "modifyOther": "restrict other modifications",
+ "modify": "restrict document modification",
+ "print": "restrict printing",
+ "forceV4": "force V=4 in encryption dictionary",
+ "useAes": "use AES with 128-bit encryption"
+ },
+ "256Bit": {
+ "accessibility": "restrict document accessibility",
+ "annotate": "restrict document annotation",
+ "assemble": "restrict document assembly",
+ "cleartextMetadata": "don't encrypt metadata",
+ "extract": "restrict text/graphic extraction",
+ "form": "restrict form filling",
+ "modifyOther": "restrict other modifications",
+ "modify": "restrict document modification",
+ "print": "restrict printing",
+ "allowInsecure": "allow empty owner passwords",
+ "forceR5": "use unsupported R=5 encryption"
+ }
+ }
+ }
+ },
+ "options": {
+ "allowWeakCrypto": "allow insecure cryptographic algorithms",
+ "deterministicId": "generate ID deterministically",
+ "keepFilesOpen": "manage keeping multiple files open",
+ "keepFilesOpenThreshold": "set threshold for keepFilesOpen",
+ "noWarn": "suppress printing warning messages",
+ "verbose": "print additional information",
+ "warningExit0": "exit 0 even with warnings",
+ "ignoreXrefStreams": "use xref tables rather than streams",
+ "passwordIsHexKey": "provide hex-encoded encryption key",
+ "passwordMode": "tweak how qpdf encodes passwords",
+ "suppressPasswordRecovery": "don't try different password encodings",
+ "suppressRecovery": "suppress error recovery"
+ },
+ "inspect": {
+ "check": "partially check whether PDF is valid",
+ "checkLinearization": "check linearization tables",
+ "filteredStreamData": "show filtered stream data",
+ "isEncrypted": "silently test whether a file is encrypted",
+ "rawStreamData": "show raw stream data",
+ "requiresPassword": "silently test a file's password",
+ "showEncryption": "information about encrypted files",
+ "showEncryptionKey": "show key with showEncryption",
+ "showLinearization": "show linearization hint tables",
+ "showNpages": "show number of pages",
+ "showObject": "show contents of an object",
+ "showPages": "display page dictionary information",
+ "showXref": "show cross reference data",
+ "withImages": "include image details with showPages",
+ "listAttachments": "list embedded files",
+ "showAttachment": "export an embedded file",
+ "json": "show file in json format",
+ "jsonKey": [
+ null
+ ],
+ "jsonObject": [
+ null
+ ]
+ },
+ "transform": {
+ "coalesceContents": "combine content streams",
+ "compressionLevel": "set compression level for flate",
+ "externalizeInlineImages": "convert inline to regular images",
+ "iiMinBytes": "set minimum size for externalizeInlineImages",
+ "removeUnreferencedResources": "remove unreferenced page resources"
+ },
+ "modify": {
+ "addAttachment": [
+ {
+ "file": "attachment to add",
+ "creationdate": "set attachment's creation date",
+ "description": "set attachment's description",
+ "filename": "set attachment's displayed filename",
+ "key": "specify attachment key",
+ "mimetype": "attachment mime type, e.g. application/pdf",
+ "moddate": "set attachment's modification date",
+ "replace": "replace attachment with same key"
+ }
+ ],
+ "removeAttachment": "remove an embedded file",
+ "copyAttachmentsFrom": [
+ {
+ "file": "attachment source filename",
+ "password": "specify password",
+ "prefix": "key prefix for copying attachments"
+ }
+ ],
+ "collate": "collate with pages",
+ "flattenAnnotations": "push annotations into content",
+ "flattenRotation": "remove rotation from page dictionary",
+ "generateAppearances": "generate appearances for form fields",
+ "keepInlineImages": "exclude inline images from optimization",
+ "oiMinArea": "minimum area for optimizeImages",
+ "oiMinHeight": "minimum height for optimizeImages",
+ "oiMinWidth": "minimum width for optimizeImages",
+ "optimizeImages": "use efficient compression for images",
+ "pages": [
+ {
+ "file": "source for for pages",
+ "password": "specify password",
+ "range": "page range"
+ }
+ ],
+ "removePageLabels": "remove explicit page numbers",
+ "rotate": "rotate pages",
+ "overlay": {
+ "file": "source file for overlay",
+ "password": "specify password",
+ "from": "source pages for underlay/overlay",
+ "repeat": "overlay/underlay pages to repeat",
+ "to": "destination pages for underlay/overlay"
+ },
+ "underlay": {
+ "file": "source file for underlay",
+ "password": "specify password",
+ "from": "source pages for underlay/overlay",
+ "repeat": "overlay/underlay pages to repeat",
+ "to": "destination pages for underlay/overlay"
+ }
+ }
+})";