aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorm-holger <m-holger@kubitscheck.org>2023-05-24 17:28:17 +0200
committerm-holger <m-holger@kubitscheck.org>2023-05-27 11:54:19 +0200
commit698a70e6a84cf7c0db667e9d9e021b4c34c85a3e (patch)
tree8318f0b78f76bee65deb45fffcc49eb7ba89fb56
parent9907700faeca965c1e544af9247102d163a8b7df (diff)
downloadqpdf-698a70e6a84cf7c0db667e9d9e021b4c34c85a3e.tar.zst
Code tidy - reflow comments and strings
-rw-r--r--include/qpdf/Buffer.hh29
-rw-r--r--include/qpdf/JSON.hh235
-rw-r--r--include/qpdf/Pipeline.hh69
-rw-r--r--include/qpdf/Pl_Buffer.hh47
-rw-r--r--include/qpdf/QPDF.hh732
-rw-r--r--include/qpdf/QPDFAcroFormDocumentHelper.hh234
-rw-r--r--include/qpdf/QPDFExc.hh37
-rw-r--r--include/qpdf/QPDFFormFieldObjectHelper.hh154
-rw-r--r--include/qpdf/QPDFJob.hh225
-rw-r--r--include/qpdf/QPDFObjectHandle.hh1158
-rw-r--r--include/qpdf/QPDFPageObjectHelper.hh440
-rw-r--r--include/qpdf/QPDFTokenizer.hh106
-rw-r--r--libqpdf/JSON.cc68
-rw-r--r--libqpdf/Pl_Buffer.cc3
-rw-r--r--libqpdf/QPDF.cc490
-rw-r--r--libqpdf/QPDFAcroFormDocumentHelper.cc329
-rw-r--r--libqpdf/QPDFFormFieldObjectHelper.cc63
-rw-r--r--libqpdf/QPDFJob.cc441
-rw-r--r--libqpdf/QPDFObjectHandle.cc88
-rw-r--r--libqpdf/QPDFPageObjectHelper.cc145
-rw-r--r--libqpdf/QPDFParser.cc86
-rw-r--r--libqpdf/QPDFTokenizer.cc123
-rw-r--r--libqpdf/QPDF_Stream.cc56
-rw-r--r--libqpdf/QPDF_encryption.cc135
-rw-r--r--libqpdf/QPDF_json.cc23
-rw-r--r--libqpdf/QPDF_linearization.cc437
-rw-r--r--libqpdf/QPDF_optimization.cc94
-rw-r--r--libqpdf/QPDF_pages.cc122
28 files changed, 2533 insertions, 3636 deletions
diff --git a/include/qpdf/Buffer.hh b/include/qpdf/Buffer.hh
index ab92bd53..719a6bd5 100644
--- a/include/qpdf/Buffer.hh
+++ b/include/qpdf/Buffer.hh
@@ -2,22 +2,19 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
#ifndef BUFFER_HH
#define BUFFER_HH
@@ -34,13 +31,13 @@ class Buffer
QPDF_DLL
Buffer();
- // Create a Buffer object whose memory is owned by the class and
- // will be freed when the Buffer object is destroyed.
+ // Create a Buffer object whose memory is owned by the class and will be freed when the Buffer
+ // object is destroyed.
QPDF_DLL
Buffer(size_t size);
- // Create a Buffer object whose memory is owned by the caller and
- // will not be freed when the Buffer is destroyed.
+ // Create a Buffer object whose memory is owned by the caller and will not be freed when the
+ // Buffer is destroyed.
QPDF_DLL
Buffer(unsigned char* buf, size_t size);
diff --git a/include/qpdf/JSON.hh b/include/qpdf/JSON.hh
index 00f78112..ce37ef50 100644
--- a/include/qpdf/JSON.hh
+++ b/include/qpdf/JSON.hh
@@ -2,38 +2,31 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
#ifndef JSON_HH
#define JSON_HH
-// This is a simple JSON serializer and parser, primarily designed for
-// serializing QPDF Objects as JSON. While it may work as a
-// general-purpose JSON parser/serializer, there are better options.
-// JSON objects contain their data as smart pointers. When one JSON object
-// is added to another, this pointer is copied. This means you can
-// create temporary JSON objects on the stack, add them to other
-// objects, and let them go out of scope safely. It also means that if
-// a JSON object is added in more than one place, all copies
-// share the underlying data. This makes them similar in structure and
-// behavior to QPDFObjectHandle and may feel natural within the QPDF
-// codebase, but it is also a good reason not to use this as a
-// general-purpose JSON package.
+// This is a simple JSON serializer and parser, primarily designed for serializing QPDF Objects as
+// JSON. While it may work as a general-purpose JSON parser/serializer, there are better options.
+// JSON objects contain their data as smart pointers. When one JSON object is added to another, this
+// pointer is copied. This means you can create temporary JSON objects on the stack, add them to
+// other objects, and let them go out of scope safely. It also means that if a JSON object is added
+// in more than one place, all copies share the underlying data. This makes them similar in
+// structure and behavior to QPDFObjectHandle and may feel natural within the QPDF codebase, but it
+// is also a good reason not to use this as a general-purpose JSON package.
#include <qpdf/DLL.h>
#include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785)
@@ -61,71 +54,60 @@ class JSON
QPDF_DLL
std::string unparse() const;
- // Write the JSON object through a pipeline. The `depth` parameter
- // specifies how deeply nested this is in another JSON structure,
- // which makes it possible to write clean-looking JSON
+ // Write the JSON object through a pipeline. The `depth` parameter specifies how deeply nested
+ // this is in another JSON structure, which makes it possible to write clean-looking JSON
// incrementally.
QPDF_DLL
void write(Pipeline*, size_t depth = 0) const;
// Helper methods for writing JSON incrementally.
//
- // "first" -- Several methods take a `bool& first` parameter. The
- // open methods always set it to true, and the methods to output
- // items always set it to false. This way, the item and close
- // methods can always know whether or not a first item is being
- // written. The intended mode of operation is to start with a new
- // `bool first = true` each time a new container is opened and
- // to pass that `first` through to all the methods that are
- // called to add top-level items to the container as well as to
- // close the container. This lets the JSON object use it to keep
- // track of when it's writing a first object and when it's not. If
- // incrementally writing multiple levels of depth, a new `first`
- // should used for each new container that is opened.
+ // "first" -- Several methods take a `bool& first` parameter. The open methods always set it to
+ // true, and the methods to output items always set it to false. This way, the item and close
+ // methods can always know whether or not a first item is being written. The intended mode of
+ // operation is to start with a new `bool first = true` each time a new container is opened and
+ // to pass that `first` through to all the methods that are called to add top-level items to the
+ // container as well as to close the container. This lets the JSON object use it to keep track
+ // of when it's writing a first object and when it's not. If incrementally writing multiple
+ // levels of depth, a new `first` should be used for each new container that is opened.
//
- // "depth" -- Indicate the level of depth. This is used for
- // consistent indentation. When writing incrementally, whenever
- // you call a method to add an item to a container, the value of
- // `depth` should be one more than whatever value is passed to the
- // container open and close methods.
+ // "depth" -- Indicate the level of depth. This is used for consistent indentation. When writing
+ // incrementally, whenever you call a method to add an item to a container, the value of `depth`
+ // should be one more than whatever value is passed to the container open and close methods.
// Open methods ignore the value of first and set it to false
QPDF_DLL
static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0);
QPDF_DLL
static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0);
- // Close methods don't modify first. A true value indicates that
- // we are closing an empty object.
+ // Close methods don't modify first. A true value indicates that we are closing an empty object.
QPDF_DLL
static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0);
QPDF_DLL
static void writeArrayClose(Pipeline*, bool first, size_t depth = 0);
- // The item methods use the value of first to determine if this is
- // the first item and always set it to false.
+ // The item methods use the value of first to determine if this is the first item and always set
+ // it to false.
QPDF_DLL
static void writeDictionaryItem(
Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0);
- // Write just the key of a new dictionary item, useful if writing
- // nested structures. Calls writeNext.
+ // Write just the key of a new dictionary item, useful if writing nested structures. Calls
+ // writeNext.
QPDF_DLL
static void
writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0);
QPDF_DLL
static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0);
- // If writing nested structures incrementally, call writeNext
- // before opening a new array or container in the midst of an
- // existing one. The `first` you pass to writeNext should be the
- // one for the parent object. The depth should be the one for the
- // child object. Then start a new `first` for the nested item.
- // Note that writeDictionaryKey and writeArrayItem call writeNext
- // for you, so this is most important when writing subsequent
- // items or container openers to an array.
+ // If writing nested structures incrementally, call writeNext before opening a new array or
+ // container in the midst of an existing one. The `first` you pass to writeNext should be the
+ // one for the parent object. The depth should be the one for the child object. Then start a new
+ // `first` for the nested item. Note that writeDictionaryKey and writeArrayItem call writeNext
+ // for you, so this is most important when writing subsequent items or container openers to an
+ // array.
QPDF_DLL
static void writeNext(Pipeline* p, bool& first, size_t depth = 0);
- // The JSON spec calls dictionaries "objects", but that creates
- // too much confusion when referring to instances of the JSON
- // class.
+ // The JSON spec calls dictionaries "objects", but that creates too much confusion when
+ // referring to instances of the JSON class.
QPDF_DLL
static JSON makeDictionary();
// addDictionaryMember returns the newly added item.
@@ -149,10 +131,9 @@ class JSON
QPDF_DLL
static JSON makeNull();
- // A blob serializes as a string. The function will be called by
- // JSON with a pipeline and should write binary data to the
- // pipeline but not call finish(). JSON will call finish() at the
- // right time.
+ // A blob serializes as a string. The function will be called by JSON with a pipeline and should
+ // write binary data to the pipeline but not call finish(). JSON will call finish() at the right
+ // time.
QPDF_DLL
static JSON makeBlob(std::function<void(Pipeline*)>);
@@ -162,11 +143,9 @@ class JSON
QPDF_DLL
bool isDictionary() const;
- // If the key is already in the dictionary, return true.
- // Otherwise, mark it as seen and return false. This is primarily
- // intended to be used by the parser to detect duplicate keys when
- // the reactor blocks them from being added to the final
- // dictionary.
+ // If the key is already in the dictionary, return true. Otherwise, mark it as seen and return
+ // false. This is primarily intended to be used by the parser to detect duplicate keys when the
+ // reactor blocks them from being added to the final dictionary.
QPDF_DLL
bool checkDictionaryKeySeen(std::string const& key);
@@ -187,45 +166,35 @@ class JSON
QPDF_DLL
bool forEachArrayItem(std::function<void(JSON value)> fn) const;
- // Check this JSON object against a "schema". This is not a schema
- // according to any standard. It's just a template of what the
- // JSON is supposed to contain. The checking does the following:
+ // Check this JSON object against a "schema". This is not a schema according to any standard.
+ // It's just a template of what the JSON is supposed to contain. The checking does the
+ // following:
//
- // * The schema is a nested structure containing dictionaries,
- // single-element arrays, and strings only.
- // * Recursively walk the schema. In the items below, "schema
- // object" refers to an object in the schema, and "checked
- // object" refers to the corresponding part of the object
- // being checked.
- // * If the schema object is a dictionary, the checked object
- // must have a dictionary in the same place with the same
- // keys. If flags contains f_optional, a key in the schema
- // does not have to be present in the object. Otherwise, all
- // keys have to be present. Any key in the object must be
- // present in the schema.
- // * If the schema object is an array of length 1, the checked
- // object may either be a single item or an array of items.
- // The single item or each element of the checked object's
- // array is validated against the single element of the
- // schema's array. The rationale behind this logic is that a
- // single element may appear wherever the schema allows a
- // variable-length array. This makes it possible to start
- // allowing an array in the future where a single element was
- // previously required without breaking backward
- // compatibility.
- // * If the schema object is an array of length > 1, the checked
- // object must be an array of the same length. In this case,
- // each element of the checked object array is validated
+ // * The schema is a nested structure containing dictionaries, single-element arrays, and
+ // strings only.
+ // * Recursively walk the schema. In the items below, "schema object" refers to an object in
+ // the schema, and "checked object" refers to the corresponding part of the object being
+ // checked.
+ // * If the schema object is a dictionary, the checked object must have a dictionary in the
+ // same place with the same keys. If flags contains f_optional, a key in the schema does not
+ // have to be present in the object. Otherwise, all keys have to be present. Any key in the
+ // object must be present in the schema.
+ // * If the schema object is an array of length 1, the checked object may either be a single
+ // item or an array of items. The single item or each element of the checked object's
+ // array is validated against the single element of the schema's array. The rationale behind
+ // this logic is that a single element may appear wherever the schema allows a
+ // variable-length array. This makes it possible to start allowing an array in the future
+ // where a single element was previously required without breaking backward compatibility.
+ // * If the schema object is an array of length > 1, the checked object must be an array of
+ // the same length. In this case, each element of the checked object array is validated
// against the corresponding element of the schema array.
- // * Otherwise, the value must be a string whose value is a
- // description of the object's corresponding value, which may
- // have any type.
+ // * Otherwise, the value must be a string whose value is a description of the object's
+ // corresponding value, which may have any type.
//
- // QPDF's JSON output conforms to certain strict compatibility
- // rules as discussed in the manual. The idea is that a JSON
- // structure created manually in qpdf.cc doubles as both JSON help
- // information and a schema for validating the JSON that qpdf
- // generates. Any discrepancies are a bug in qpdf.
+ // QPDF's JSON output conforms to certain strict compatibility rules as discussed in the manual.
+ // The idea is that a JSON structure created manually in qpdf.cc doubles as both JSON help
+ // information and a schema for validating the JSON that qpdf generates. Any discrepancies are a
+ // bug in qpdf.
//
// Flags is a bitwise or of values from check_flags_e.
enum check_flags_e {
@@ -239,9 +208,8 @@ class JSON
QPDF_DLL
bool checkSchema(JSON schema, std::list<std::string>& errors);
- // An pointer to a Reactor class can be passed to parse, which
- // will enable the caller to react to incremental events in the
- // construction of the JSON object. This makes it possible to
+ // A pointer to a Reactor class can be passed to parse, which will enable the caller to react
+ // to incremental events in the construction of the JSON object. This makes it possible to
// implement SAX-like handling of very large JSON objects.
class QPDF_DLL_CLASS Reactor
{
@@ -249,17 +217,14 @@ class JSON
QPDF_DLL
virtual ~Reactor() = default;
- // The start/end methods are called when parsing of a
- // dictionary or array is started or ended. The item methods
- // are called when an item is added to a dictionary or array.
- // When adding a container to another container, the item
- // method is called with an empty container before the lower
- // container's start method is called. See important notes in
+ // The start/end methods are called when parsing of a dictionary or array is started or
+ // ended. The item methods are called when an item is added to a dictionary or array. When
+ // adding a container to another container, the item method is called with an empty
+ // container before the lower container's start method is called. See important notes in
// "Item methods" below.
- // During parsing of a JSON string, the parser is operating on
- // a single object at a time. When a dictionary or array is
- // started, a new context begins, and when that dictionary or
+ // During parsing of a JSON string, the parser is operating on a single object at a time.
+ // When a dictionary or array is started, a new context begins, and when that dictionary or
// array is ended, the previous context is resumed. So, for
// example, if you have `{"a": [1]}`, you will receive the
// following method calls
@@ -271,9 +236,8 @@ class JSON
// containerEnd -- now current object is the dictionary again
// containerEnd -- current object is undefined
//
- // If the top-level item in a JSON string is a scalar, the
- // topLevelScalar() method will be called. No argument is
- // passed since the object is the same as what is returned by
+ // If the top-level item in a JSON string is a scalar, the topLevelScalar() method will be
+ // called. No argument is passed since the object is the same as what is returned by
// parse().
QPDF_DLL
@@ -287,21 +251,17 @@ class JSON
// Item methods:
//
- // The return value of the item methods indicate whether the
- // item has been "consumed". If the item method returns true,
- // then the item will not be added to the containing JSON
+ // The return value of the item methods indicates whether the item has been "consumed". If
+ // the item method returns true, then the item will not be added to the containing JSON
// object. This is what allows arbitrarily large JSON objects
// to be parsed and not have to be kept in memory.
//
- // NOTE: When a dictionary or an array is added to a
- // container, the dictionaryItem or arrayItem method is called
- // when the child item's start delimiter is encountered, so
- // the JSON object passed in at that time will always be in
- // its initial, empty state. Additionally, the child item's
- // start method is not called until after the parent item's
- // item method is called. This makes it possible to keep track
- // of the current depth level by incrementing level on start
- // methods and decrementing on end methods.
+ // NOTE: When a dictionary or an array is added to a container, the dictionaryItem or
+ // arrayItem method is called when the child item's start delimiter is encountered, so the
+ // JSON object passed in at that time will always be in its initial, empty state.
+ // Additionally, the child item's start method is not called until after the parent item's
+ // item method is called. This makes it possible to keep track of the current depth level by
+ // incrementing level on start methods and decrementing on end methods.
QPDF_DLL
virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0;
@@ -312,14 +272,13 @@ class JSON
// Create a JSON object from a string.
QPDF_DLL
static JSON parse(std::string const&);
- // Create a JSON object from an input source. See above for
- // information about how to use the Reactor.
+ // Create a JSON object from an input source. See above for information about how to use the
+ // Reactor.
QPDF_DLL
static JSON parse(InputSource&, Reactor* reactor = nullptr);
- // parse calls setOffsets to set the inclusive start and
- // non-inclusive end offsets of an object relative to its input
- // string. Otherwise, both values are 0.
+ // parse calls setOffsets to set the inclusive start and non-inclusive end offsets of an object
+ // relative to its input string. Otherwise, both values are 0.
QPDF_DLL
void setStart(qpdf_offset_t);
QPDF_DLL
diff --git a/include/qpdf/Pipeline.hh b/include/qpdf/Pipeline.hh
index a35e7ec6..9b5bc483 100644
--- a/include/qpdf/Pipeline.hh
+++ b/include/qpdf/Pipeline.hh
@@ -2,44 +2,36 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
-// Generalized Pipeline interface. By convention, subclasses of
-// Pipeline are called Pl_Something.
+// Generalized Pipeline interface. By convention, subclasses of Pipeline are called Pl_Something.
//
-// When an instance of Pipeline is created with a pointer to a next
-// pipeline, that pipeline writes its data to the next one when it
-// finishes with it. In order to make possible a usage style in which
-// a pipeline may be passed to a function which may stick other
-// pipelines in front of it, the allocator of a pipeline is
-// responsible for its destruction. In other words, one pipeline
-// object does not attempt to manage the memory of its successor.
+// When an instance of Pipeline is created with a pointer to a next pipeline, that pipeline writes
+// its data to the next one when it finishes with it. In order to make possible a usage style in
+// which a pipeline may be passed to a function which may stick other pipelines in front of it, the
+// allocator of a pipeline is responsible for its destruction. In other words, one pipeline object
+// does not attempt to manage the memory of its successor.
//
-// The client is required to call finish() before destroying a
-// Pipeline in order to avoid loss of data. A Pipeline class should
-// not throw an exception in the destructor if this hasn't been done
+// The client is required to call finish() before destroying a Pipeline in order to avoid loss of
+// data. A Pipeline class should not throw an exception in the destructor if this hasn't been done
// though since doing so causes too much trouble when deleting
// pipelines during error conditions.
//
-// Some pipelines are reusable (i.e., you can call write() after
-// calling finish() and can call finish() multiple times) while others
-// are not. It is up to the caller to use a pipeline according to its
-// own restrictions.
+// Some pipelines are reusable (i.e., you can call write() after calling finish() and can call
+// finish() multiple times) while others are not. It is up to the caller to use a pipeline
+// according to its own restrictions.
#ifndef PIPELINE_HH
#define PIPELINE_HH
@@ -50,8 +42,8 @@
#include <memory>
#include <string>
-// Remember to use QPDF_DLL_CLASS on anything derived from Pipeline so
-// it will work with dynamic_cast across the shared object boundary.
+// Remember to use QPDF_DLL_CLASS on anything derived from Pipeline so it will work with
+// dynamic_cast across the shared object boundary.
class QPDF_DLL_CLASS Pipeline
{
public:
@@ -61,9 +53,8 @@ class QPDF_DLL_CLASS Pipeline
QPDF_DLL
virtual ~Pipeline() = default;
- // Subclasses should implement write and finish to do their jobs
- // and then, if they are not end-of-line pipelines, call
- // getNext()->write or getNext()->finish.
+ // Subclasses should implement write and finish to do their jobs and then, if they are not
+ // end-of-line pipelines, call getNext()->write or getNext()->finish.
QPDF_DLL
virtual void write(unsigned char const* data, size_t len) = 0;
QPDF_DLL
@@ -71,17 +62,15 @@ class QPDF_DLL_CLASS Pipeline
QPDF_DLL
std::string getIdentifier() const;
- // These are convenience methods for making it easier to write
- // certain other types of data to pipelines without having to
- // cast. The methods that take char const* expect null-terminated
- // C strings and do not write the null terminators.
+ // These are convenience methods for making it easier to write certain other types of data to
+ // pipelines without having to cast. The methods that take char const* expect null-terminated C
+ // strings and do not write the null terminators.
QPDF_DLL
void writeCStr(char const* cstr);
QPDF_DLL
void writeString(std::string const&);
- // This allows *p << "x" << "y" but is not intended to be a
- // general purpose << compatible with ostream and does not have
- // local awareness or the ability to be "imbued" with properties.
+ // This allows *p << "x" << "y" but is not intended to be a general purpose << compatible with
+ // ostream and does not have locale awareness or the ability to be "imbued" with properties.
QPDF_DLL
Pipeline& operator<<(char const* cstr);
QPDF_DLL
diff --git a/include/qpdf/Pl_Buffer.hh b/include/qpdf/Pl_Buffer.hh
index ffc01d95..b244a9f5 100644
--- a/include/qpdf/Pl_Buffer.hh
+++ b/include/qpdf/Pl_Buffer.hh
@@ -2,36 +2,31 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
#ifndef PL_BUFFER_HH
#define PL_BUFFER_HH
-// This pipeline accumulates the data passed to it into a memory
-// buffer. Each subsequent use of this buffer appends to the data
-// accumulated so far. getBuffer() may be called only after calling
-// finish() and before calling any subsequent write(). At that point,
-// a dynamically allocated Buffer object is returned and the internal
-// buffer is reset. The caller is responsible for deleting the
-// returned Buffer.
+// This pipeline accumulates the data passed to it into a memory buffer. Each subsequent use of
+// this buffer appends to the data accumulated so far. getBuffer() may be called only after calling
+// finish() and before calling any subsequent write(). At that point, a dynamically allocated
+// Buffer object is returned and the internal buffer is reset. The caller is responsible for
+// deleting the returned Buffer.
//
-// For this pipeline, "next" may be null. If a next pointer is
-// provided, this pipeline will also pass the data through to it.
+// For this pipeline, "next" may be null. If a next pointer is provided, this pipeline will also
+// pass the data through to it.
#include <qpdf/Buffer.hh>
#include <qpdf/Pipeline.hh>
@@ -61,12 +56,10 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline
QPDF_DLL
std::shared_ptr<Buffer> getBufferSharedPointer();
- // getMallocBuffer behaves in the same was as getBuffer except the
- // buffer is allocated with malloc(), making it suitable for use
- // when calling from other languages. If there is no data, *buf is
- // set to a null pointer and *len is set to 0. Otherwise, *buf is
- // a buffer of size *len allocated with malloc(). It is the
- // caller's responsibility to call free() on the buffer.
+ // getMallocBuffer behaves in the same way as getBuffer except the buffer is allocated with
+ // malloc(), making it suitable for use when calling from other languages. If there is no data,
+ // *buf is set to a null pointer and *len is set to 0. Otherwise, *buf is a buffer of size *len
+ // allocated with malloc(). It is the caller's responsibility to call free() on the buffer.
QPDF_DLL
void getMallocBuffer(unsigned char** buf, size_t* len);
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 87849dbd..707899cd 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -2,22 +2,19 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
#ifndef QPDF_HH
#define QPDF_HH
@@ -55,8 +52,7 @@ class QPDFParser;
class QPDF
{
public:
- // Get the current version of the QPDF software. See also
- // qpdf/DLL.h
+ // Get the current version of the QPDF software. See also qpdf/DLL.h
QPDF_DLL
static std::string const& QPDFVersion();
@@ -68,92 +64,74 @@ class QPDF
QPDF_DLL
static std::shared_ptr<QPDF> create();
- // Associate a file with a QPDF object and do initial parsing of
- // the file. PDF objects are not read until they are needed. A
- // QPDF object may be associated with only one file in its
- // lifetime. This method must be called before any methods that
- // potentially ask for information about the PDF file are called.
- // Prior to calling this, the only methods that are allowed are
- // those that set parameters. If the input file is not
- // encrypted,either a null password or an empty password can be
- // used. If the file is encrypted, either the user password or
- // the owner password may be supplied. The method
- // setPasswordIsHexKey may be called prior to calling this method
- // or any of the other process methods to force the password to be
- // interpreted as a raw encryption key. See comments on
- // setPasswordIsHexKey for more information.
+ // Associate a file with a QPDF object and do initial parsing of the file. PDF objects are not
+ // read until they are needed. A QPDF object may be associated with only one file in its
+ // lifetime. This method must be called before any methods that potentially ask for information
+ // about the PDF file are called. Prior to calling this, the only methods that are allowed are
+ // those that set parameters. If the input file is not encrypted, either a null password or an
+ // empty password can be used. If the file is encrypted, either the user password or the owner
+ // password may be supplied. The method setPasswordIsHexKey may be called prior to calling this
+ // method or any of the other process methods to force the password to be interpreted as a raw
+ // encryption key. See comments on setPasswordIsHexKey for more information.
QPDF_DLL
void processFile(char const* filename, char const* password = nullptr);
- // Parse a PDF from a stdio FILE*. The FILE must be open in
- // binary mode and must be seekable. It may be open read only.
- // This works exactly like processFile except that the PDF file is
- // read from an already opened FILE*. If close_file is true, the
- // file will be closed at the end. Otherwise, the caller is
- // responsible for closing the file.
+ // Parse a PDF from a stdio FILE*. The FILE must be open in binary mode and must be seekable.
+ // It may be open read only. This works exactly like processFile except that the PDF file is
+ // read from an already opened FILE*. If close_file is true, the file will be closed at the
+ // end. Otherwise, the caller is responsible for closing the file.
QPDF_DLL
void processFile(
char const* description, FILE* file, bool close_file, char const* password = nullptr);
- // Parse a PDF file loaded into a memory buffer. This works
- // exactly like processFile except that the PDF file is in memory
- // instead of on disk. The description appears in any warning or
+ // Parse a PDF file loaded into a memory buffer. This works exactly like processFile except
+ // that the PDF file is in memory instead of on disk. The description appears in any warning or
// error message in place of the file name.
QPDF_DLL
void processMemoryFile(
char const* description, char const* buf, size_t length, char const* password = nullptr);
- // Parse a PDF file loaded from a custom InputSource. If you have
- // your own method of retrieving a PDF file, you can subclass
- // InputSource and use this method.
+ // Parse a PDF file loaded from a custom InputSource. If you have your own method of retrieving
+ // a PDF file, you can subclass InputSource and use this method.
QPDF_DLL
void processInputSource(std::shared_ptr<InputSource>, char const* password = nullptr);
- // Create a PDF from an input source that contains JSON as written
- // by writeJSON (or qpdf --json-output, version 2 or higher). The
- // JSON must be a complete representation of a PDF. See "qpdf
- // JSON" in the manual for details. The input JSON may be
- // arbitrarily large. QPDF does not load stream data into memory
- // for more than one stream at a time, even if the stream data is
+ // Create a PDF from an input source that contains JSON as written by writeJSON (or qpdf
+ // --json-output, version 2 or higher). The JSON must be a complete representation of a PDF. See
+ // "qpdf JSON" in the manual for details. The input JSON may be arbitrarily large. QPDF does not
+ // load stream data into memory for more than one stream at a time, even if the stream data is
// specified inline.
QPDF_DLL
void createFromJSON(std::string const& json_file);
QPDF_DLL
void createFromJSON(std::shared_ptr<InputSource>);
- // Update a PDF from an input source that contains JSON in the
- // same format as is written by writeJSON (or qpdf --json-output,
- // version 2 or higher). Objects in the PDF and not in the JSON
- // are not modified. See "qpdf JSON" in the manual for details. As
- // with createFromJSON, the input JSON may be arbitrarily large.
+ // Update a PDF from an input source that contains JSON in the same format as is written by
+ // writeJSON (or qpdf --json-output, version 2 or higher). Objects in the PDF and not in the
+ // JSON are not modified. See "qpdf JSON" in the manual for details. As with createFromJSON, the
+ // input JSON may be arbitrarily large.
QPDF_DLL
void updateFromJSON(std::string const& json_file);
QPDF_DLL
void updateFromJSON(std::shared_ptr<InputSource>);
- // Write qpdf JSON format to the pipeline "p". The only supported
- // version is 2. The finish() method is not called on the
- // pipeline.
+ // Write qpdf JSON format to the pipeline "p". The only supported version is 2. The finish()
+ // method is not called on the pipeline.
//
- // The decode_level parameter controls which streams are
- // uncompressed in the JSON. Use qpdf_dl_none to preserve all
- // stream data exactly as it appears in the input. The possible
- // values for json_stream_data can be found in qpdf/Constants.h
- // and correspond to the --json-stream-data command-line argument.
- // If json_stream_data is qpdf_sj_file, file_prefix must be
- // specified. Each stream will be written to a file whose path is
- // constructed by appending "-nnn" to file_prefix, where "nnn" is
- // the object number (not zero-filled). If wanted_objects is
- // empty, write all objects. Otherwise, write only objects whose
- // keys are in wanted_objects. Keys may be either "trailer" or of
- // the form "obj:n n R". Invalid keys are ignored. This
- // corresponds to the --json-object command-line argument.
+ // The decode_level parameter controls which streams are uncompressed in the JSON. Use
+ // qpdf_dl_none to preserve all stream data exactly as it appears in the input. The possible
+ // values for json_stream_data can be found in qpdf/Constants.h and correspond to the
+ // --json-stream-data command-line argument. If json_stream_data is qpdf_sj_file, file_prefix
+ // must be specified. Each stream will be written to a file whose path is constructed by
+ // appending "-nnn" to file_prefix, where "nnn" is the object number (not zero-filled). If
+ // wanted_objects is empty, write all objects. Otherwise, write only objects whose keys are in
+ // wanted_objects. Keys may be either "trailer" or of the form "obj:n n R". Invalid keys are
+ // ignored. This corresponds to the --json-object command-line argument.
//
- // QPDF is efficient with regard to memory when writing, allowing
- // you to write arbitrarily large PDF files to a pipeline. You can
- // use a pipeline like Pl_Buffer or Pl_String to capture the JSON
- // output in memory, but do so with caution as this will allocate
- // enough memory to hold the entire PDF file.
+ // QPDF is efficient with regard to memory when writing, allowing you to write arbitrarily large
+ // PDF files to a pipeline. You can use a pipeline like Pl_Buffer or Pl_String to capture the
+ // JSON output in memory, but do so with caution as this will allocate enough memory to hold the
+ // entire PDF file.
QPDF_DLL
void writeJSON(
int version,
@@ -163,17 +141,13 @@ class QPDF
std::string const& file_prefix,
std::set<std::string> wanted_objects);
- // This version of writeJSON enables writing only the "qpdf" key
- // of an in-progress dictionary. If the value of "complete" is
- // true, a complete JSON object containing only the "qpdf" key is
- // written to the pipeline. If the value of "complete" is false,
- // the "qpdf" key and its value are written to the pipeline
- // assuming that a dictionary is already open. The parameter
- // first_key indicates whether this is the first key in an
- // in-progress dictionary. It will be set to false by writeJSON.
- // The "qpdf" key and value are written as if at depth 1 in a
- // prettified JSON output. Remaining arguments are the same as the
- // above version.
+ // This version of writeJSON enables writing only the "qpdf" key of an in-progress dictionary.
+ // If the value of "complete" is true, a complete JSON object containing only the "qpdf" key is
+ // written to the pipeline. If the value of "complete" is false, the "qpdf" key and its value
+ // are written to the pipeline assuming that a dictionary is already open. The parameter
+ // first_key indicates whether this is the first key in an in-progress dictionary. It will be
+ // set to false by writeJSON. The "qpdf" key and value are written as if at depth 1 in a
+ // prettified JSON output. Remaining arguments are the same as the above version.
QPDF_DLL
void writeJSON(
int version,
@@ -185,169 +159,135 @@ class QPDF
std::string const& file_prefix,
std::set<std::string> wanted_objects);
- // Close or otherwise release the input source. Once this has been
- // called, no other methods of qpdf can be called safely except
- // for getWarnings and anyWarnings(). After this has been called,
- // it is safe to perform operations on the input file such as
- // deleting or renaming it.
+ // Close or otherwise release the input source. Once this has been called, no other methods of
+ // qpdf can be called safely except for getWarnings and anyWarnings(). After this has been
+ // called, it is safe to perform operations on the input file such as deleting or renaming it.
QPDF_DLL
void closeInputSource();
- // For certain forensic or investigatory purposes, it may
- // sometimes be useful to specify the encryption key directly,
- // even though regular PDF applications do not provide a way to do
- // this. Calling setPasswordIsHexKey(true) before calling any of
- // the process methods will bypass the normal encryption key
- // computation or recovery mechanisms and interpret the bytes in
- // the password as a hex-encoded encryption key. Note that we
- // hex-encode the key because it may contain null bytes and
- // therefore can't be represented in a char const*.
+ // For certain forensic or investigatory purposes, it may sometimes be useful to specify the
+ // encryption key directly, even though regular PDF applications do not provide a way to do
+ // this. Calling setPasswordIsHexKey(true) before calling any of the process methods will bypass
+ // the normal encryption key computation or recovery mechanisms and interpret the bytes in the
+ // password as a hex-encoded encryption key. Note that we hex-encode the key because it may
+ // contain null bytes and therefore can't be represented in a char const*.
QPDF_DLL
void setPasswordIsHexKey(bool);
- // Create a QPDF object for an empty PDF. This PDF has no pages
- // or objects other than a minimal trailer, a document catalog,
- // and a /Pages tree containing zero pages. Pages and other
- // objects can be added to the file in the normal way, and the
- // trailer and document catalog can be mutated. Calling this
- // method is equivalent to calling processFile on an equivalent
- // PDF file. See the pdf-create.cc example for a demonstration of
- // how to use this method to create a PDF file from scratch.
+ // Create a QPDF object for an empty PDF. This PDF has no pages or objects other than a minimal
+ // trailer, a document catalog, and a /Pages tree containing zero pages. Pages and other
+ // objects can be added to the file in the normal way, and the trailer and document catalog can
+ // be mutated. Calling this method is equivalent to calling processFile on an equivalent PDF
+ // file. See the pdf-create.cc example for a demonstration of how to use this method to create
+ // a PDF file from scratch.
QPDF_DLL
void emptyPDF();
- // From 10.1: register a new filter implementation for a specific
- // stream filter. You can add your own implementations for new
- // filter types or override existing ones provided by the library.
- // Registered stream filters are used for decoding only as you can
- // override encoding with stream data providers. For example, you
- // could use this method to add support for one of the other filter
- // types by using additional third-party libraries that qpdf does
- // not presently use. The standard filters are implemented using
- // QPDFStreamFilter classes.
+ // From 10.1: register a new filter implementation for a specific stream filter. You can add
+ // your own implementations for new filter types or override existing ones provided by the
+ // library. Registered stream filters are used for decoding only as you can override encoding
+ // with stream data providers. For example, you could use this method to add support for one of
+ // the other filter types by using additional third-party libraries that qpdf does not presently
+ // use. The standard filters are implemented using QPDFStreamFilter classes.
QPDF_DLL
static void registerStreamFilter(
std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory);
// Parameter settings
- // To capture or redirect output, configure the logger returned by
- // getLogger(). By default, all QPDF and QPDFJob objects share the
- // global logger. If you need a private logger for some reason,
- // pass a new one to setLogger(). See comments in QPDFLogger.hh
- // for details on configuring the logger.
+ // To capture or redirect output, configure the logger returned by getLogger(). By default, all
+ // QPDF and QPDFJob objects share the global logger. If you need a private logger for some
+ // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on
+ // configuring the logger.
//
- // Note that no normal QPDF operations generate output to standard
- // output, so for applications that just wish to avoid creating
- // output for warnings and don't call any check functions, calling
- // setSuppressWarnings(true) is sufficient.
+ // Note that no normal QPDF operations generate output to standard output, so for applications
+ // that just wish to avoid creating output for warnings and don't call any check functions,
+ // calling setSuppressWarnings(true) is sufficient.
QPDF_DLL
std::shared_ptr<QPDFLogger> getLogger();
QPDF_DLL
void setLogger(std::shared_ptr<QPDFLogger>);
- // This deprecated method is the old way to capture output, but it
- // didn't capture all output. See comments above for getLogger and
- // setLogger. This will be removed in QPDF 12. For now, it
- // configures a private logger, separating this object from the
- // default logger, and calls setOutputStreams on that logger. See
- // QPDFLogger.hh for additional details.
+ // This deprecated method is the old way to capture output, but it didn't capture all output.
+ // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it
+ // configures a private logger, separating this object from the default logger, and calls
+ // setOutputStreams on that logger. See QPDFLogger.hh for additional details.
[[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void
setOutputStreams(std::ostream* out_stream, std::ostream* err_stream);
- // If true, ignore any cross-reference streams in a hybrid file
- // (one that contains both cross-reference streams and
- // cross-reference tables). This can be useful for testing to
+ // If true, ignore any cross-reference streams in a hybrid file (one that contains both
+ // cross-reference streams and cross-reference tables). This can be useful for testing to
// ensure that a hybrid file would work with an older reader.
QPDF_DLL
void setIgnoreXRefStreams(bool);
- // By default, any warnings are issued to std::cerr or the error
- // stream specified in a call to setOutputStreams as they are
- // encountered. If this method is called with a true value, reporting of
- // warnings is suppressed. You may still retrieve warnings by
- // calling getWarnings.
+ // By default, any warnings are issued to std::cerr or the error stream specified in a call to
+ // setOutputStreams as they are encountered. If this method is called with a true value,
+ // reporting of warnings is suppressed. You may still retrieve warnings by calling getWarnings.
QPDF_DLL
void setSuppressWarnings(bool);
- // By default, QPDF will try to recover if it finds certain types
- // of errors in PDF files. If turned off, it will throw an
- // exception on the first such problem it finds without attempting
+ // By default, QPDF will try to recover if it finds certain types of errors in PDF files. If
+ // turned off, it will throw an exception on the first such problem it finds without attempting
// recovery.
QPDF_DLL
void setAttemptRecovery(bool);
- // Tell other QPDF objects that streams copied from this QPDF need
- // to be fully copied when copyForeignObject is called on them.
- // Calling setIgnoreXRefStreams(true) on a QPDF object makes it
- // possible for the object and its input source to disappear
- // before streams copied from it are written with the destination
- // QPDF object. Confused? Ordinarily, if you are going to copy
- // objects from a source QPDF object to a destination QPDF object
- // using copyForeignObject or addPage, the source object's input
- // source must stick around until after the destination PDF is
- // written. If you call this method on the source QPDF object, it
- // sends a signal to the destination object that it must fully
- // copy the stream data when copyForeignObject. It will do this by
- // making a copy in RAM. Ordinarily the stream data is copied
- // lazily to avoid unnecessary duplication of the stream data.
- // Note that the stream data is copied into RAM only once
- // regardless of how many objects the stream is copied into. The
- // result is that, if you called setImmediateCopyFrom(true) on a
- // given QPDF object prior to copying any of its streams, you do
- // not need to keep it or its input source around after copying
- // its objects to another QPDF. This is true even if the source
- // streams use StreamDataProvider. Note that this method is called
- // on the QPDF object you are copying FROM, not the one you are
- // copying to. The reasoning for this is that there's no reason a
- // given QPDF may not get objects copied to it from a variety of
- // other objects, some transient and some not. Since what's
- // relevant is whether the source QPDF is transient, the method
- // must be called on the source QPDF, not the destination one.
- // This method will make a copy of the stream in RAM, so be
- // sure you have enough memory to simultaneously hold all the
- // streams you're copying.
+ // Tell other QPDF objects that streams copied from this QPDF need to be fully copied when
+ // copyForeignObject is called on them. Calling setImmediateCopyFrom(true) on a QPDF object
+ // makes it possible for the object and its input source to disappear before streams copied from
+ // it are written with the destination QPDF object. Confused? Ordinarily, if you are going to
+ // copy objects from a source QPDF object to a destination QPDF object using copyForeignObject
+ // or addPage, the source object's input source must stick around until after the destination
+ // PDF is written. If you call this method on the source QPDF object, it sends a signal to the
+ // destination object that it must fully copy the stream data when copyForeignObject is called. It will do
+ // this by making a copy in RAM. Ordinarily the stream data is copied lazily to avoid
+ // unnecessary duplication of the stream data. Note that the stream data is copied into RAM only
+ // once regardless of how many objects the stream is copied into. The result is that, if you
+ // called setImmediateCopyFrom(true) on a given QPDF object prior to copying any of its streams,
+ // you do not need to keep it or its input source around after copying its objects to another
+ // QPDF. This is true even if the source streams use StreamDataProvider. Note that this method
+ // is called on the QPDF object you are copying FROM, not the one you are copying to. The
+ // reasoning for this is that there's no reason a given QPDF may not get objects copied to it
+ // from a variety of other objects, some transient and some not. Since what's relevant is
+ // whether the source QPDF is transient, the method must be called on the source QPDF, not the
+ // destination one. This method will make a copy of the stream in RAM, so be sure you have
+ // enough memory to simultaneously hold all the streams you're copying.
QPDF_DLL
void setImmediateCopyFrom(bool);
// Other public methods
- // Return the list of warnings that have been issued so far and
- // clear the list. This method may be called even if processFile
- // throws an exception. Note that if setSuppressWarnings was not
- // called or was called with a false value, any warnings retrieved
- // here will have already been output.
+ // Return the list of warnings that have been issued so far and clear the list. This method may
+ // be called even if processFile throws an exception. Note that if setSuppressWarnings was not
+ // called or was called with a false value, any warnings retrieved here will have already been
+ // output.
QPDF_DLL
std::vector<QPDFExc> getWarnings();
- // Indicate whether any warnings have been issued so far. Does not
- // clear the list of warnings.
+ // Indicate whether any warnings have been issued so far. Does not clear the list of warnings.
QPDF_DLL
bool anyWarnings() const;
- // Indicate the number of warnings that have been issued since the last
- // call to getWarnings. Does not clear the list of warnings.
+ // Indicate the number of warnings that have been issued since the last call to getWarnings.
+ // Does not clear the list of warnings.
QPDF_DLL
size_t numWarnings() const;
- // Return an application-scoped unique ID for this QPDF object.
- // This is not a globally unique ID. It is constructed using a
- // timestamp and a random number and is intended to be unique
- // among QPDF objects that are created by a single run of an
- // application. While it's very likely that these are actually
- // globally unique, it is not recommended to use them for
- // long-term purposes.
+ // Return an application-scoped unique ID for this QPDF object. This is not a globally unique
+ // ID. It is constructed using a timestamp and a random number and is intended to be unique
+ // among QPDF objects that are created by a single run of an application. While it's very likely
+ // that these are actually globally unique, it is not recommended to use them for long-term
+ // purposes.
QPDF_DLL
unsigned long long getUniqueId() const;
- // Issue a warning on behalf of this QPDF object. It will be
- // emitted with other warnings, following warning suppression
- // rules, and it will be available with getWarnings().
+ // Issue a warning on behalf of this QPDF object. It will be emitted with other warnings,
+ // following warning suppression rules, and it will be available with getWarnings().
QPDF_DLL
void warn(QPDFExc const& e);
- // Same as above but creates the QPDFExc object using the
- // arguments passed to warn. The filename argument to QPDFExc is
- // omitted. This method uses the filename associated with the QPDF
+ // Same as above but creates the QPDFExc object using the arguments passed to warn. The filename
+ // argument to QPDFExc is omitted. This method uses the filename associated with the QPDF
// object.
QPDF_DLL
void warn(
@@ -376,60 +316,48 @@ class QPDF
// Public factory methods
- // Create a new stream. A subsequent call must be made to
- // replaceStreamData() to provide data for the stream. The stream's
- // dictionary may be retrieved by calling getDict(), and the resulting
- // dictionary may be modified. Alternatively, you can create a new
- // dictionary and call replaceDict to install it.
+ // Create a new stream. A subsequent call must be made to replaceStreamData() to provide data
+ // for the stream. The stream's dictionary may be retrieved by calling getDict(), and the
+ // resulting dictionary may be modified. Alternatively, you can create a new dictionary and
+ // call replaceDict to install it.
QPDF_DLL
QPDFObjectHandle newStream();
- // Create a new stream. Use the given buffer as the stream data. The
- // stream dictionary's /Length key will automatically be set to the size of
- // the data buffer. If additional keys are required, the stream's
- // dictionary may be retrieved by calling getDict(), and the resulting
- // dictionary may be modified. This method is just a convenient wrapper
- // around the newStream() and replaceStreamData(). It is a convenience
- // methods for streams that require no parameters beyond the stream length.
- // Note that you don't have to deal with compression yourself if you use
- // QPDFWriter. By default, QPDFWriter will automatically compress
- // uncompressed stream data. Example programs are provided that
- // illustrate this.
+ // Create a new stream. Use the given buffer as the stream data. The stream dictionary's
+ // /Length key will automatically be set to the size of the data buffer. If additional keys are
+ // required, the stream's dictionary may be retrieved by calling getDict(), and the resulting
+ // dictionary may be modified. This method is just a convenient wrapper around the newStream()
+ // and replaceStreamData(). It is a convenience method for streams that require no parameters
+ // beyond the stream length. Note that you don't have to deal with compression yourself if you
+ // use QPDFWriter. By default, QPDFWriter will automatically compress uncompressed stream data.
+ // Example programs are provided that illustrate this.
QPDF_DLL
QPDFObjectHandle newStream(std::shared_ptr<Buffer> data);
- // Create new stream with data from string. This method will
- // create a copy of the data rather than using the user-provided
- // buffer as in the std::shared_ptr<Buffer> version of newStream.
+ // Create new stream with data from string. This method will create a copy of the data rather
+ // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream.
QPDF_DLL
QPDFObjectHandle newStream(std::string const& data);
- // A reserved object is a special sentinel used for qpdf to
- // reserve a spot for an object that is going to be added to the
- // QPDF object. Normally you don't have to use this type since
- // you can just call QPDF::makeIndirectObject. However, in some
- // cases, if you have to create objects with circular references,
- // you may need to create a reserved object so that you can have a
- // reference to it and then replace the object later. Reserved
- // objects have the special property that they can't be resolved
- // to direct objects. This makes it possible to replace a
- // reserved object with a new object while preserving existing
- // references to them. When you are ready to replace a reserved
- // object with its replacement, use QPDF::replaceReserved for this
- // purpose rather than the more general QPDF::replaceObject. It
- // is an error to try to write a QPDF with QPDFWriter if it has
- // any reserved objects in it.
+ // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is
+ // going to be added to the QPDF object. Normally you don't have to use this type since you can
+ // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects
+ // with circular references, you may need to create a reserved object so that you can have a
+ // reference to it and then replace the object later. Reserved objects have the special
+ // property that they can't be resolved to direct objects. This makes it possible to replace a
+ // reserved object with a new object while preserving existing references to it. When you are
+ // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this
+ // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a
+ // QPDF with QPDFWriter if it has any reserved objects in it.
QPDF_DLL
QPDFObjectHandle newReserved();
- // Install this object handle as an indirect object and return an
- // indirect reference to it.
+ // Install this object handle as an indirect object and return an indirect reference to it.
QPDF_DLL
QPDFObjectHandle makeIndirectObject(QPDFObjectHandle);
- // Retrieve an object by object ID and generation. Returns an
- // indirect reference to it. The getObject() methods were added
- // for qpdf 11.
+ // Retrieve an object by object ID and generation. Returns an indirect reference to it. The
+ // getObject() methods were added for qpdf 11.
QPDF_DLL
QPDFObjectHandle getObject(QPDFObjGen const&);
QPDF_DLL
@@ -441,83 +369,63 @@ class QPDF
QPDF_DLL
QPDFObjectHandle getObjectByID(int objid, int generation);
- // Replace the object with the given object id with the given
- // object. The object handle passed in must be a direct object,
- // though it may contain references to other indirect objects
- // within it. Prior to qpdf 10.2.1, after calling this method,
- // existing QPDFObjectHandle instances that pointed to the
- // original object still pointed to the original object, resulting
- // in confusing and incorrect behavior. This was fixed in 10.2.1,
- // so existing QPDFObjectHandle objects will start pointing to the
- // newly replaced object. Note that replacing an object with
- // QPDFObjectHandle::newNull() effectively removes the object from
- // the file since a non-existent object is treated as a null
- // object. To replace a reserved object, call replaceReserved
+ // Replace the object with the given object id with the given object. The object handle passed
+ // in must be a direct object, though it may contain references to other indirect objects within
+ // it. Prior to qpdf 10.2.1, after calling this method, existing QPDFObjectHandle instances that
+ // pointed to the original object still pointed to the original object, resulting in confusing
+ // and incorrect behavior. This was fixed in 10.2.1, so existing QPDFObjectHandle objects will
+ // start pointing to the newly replaced object. Note that replacing an object with
+ // QPDFObjectHandle::newNull() effectively removes the object from the file since a non-existent
+ // object is treated as a null object. To replace a reserved object, call replaceReserved
// instead.
QPDF_DLL
void replaceObject(QPDFObjGen const& og, QPDFObjectHandle);
QPDF_DLL
void replaceObject(int objid, int generation, QPDFObjectHandle);
- // Swap two objects given by ID. Prior to qpdf 10.2.1, existing
- // QPDFObjectHandle instances that reference them objects not
- // notice the swap, but this was fixed in 10.2.1.
+ // Swap two objects given by ID. Prior to qpdf 10.2.1, existing QPDFObjectHandle instances that
+ // referenced the objects did not notice the swap, but this was fixed in 10.2.1.
QPDF_DLL
void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2);
QPDF_DLL
void swapObjects(int objid1, int generation1, int objid2, int generation2);
- // Replace a reserved object. This is a wrapper around
- // replaceObject but it guarantees that the underlying object is a
- // reserved object. After this call, reserved will be a reference
- // to replacement.
+ // Replace a reserved object. This is a wrapper around replaceObject but it guarantees that the
+ // underlying object is a reserved object. After this call, reserved will be a reference to
+ // replacement.
QPDF_DLL
void replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement);
- // Copy an object from another QPDF to this one. Starting with
- // qpdf version 8.3.0, it is no longer necessary to keep the
- // original QPDF around after the call to copyForeignObject as
- // long as the source of any copied stream data is still
- // available. Usually this means you just have to keep the input
- // file around, not the QPDF object. The exception to this is if
- // you copy a stream that gets its data from a
- // QPDFObjectHandle::StreamDataProvider. In this case only, the
- // original stream's QPDF object must stick around because the
- // QPDF object is itself the source of the original stream data.
- // For a more in-depth discussion, please see the TODO file.
- // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on
- // the SOURCE QPDF object (the one you're copying FROM). If you do
- // this prior to copying any of its objects, then neither the
- // source QPDF object nor its input source needs to stick around
- // at all regardless of the source. The cost is that the stream
- // data is copied into RAM at the time copyForeignObject is
+ // Copy an object from another QPDF to this one. Starting with qpdf version 8.3.0, it is no
+ // longer necessary to keep the original QPDF around after the call to copyForeignObject as long
+ // as the source of any copied stream data is still available. Usually this means you just have
+ // to keep the input file around, not the QPDF object. The exception to this is if you copy a
+ // stream that gets its data from a QPDFObjectHandle::StreamDataProvider. In this case only, the
+ // original stream's QPDF object must stick around because the QPDF object is itself the source
+ // of the original stream data. For a more in-depth discussion, please see the TODO file.
+ // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on the SOURCE QPDF object (the one
+ // you're copying FROM). If you do this prior to copying any of its objects, then neither the
+ // source QPDF object nor its input source needs to stick around at all regardless of the
+ // source. The cost is that the stream data is copied into RAM at the time copyForeignObject is
// called. See setImmediateCopyFrom for more information.
//
- // The return value of this method is an indirect reference to the
- // copied object in this file. This method is intended to be used
- // to copy non-page objects. To copy page objects, pass the
- // foreign page object directly to addPage (or addPageAt). If you
- // copy objects that contain references to pages, you should copy
- // the pages first using addPage(At). Otherwise references to the
- // pages that have not been copied will be replaced with nulls. It
- // is possible to use copyForeignObject on page objects if you are
- // not going to use them as pages. Doing so copies the object
- // normally but does not update the page structure. For example,
- // it is a valid use case to use copyForeignObject for a page that
- // you are going to turn into a form XObject, though you can also
- // use QPDFPageObjectHelper::getFormXObjectForPage for that
- // purpose.
+ // The return value of this method is an indirect reference to the copied object in this file.
+ // This method is intended to be used to copy non-page objects. To copy page objects, pass the
+ // foreign page object directly to addPage (or addPageAt). If you copy objects that contain
+ // references to pages, you should copy the pages first using addPage(At). Otherwise references
+ // to the pages that have not been copied will be replaced with nulls. It is possible to use
+ // copyForeignObject on page objects if you are not going to use them as pages. Doing so copies
+ // the object normally but does not update the page structure. For example, it is a valid use
+ // case to use copyForeignObject for a page that you are going to turn into a form XObject,
+ // though you can also use QPDFPageObjectHelper::getFormXObjectForPage for that purpose.
//
- // When copying objects with this method, object structure will be
- // preserved, so all indirectly referenced indirect objects will
- // be copied as well. This includes any circular references that
- // may exist. The QPDF object keeps a record of what has already
- // been copied, so shared objects will not be copied multiple
- // times. This also means that if you mutate an object that has
- // already been copied and try to copy it again, it won't work
- // since the modified object will not be recopied. Therefore, you
- // should do all mutation on the original file that you are going
- // to do before you start copying its objects to a new file.
+ // When copying objects with this method, object structure will be preserved, so all indirectly
+ // referenced indirect objects will be copied as well. This includes any circular references
+ // that may exist. The QPDF object keeps a record of what has already been copied, so shared
+ // objects will not be copied multiple times. This also means that if you mutate an object that
+ // has already been copied and try to copy it again, it won't work since the modified object
+ // will not be recopied. Therefore, you should do all mutation on the original file that you
+ // are going to do before you start copying its objects to a new file.
QPDF_DLL
QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign);
@@ -633,9 +541,8 @@ class QPDF
QPDF_DLL
bool allowModifyAll();
- // Helper function to trim padding from user password. Calling
- // trim_user_password on the result of getPaddedUserPassword gives
- // getTrimmedUserPassword's result.
+ // Helper function to trim padding from user password. Calling trim_user_password on the result
+ // of getPaddedUserPassword gives getTrimmedUserPassword's result.
QPDF_DLL
static void trim_user_password(std::string& user_password);
QPDF_DLL
@@ -678,47 +585,40 @@ class QPDF
std::string& OE,
std::string& UE,
std::string& Perms);
- // Return the full user password as stored in the PDF file. For
- // files encrypted with 40-bit or 128-bit keys, the user password
- // can be recovered when the file is opened using the owner
- // password. This is not possible with newer encryption formats.
- // If you are attempting to recover the user password in a
- // user-presentable form, call getTrimmedUserPassword() instead.
+ // Return the full user password as stored in the PDF file. For files encrypted with 40-bit or
+ // 128-bit keys, the user password can be recovered when the file is opened using the owner
+ // password. This is not possible with newer encryption formats. If you are attempting to
+ // recover the user password in a user-presentable form, call getTrimmedUserPassword() instead.
QPDF_DLL
std::string const& getPaddedUserPassword() const;
- // Return human-readable form of user password subject to same
- // limitations as getPaddedUserPassword().
+ // Return human-readable form of user password subject to same limitations as
+ // getPaddedUserPassword().
QPDF_DLL
std::string getTrimmedUserPassword() const;
- // Return the previously computed or retrieved encryption key for
- // this file
+ // Return the previously computed or retrieved encryption key for this file
QPDF_DLL
std::string getEncryptionKey() const;
- // Remove security restrictions associated with digitally signed
- // files.
+ // Remove security restrictions associated with digitally signed files.
QPDF_DLL
void removeSecurityRestrictions();
// Linearization support
- // Returns true iff the file starts with a linearization parameter
- // dictionary. Does no additional validation.
+ // Returns true iff the file starts with a linearization parameter dictionary. Does no
+ // additional validation.
QPDF_DLL
bool isLinearized();
- // Performs various sanity checks on a linearized file. Return
- // true if no errors or warnings. Otherwise, return false and
- // output errors and warnings to the default output stream
- // (std::cout or whatever is configured in the logger). It is
- // recommended for linearization errors to be treated as warnings.
+ // Performs various sanity checks on a linearized file. Return true if no errors or warnings.
+ // Otherwise, return false and output errors and warnings to the default output stream
+ // (std::cout or whatever is configured in the logger). It is recommended for linearization
+ // errors to be treated as warnings.
QPDF_DLL
bool checkLinearization();
- // Calls checkLinearization() and, if possible, prints normalized
- // contents of some of the hints tables to the default output
- // stream. Normalization includes adding min values to delta
- // values and adjusting offsets based on the location and size of
- // the primary hint stream.
+ // Calls checkLinearization() and, if possible, prints normalized contents of some of the hints
+ // tables to the default output stream. Normalization includes adding min values to delta values
+ // and adjusting offsets based on the location and size of the primary hint stream.
QPDF_DLL
void showLinearizationData();
@@ -726,66 +626,52 @@ class QPDF
QPDF_DLL
void showXRefTable();
- // Starting from qpdf 11.0 user code should not need to call this method.
- // Before 11.0 this method was used to detect all indirect references to
- // objects that don't exist and resolve them by replacing them with null,
- // which is how the PDF spec says to interpret such dangling references.
- // This method is called automatically when you try to add any new objects,
- // if you call getAllObjects, and before a file is written. The qpdf object
- // caches whether it has run this to avoid running it multiple times.
- // Before 11.2.1 you could pass true to force it to run again if you had
- // explicitly added new objects that may have additional dangling
- // references.
+ // Starting from qpdf 11.0 user code should not need to call this method. Before 11.0 this
+ // method was used to detect all indirect references to objects that don't exist and resolve
+ // them by replacing them with null, which is how the PDF spec says to interpret such dangling
+ // references. This method is called automatically when you try to add any new objects, if you
+ // call getAllObjects, and before a file is written. The qpdf object caches whether it has run
+ // this to avoid running it multiple times. Before 11.2.1 you could pass true to force it to run
+ // again if you had explicitly added new objects that may have additional dangling references.
QPDF_DLL
void fixDanglingReferences(bool force = false);
- // Return the approximate number of indirect objects. It is
- // approximate because not all objects in the file are preserved
- // in all cases, and gaps in object numbering are not preserved.
+ // Return the approximate number of indirect objects. It is approximate because not all objects
+ // in the file are preserved in all cases, and gaps in object numbering are not preserved.
QPDF_DLL
size_t getObjectCount();
- // Returns a list of indirect objects for every object in the xref
- // table. Useful for discovering objects that are not otherwise
- // referenced.
+ // Returns a list of indirect objects for every object in the xref table. Useful for discovering
+ // objects that are not otherwise referenced.
QPDF_DLL
std::vector<QPDFObjectHandle> getAllObjects();
- // Optimization support -- see doc/optimization. Implemented in
- // QPDF_optimization.cc
-
- // The object_stream_data map maps from a "compressed" object to
- // the object stream that contains it. This enables optimize to
- // populate the object <-> user maps with only uncompressed
- // objects. If allow_changes is false, an exception will be thrown
- // if any changes are made during the optimization process. This
- // is available so that the test suite can make sure that a
- // linearized file is already optimized. When called in this way,
- // optimize() still populates the object <-> user maps. The
- // optional skip_stream_parameters parameter, if present, is
- // called for each stream object. The function should return 2 if
- // optimization should discard /Length, /Filter, and /DecodeParms;
- // 1 if it should discard /Length, and 0 if it should preserve all
- // keys. This is used by QPDFWriter to avoid creation of dangling
- // objects for stream dictionary keys it will be regenerating.
+ // Optimization support -- see doc/optimization. Implemented in QPDF_optimization.cc
+
+ // The object_stream_data map maps from a "compressed" object to the object stream that contains
+ // it. This enables optimize to populate the object <-> user maps with only uncompressed
+ // objects. If allow_changes is false, an exception will be thrown if any changes are made
+ // during the optimization process. This is available so that the test suite can make sure that
+ // a linearized file is already optimized. When called in this way, optimize() still populates
+ // the object <-> user maps. The optional skip_stream_parameters parameter, if present, is
+ // called for each stream object. The function should return 2 if optimization should discard
+ // /Length, /Filter, and /DecodeParms; 1 if it should discard /Length, and 0 if it should
+ // preserve all keys. This is used by QPDFWriter to avoid creation of dangling objects for
+ // stream dictionary keys it will be regenerating.
QPDF_DLL
void optimize(
std::map<int, int> const& object_stream_data,
bool allow_changes = true,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr);
- // Traverse page tree return all /Page objects. It also detects
- // and resolves cases in which the same /Page object is
- // duplicated. For efficiency, this method returns a const
- // reference to an internal vector of pages. Calls to addPage,
- // addPageAt, and removePage safely update this, but directly
- // manipulation of the pages tree or pushing inheritable objects
- // to the page level may invalidate it. See comments for
- // updateAllPagesCache() for additional notes. Newer code should
- // use QPDFPageDocumentHelper::getAllPages instead. The decision
- // to expose this internal cache was arguably incorrect, but it is
- // being left here for compatibility. It is, however, completely
- // safe to use this for files that you are not modifying.
+ // Traverse the page tree and return all /Page objects. It also detects and resolves cases in
+ // which the same /Page object is duplicated. For efficiency, this method returns a const
+ // reference to an internal vector of pages. Calls to addPage, addPageAt, and removePage safely
+ // update this, but direct manipulation of the pages tree or pushing inheritable objects to the
+ // page level may invalidate it. See comments for updateAllPagesCache() for additional notes.
+ // Newer code should use QPDFPageDocumentHelper::getAllPages instead. The decision to expose
+ // this internal cache was arguably incorrect, but it is being left here for compatibility. It
+ // is, however, completely safe to use this for files that you are not modifying.
QPDF_DLL
std::vector<QPDFObjectHandle> const& getAllPages();
@@ -794,40 +680,32 @@ class QPDF
QPDF_DLL
bool everPushedInheritedAttributesToPages() const;
- // These methods, given a page object or its object/generation
- // number, returns the 0-based index into the array returned by
- // getAllPages() for that page. An exception is thrown if the page
- // is not found.
+ // These methods, given a page object or its object/generation number, return the 0-based index
+ // into the array returned by getAllPages() for that page. An exception is thrown if the page is
+ // not found.
QPDF_DLL
int findPage(QPDFObjGen const& og);
QPDF_DLL
int findPage(QPDFObjectHandle& page);
- // This method synchronizes QPDF's cache of the page structure
- // with the actual /Pages tree. If you restrict changes to the
- // /Pages tree, including addition, removal, or replacement of
- // pages or changes to any /Pages objects, to calls to these page
- // handling APIs, you never need to call this method. If you
- // modify /Pages structures directly, you must call this method
- // afterwards. This method updates the internal list of pages, so
- // after calling this method, any previous references returned by
- // getAllPages() will be valid again. It also resets any state
- // about having pushed inherited attributes in /Pages objects down
- // to the pages, so if you add any inheritable attributes to a
- // /Pages object, you should also call this method.
+ // This method synchronizes QPDF's cache of the page structure with the actual /Pages tree. If
+ // you restrict changes to the /Pages tree, including addition, removal, or replacement of pages
+ // or changes to any /Pages objects, to calls to these page handling APIs, you never need to
+ // call this method. If you modify /Pages structures directly, you must call this method
+ // afterwards. This method updates the internal list of pages, so after calling this method,
+ // any previous references returned by getAllPages() will be valid again. It also resets any
+ // state about having pushed inherited attributes in /Pages objects down to the pages, so if you
+ // add any inheritable attributes to a /Pages object, you should also call this method.
QPDF_DLL
void updateAllPagesCache();
- // Legacy handling API. These methods are not going anywhere, and
- // you should feel free to continue using them if it simplifies
- // your code. Newer code should make use of QPDFPageDocumentHelper
- // instead as future page handling methods will be added there.
- // The functionality and specification of these legacy methods is
- // identical to the identically named methods there, except that
- // these versions use QPDFObjectHandle instead of
- // QPDFPageObjectHelper, so please see comments in that file for
- // descriptions. There are subtleties you need to know about, so
- // please look at the comments there.
+ // Legacy page handling API. These methods are not going anywhere, and you should feel free to
+ // continue using them if it simplifies your code. Newer code should make use of
+ // QPDFPageDocumentHelper instead as future page handling methods will be added there. The
+ // functionality and specification of these legacy methods is identical to the identically named
+ // methods there, except that these versions use QPDFObjectHandle instead of
+ // QPDFPageObjectHelper, so please see comments in that file for descriptions. There are
+ // subtleties you need to know about, so please look at the comments there.
QPDF_DLL
void pushInheritedAttributesToPage();
QPDF_DLL
@@ -838,8 +716,7 @@ class QPDF
void removePage(QPDFObjectHandle page);
// End legacy page helpers
- // Writer class is restricted to QPDFWriter so that only it can
- // call certain methods.
+ // Writer class is restricted to QPDFWriter so that only it can call certain methods.
class Writer
{
friend class QPDFWriter;
@@ -884,8 +761,8 @@ class QPDF
}
};
- // The Resolver class is restricted to QPDFObject so that only it
- // can resolve indirect references.
+ // The Resolver class is restricted to QPDFObject so that only it can resolve indirect
+ // references.
class Resolver
{
friend class QPDFObject;
@@ -898,8 +775,7 @@ class QPDF
}
};
- // StreamCopier class is restricted to QPDFObjectHandle so it can
- // copy stream data.
+ // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
class StreamCopier
{
friend class QPDFObjectHandle;
@@ -974,12 +850,10 @@ class QPDF
static bool test_json_validators();
private:
- // It has never been safe to copy QPDF objects as there is code in
- // the library that assumes there are no copies of a QPDF object.
- // Copying QPDF objects was not prevented by the API until qpdf
- // 11. If you have been copying QPDF objects, use
- // std::shared_ptr<QPDF> instead. From qpdf 11, you can use
- // QPDF::create to create them.
+ // It has never been safe to copy QPDF objects as there is code in the library that assumes
+ // there are no copies of a QPDF object. Copying QPDF objects was not prevented by the API until
+ // qpdf 11. If you have been copying QPDF objects, use std::shared_ptr<QPDF> instead. From qpdf
+ // 11, you can use QPDF::create to create them.
QPDF(QPDF const&) = delete;
QPDF& operator=(QPDF const&) = delete;
@@ -1200,8 +1074,8 @@ class QPDF
// For QPDFWriter:
- // Get lists of all objects in order according to the part of a
- // linearized file that they belong to.
+ // Get lists of all objects in order according to the part of a linearized file that they belong
+ // to.
void getLinearizedParts(
std::map<int, int> const& object_stream_data,
std::vector<QPDFObjectHandle>& part4,
@@ -1221,8 +1095,7 @@ class QPDF
// Map object to object stream that contains it
void getObjectStreamData(std::map<int, int>&);
- // Get a list of objects that would be permitted in an object
- // stream.
+ // Get a list of objects that would be permitted in an object stream.
std::vector<QPDFObjGen> getCompressibleObjGens();
// methods to support page handling
@@ -1418,20 +1291,16 @@ class QPDF
qpdf_offset_t H_length; // length of primary hint stream
};
- // Computed hint table value data structures. These tables
- // contain the computed values on which the hint table values are
- // based. They exclude things like number of bits and store
- // actual values instead of mins and deltas. File offsets are
- // also absolute rather than being offset by the size of the
- // primary hint table. We populate the hint table structures from
- // these during writing and compare the hint table values with
- // these during validation. We ignore some values for various
- // reasons described in the code. Those values are omitted from
- // these structures. Note also that object numbers are object
- // numbers from the input file, not the output file.
-
- // Naming convention: CHSomething is analogous to HSomething
- // above. "CH" is computed hint.
+ // Computed hint table value data structures. These tables contain the computed values on which
+ // the hint table values are based. They exclude things like number of bits and store actual
+ // values instead of mins and deltas. File offsets are also absolute rather than being offset
+ // by the size of the primary hint table. We populate the hint table structures from these
+ // during writing and compare the hint table values with these during validation. We ignore
+ // some values for various reasons described in the code. Those values are omitted from these
+ // structures. Note also that object numbers are object numbers from the input file, not the
+ // output file.
+
+ // Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint.
struct CHPageOffsetEntry
{
@@ -1482,8 +1351,7 @@ class QPDF
// No need for CHGeneric -- HGeneric is fine as is.
- // Data structures to support optimization -- implemented in
- // QPDF_optimization.cc
+ // Data structures to support optimization -- implemented in QPDF_optimization.cc
class ObjUser
{
@@ -1535,8 +1403,7 @@ class QPDF
bool findStartxref();
bool findEndstream();
- // methods to support linearization checking -- implemented in
- // QPDF_linearization.cc
+ // methods to support linearization checking -- implemented in QPDF_linearization.cc
void readLinearizationData();
bool checkLinearizationInternal();
void dumpLinearizationDataInternal();
@@ -1693,25 +1560,23 @@ class QPDF
bool uncompressed_after_compressed{false};
bool linearization_warnings{false};
- // Linearization parameter dictionary and hint table data: may be
- // read from file or computed prior to writing a linearized file
+ // Linearization parameter dictionary and hint table data: may be read from file or computed
+ // prior to writing a linearized file
QPDFObjectHandle lindict;
LinParameters linp;
HPageOffset page_offset_hints;
HSharedObject shared_object_hints;
HGeneric outline_hints;
- // Computed linearization data: used to populate above tables
- // during writing and to compare with them during validation.
- // c_ means computed.
+ // Computed linearization data: used to populate above tables during writing and to compare
+ // with them during validation. c_ means computed.
LinParameters c_linp;
CHPageOffset c_page_offset_data;
CHSharedObject c_shared_object_data;
HGeneric c_outline_data;
- // Object ordering data for linearized files: initialized by
- // calculateLinearizationData(). Part numbers refer to the PDF
- // 1.4 specification.
+ // Object ordering data for linearized files: initialized by calculateLinearizationData().
+ // Part numbers refer to the PDF 1.4 specification.
std::vector<QPDFObjectHandle> part4;
std::vector<QPDFObjectHandle> part6;
std::vector<QPDFObjectHandle> part7;
@@ -1723,9 +1588,8 @@ class QPDF
std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users;
};
- // Keep all member variables inside the Members object, which we
- // dynamically allocate. This makes it possible to add new private
- // members without breaking binary compatibility.
+ // Keep all member variables inside the Members object, which we dynamically allocate. This
+ // makes it possible to add new private members without breaking binary compatibility.
std::shared_ptr<Members> m;
};
diff --git a/include/qpdf/QPDFAcroFormDocumentHelper.hh b/include/qpdf/QPDFAcroFormDocumentHelper.hh
index 1ae593af..a86563fa 100644
--- a/include/qpdf/QPDFAcroFormDocumentHelper.hh
+++ b/include/qpdf/QPDFAcroFormDocumentHelper.hh
@@ -2,69 +2,55 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
#ifndef QPDFACROFORMDOCUMENTHELPER_HH
#define QPDFACROFORMDOCUMENTHELPER_HH
-// This document helper is intended to help with operations on
-// interactive forms. Here are the key things to know:
+// This document helper is intended to help with operations on interactive forms. Here are the key
+// things to know:
-// * The PDF specification talks about interactive forms and also
-// about form XObjects. While form XObjects appear in parts of
-// interactive forms, this class is concerned about interactive
-// forms, not form XObjects.
+// * The PDF specification talks about interactive forms and also about form XObjects. While form
+// XObjects appear in parts of interactive forms, this class is concerned about interactive forms,
+// not form XObjects.
//
-// * Interactive forms are discussed in the PDF Specification (ISO PDF
-// 32000-1:2008) section 12.7. Also relevant is the section about
-// Widget annotations. Annotations are discussed in section 12.5
-// with annotation dictionaries discussed in 12.5.1. Widget
-// annotations are discussed specifically in section 12.5.6.19.
+// * Interactive forms are discussed in the PDF Specification (ISO PDF 32000-1:2008) section 12.7.
+// Also relevant is the section about Widget annotations. Annotations are discussed in
+// section 12.5 with annotation dictionaries discussed in 12.5.1. Widget annotations are discussed
+// specifically in section 12.5.6.19.
//
-// * What you need to know about the structure of interactive forms in
-// PDF files:
+// * What you need to know about the structure of interactive forms in PDF files:
//
-// - The document catalog contains the key "/AcroForm" which
-// contains a list of fields. Fields are represented as a tree
-// structure much like pages. Nodes in the fields tree may contain
-// other fields. Fields may inherit values of many of their
-// attributes from ancestors in the tree.
+// - The document catalog contains the key "/AcroForm" which contains a list of fields. Fields are
+// represented as a tree structure much like pages. Nodes in the fields tree may contain other
+// fields. Fields may inherit values of many of their attributes from ancestors in the tree.
//
-// - Fields may also have children that are widget annotations. As a
-// special case, and a cause of considerable confusion, if a field
-// has a single annotation as a child, the annotation dictionary
-// may be merged with the field dictionary. In that case, the
-// field and the annotation are in the same object. Note that,
-// while field dictionary attributes are inherited, annotation
-// dictionary attributes are not.
+// - Fields may also have children that are widget annotations. As a special case, and a cause of
+// considerable confusion, if a field has a single annotation as a child, the annotation
+// dictionary may be merged with the field dictionary. In that case, the field and the
+// annotation are in the same object. Note that, while field dictionary attributes are
+// inherited, annotation dictionary attributes are not.
//
-// - A page dictionary contains a key called "/Annots" which
-// contains a simple list of annotations. For any given annotation
-// of subtype "/Widget", you should encounter that annotation in
-// the "/Annots" dictionary of a page, and you should also be able
-// to reach it by traversing through the "/AcroForm" dictionary
-// from the document catalog. In the simplest case (and also a
-// very common case), a form field's widget annotation will be
-// merged with the field object, and the object will appear
-// directly both under "/Annots" in the page dictionary and under
-// "/Fields" in the "/AcroForm" dictionary. In a more complex
-// case, you may have to trace through various "/Kids" elements in
-// the "/AcroForm" field entry until you find the annotation
+// - A page dictionary contains a key called "/Annots" which contains a simple list of
+// annotations. For any given annotation of subtype "/Widget", you should encounter that
+// annotation in the "/Annots" array of a page, and you should also be able to reach it by
+// traversing through the "/AcroForm" dictionary from the document catalog. In the simplest case
+// (and also a very common case), a form field's widget annotation will be merged with the field
+// object, and the object will appear directly both under "/Annots" in the page dictionary and
+// under "/Fields" in the "/AcroForm" dictionary. In a more complex case, you may have to trace
+// through various "/Kids" elements in the "/AcroForm" field entry until you find the annotation
// dictionary.
#include <qpdf/QPDFDocumentHelper.hh>
@@ -87,34 +73,28 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
QPDF_DLL
virtual ~QPDFAcroFormDocumentHelper() = default;
- // This class lazily creates an internal cache of the mapping
- // among form fields, annotations, and pages. Methods within this
- // class preserve the validity of this cache. However, if you
- // modify pages' annotation dictionaries, the document's /AcroForm
- // dictionary, or any form fields manually in a way that alters
- // the association between forms, fields, annotations, and pages,
- // it may cause this cache to become invalid. This method marks
- // the cache invalid and forces it to be regenerated the next time
- // it is needed.
+ // This class lazily creates an internal cache of the mapping among form fields, annotations,
+ // and pages. Methods within this class preserve the validity of this cache. However, if you
+ // modify pages' annotation dictionaries, the document's /AcroForm dictionary, or any form
+ // fields manually in a way that alters the association between forms, fields, annotations, and
+ // pages, it may cause this cache to become invalid. This method marks the cache invalid and
+ // forces it to be regenerated the next time it is needed.
QPDF_DLL
void invalidateCache();
QPDF_DLL
bool hasAcroForm();
- // Add a form field, initializing the document's AcroForm
- // dictionary if needed, updating the cache if necessary. Note
- // that you are adding fields that are copies of other fields,
- // this method may result in multiple fields existing with the
- // same qualified name, which can have unexpected side effects. In
- // that case, you should use addAndRenameFormFields() instead.
+ // Add a form field, initializing the document's AcroForm dictionary if needed, updating the
+ // cache if necessary. Note that if you are adding fields that are copies of other fields, this
+ // method may result in multiple fields existing with the same qualified name, which can have
+ // unexpected side effects. In that case, you should use addAndRenameFormFields() instead.
QPDF_DLL
void addFormField(QPDFFormFieldObjectHelper);
- // Add a collection of form fields making sure that their fully
- // qualified names don't conflict with already present form
- // fields. Fields within the collection of new fields that have
- // the same name as each other will continue to do so.
+ // Add a collection of form fields making sure that their fully qualified names don't conflict
+ // with already present form fields. Fields within the collection of new fields that have the
+ // same name as each other will continue to do so.
QPDF_DLL
void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields);
@@ -122,31 +102,27 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
QPDF_DLL
void removeFormFields(std::set<QPDFObjGen> const&);
- // Set the name of a field, updating internal records of field
- // names. Name should be UTF-8 encoded.
+ // Set the name of a field, updating internal records of field names. Name should be UTF-8
+ // encoded.
QPDF_DLL
void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name);
- // Return a vector of all terminal fields in a document. Terminal
- // fields are fields that have no children that are also fields.
- // Terminal fields may still have children that are annotations.
- // Intermediate nodes in the fields tree are not included in this
- // list, but you can still reach them through the getParent method
- // of the field object helper.
+ // Return a vector of all terminal fields in a document. Terminal fields are fields that have no
+ // children that are also fields. Terminal fields may still have children that are annotations.
+ // Intermediate nodes in the fields tree are not included in this list, but you can still reach
+ // them through the getParent method of the field object helper.
QPDF_DLL
std::vector<QPDFFormFieldObjectHelper> getFormFields();
- // Return all the form fields that have the given fully-qualified
- // name and also have an explicit "/T" attribute. For this
- // information to be accurate, any changes to field names must be
- // done through setFormFieldName() above.
+ // Return all the form fields that have the given fully-qualified name and also have an explicit
+ // "/T" attribute. For this information to be accurate, any changes to field names must be done
+ // through setFormFieldName() above.
QPDF_DLL
std::set<QPDFObjGen> getFieldsWithQualifiedName(std::string const& name);
- // Return the annotations associated with a terminal field. Note
- // that in the case of a field having a single annotation, the
- // underlying object will typically be the same as the underlying
- // object for the field.
+ // Return the annotations associated with a terminal field. Note that in the case of a field
+ // having a single annotation, the underlying object will typically be the same as the
+ // underlying object for the field.
QPDF_DLL
std::vector<QPDFAnnotationObjectHelper> getAnnotationsForField(QPDFFormFieldObjectHelper);
@@ -158,63 +134,49 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
QPDF_DLL
std::vector<QPDFFormFieldObjectHelper> getFormFieldsForPage(QPDFPageObjectHelper);
- // Return the terminal field that is associated with this
- // annotation. If the annotation dictionary is merged with the
- // field dictionary, the underlying object will be the same, but
- // this is not always the case. Note that if you call this method
- // with an annotation that is not a widget annotation, there will
- // not be an associated field, and this method will return a
+ // Return the terminal field that is associated with this annotation. If the annotation
+ // dictionary is merged with the field dictionary, the underlying object will be the same, but
+ // this is not always the case. Note that if you call this method with an annotation that is not
+ // a widget annotation, there will not be an associated field, and this method will return a
// helper associated with a null object (isNull() == true).
QPDF_DLL
QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper);
- // Return the current value of /NeedAppearances. If
- // /NeedAppearances is missing, return false as that is how PDF
- // viewers are supposed to interpret it.
+ // Return the current value of /NeedAppearances. If /NeedAppearances is missing, return false as
+ // that is how PDF viewers are supposed to interpret it.
QPDF_DLL
bool getNeedAppearances();
- // Indicate whether appearance streams must be regenerated. If you
- // modify a field value, you should call setNeedAppearances(true)
- // unless you also generate an appearance stream for the
- // corresponding annotation at the same time. If you generate
- // appearance streams for all fields, you can call
- // setNeedAppearances(false). If you use
- // QPDFFormFieldObjectHelper::setV, it will automatically call
- // this method unless you tell it not to.
+ // Indicate whether appearance streams must be regenerated. If you modify a field value, you
+ // should call setNeedAppearances(true) unless you also generate an appearance stream for the
+ // corresponding annotation at the same time. If you generate appearance streams for all fields,
+ // you can call setNeedAppearances(false). If you use QPDFFormFieldObjectHelper::setV, it will
+ // automatically call this method unless you tell it not to.
QPDF_DLL
void setNeedAppearances(bool);
- // If /NeedAppearances is false, do nothing. Otherwise generate
- // appearance streams for all widget annotations that need them.
- // See comments in QPDFFormFieldObjectHelper.hh for
- // generateAppearance for limitations. For checkbox and radio
- // button fields, this code ensures that appearance state is
- // consistent with the field's value and uses any pre-existing
+ // If /NeedAppearances is false, do nothing. Otherwise generate appearance streams for all
+ // widget annotations that need them. See comments in QPDFFormFieldObjectHelper.hh for
+ // generateAppearance for limitations. For checkbox and radio button fields, this code ensures
+ // that appearance state is consistent with the field's value and uses any pre-existing
// appearance streams.
QPDF_DLL
void generateAppearancesIfNeeded();
- // Note: this method works on all annotations, not just ones with
- // associated fields. For each annotation in old_annots, apply the
- // given transformation matrix to create a new annotation. New
- // annotations are appended to new_annots. If the annotation is
- // associated with a form field, a new form field is created that
- // points to the new annotation and is appended to new_fields, and
- // the old field is added to old_fields.
+ // Note: this method works on all annotations, not just ones with associated fields. For each
+ // annotation in old_annots, apply the given transformation matrix to create a new annotation.
+ // New annotations are appended to new_annots. If the annotation is associated with a form
+ // field, a new form field is created that points to the new annotation and is appended to
+ // new_fields, and the old field is added to old_fields.
//
- // old_annots may belong to a different QPDF object. In that case,
- // you should pass in from_qpdf, and copyForeignObject will be
- // called automatically. If this is the case, for efficiency, you
- // may pass in a QPDFAcroFormDocumentHelper for the other file to
- // avoid the expensive process of creating one for each call to
- // transformAnnotations. New fields and annotations are not added
- // to the document or pages. You have to do that yourself after
- // calling transformAnnotations. If this operation will leave
- // orphaned fields behind, such as if you are replacing the old
- // annotations with the new ones on the same page and the fields
- // and annotations are not shared, you will also need to remove
- // the old fields to prevent them from hanging round unreferenced.
+ // old_annots may belong to a different QPDF object. In that case, you should pass in from_qpdf,
+ // and copyForeignObject will be called automatically. If this is the case, for efficiency, you
+ // may pass in a QPDFAcroFormDocumentHelper for the other file to avoid the expensive process of
+ // creating one for each call to transformAnnotations. New fields and annotations are not added
+ // to the document or pages. You have to do that yourself after calling transformAnnotations. If
+ // this operation will leave orphaned fields behind, such as if you are replacing the old
+ // annotations with the new ones on the same page and the fields and annotations are not shared,
+ // you will also need to remove the old fields to prevent them from hanging round unreferenced.
QPDF_DLL
void transformAnnotations(
QPDFObjectHandle old_annots,
@@ -225,18 +187,14 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
QPDF* from_qpdf = nullptr,
QPDFAcroFormDocumentHelper* from_afdh = nullptr);
- // Copy form fields and annotations from one page to another,
- // allowing the from page to be in a different QPDF or in the same
- // QPDF. This would typically be called after calling addPage to
- // add field/annotation awareness. When just copying the page by
- // itself, annotations end up being shared, and fields end up
- // being omitted because there is no reference to the field from
- // the page. This method ensures that each separate copy of a page
- // has private annotations and that fields and annotations are
- // properly updated to resolve conflicts that may occur from
- // common resource and field names across documents. It is
- // basically a wrapper around transformAnnotations that handles
- // updating the receiving page. If new_fields is non-null, any
+ // Copy form fields and annotations from one page to another, allowing the from page to be in a
+ // different QPDF or in the same QPDF. This would typically be called after calling addPage to
+ // add field/annotation awareness. When just copying the page by itself, annotations end up
+ // being shared, and fields end up being omitted because there is no reference to the field from
+ // the page. This method ensures that each separate copy of a page has private annotations and
+ // that fields and annotations are properly updated to resolve conflicts that may occur from
+ // common resource and field names across documents. It is basically a wrapper around
+ // transformAnnotations that handles updating the receiving page. If new_fields is non-null, any
// newly created fields are added to it.
QPDF_DLL
void fixCopiedAnnotations(
diff --git a/include/qpdf/QPDFExc.hh b/include/qpdf/QPDFExc.hh
index b53aa405..1a9e16f2 100644
--- a/include/qpdf/QPDFExc.hh
+++ b/include/qpdf/QPDFExc.hh
@@ -2,22 +2,19 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
#ifndef QPDFEXC_HH
#define QPDFEXC_HH
@@ -42,14 +39,12 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error
QPDF_DLL
virtual ~QPDFExc() noexcept = default;
- // To get a complete error string, call what(), provided by
- // std::exception. The accessors below return the original values
- // used to create the exception. Only the error code and message
- // are guaranteed to have non-zero/empty values.
+ // To get a complete error string, call what(), provided by std::exception. The accessors below
+ // return the original values used to create the exception. Only the error code and message are
+ // guaranteed to have non-zero/empty values.
- // There is no lookup code that maps numeric error codes into
- // strings. The numeric error code is just another way to get at
- // the underlying issue, but it is more programmer-friendly than
+ // There is no lookup code that maps numeric error codes into strings. The numeric error code
+ // is just another way to get at the underlying issue, but it is more programmer-friendly than
// trying to parse a string that is subject to change.
QPDF_DLL
@@ -71,8 +66,8 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error
qpdf_offset_t offset,
std::string const& message);
- // This class does not use the Members pattern to avoid needless
- // memory allocations during exception handling.
+ // This class does not use the Members pattern to avoid needless memory allocations during
+ // exception handling.
qpdf_error_code_e error_code;
std::string filename;
diff --git a/include/qpdf/QPDFFormFieldObjectHelper.hh b/include/qpdf/QPDFFormFieldObjectHelper.hh
index 2221684a..fbd5547f 100644
--- a/include/qpdf/QPDFFormFieldObjectHelper.hh
+++ b/include/qpdf/QPDFFormFieldObjectHelper.hh
@@ -2,29 +2,25 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
#ifndef QPDFFORMFIELDOBJECTHELPER_HH
#define QPDFFORMFIELDOBJECTHELPER_HH
-// This object helper helps with form fields for interactive forms.
-// Please see comments in QPDFAcroFormDocumentHelper.hh for additional
-// details.
+// This object helper helps with form fields for interactive forms. Please see comments in
+// QPDFAcroFormDocumentHelper.hh for additional details.
#include <qpdf/QPDFObjectHelper.hh>
@@ -46,37 +42,32 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
QPDF_DLL
bool isNull();
- // Return the field's parent. A form field object helper whose
- // underlying object is null is returned if there is no parent.
- // This condition may be tested by calling isNull().
+ // Return the field's parent. A form field object helper whose underlying object is null is
+ // returned if there is no parent. This condition may be tested by calling isNull().
QPDF_DLL
QPDFFormFieldObjectHelper getParent();
- // Return the top-level field for this field. Typically this will
- // be the field itself or its parent. If is_different is provided,
- // it is set to true if the top-level field is different from the
- // field itself; otherwise it is set to false.
+ // Return the top-level field for this field. Typically this will be the field itself or its
+ // parent. If is_different is provided, it is set to true if the top-level field is different
+ // from the field itself; otherwise it is set to false.
QPDF_DLL
QPDFFormFieldObjectHelper getTopLevelField(bool* is_different = nullptr);
- // Get a field value, possibly inheriting the value from an
- // ancestor node.
+ // Get a field value, possibly inheriting the value from an ancestor node.
QPDF_DLL
QPDFObjectHandle getInheritableFieldValue(std::string const& name);
- // Get an inherited field value as a string. If it is not a
- // string, silently return the empty string.
+ // Get an inherited field value as a string. If it is not a string, silently return the empty
+ // string.
QPDF_DLL
std::string getInheritableFieldValueAsString(std::string const& name);
- // Get an inherited field value of type name as a string
- // representing the name. If it is not a name, silently return
- // the empty string.
+ // Get an inherited field value of type name as a string representing the name. If it is not a
+ // name, silently return the empty string.
QPDF_DLL
std::string getInheritableFieldValueAsName(std::string const& name);
- // Returns the value of /FT if present, otherwise returns the
- // empty string.
+ // Returns the value of /FT if present, otherwise returns the empty string.
QPDF_DLL
std::string getFieldType();
@@ -86,60 +77,53 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
QPDF_DLL
std::string getPartialName();
- // Return the alternative field name (/TU), which is the field
- // name intended to be presented to users. If not present, fall
- // back to the fully qualified name.
+ // Return the alternative field name (/TU), which is the field name intended to be presented to
+ // users. If not present, fall back to the fully qualified name.
QPDF_DLL
std::string getAlternativeName();
- // Return the mapping field name (/TM). If not present, fall back
- // to the alternative name, then to the partial name.
+ // Return the mapping field name (/TM). If not present, fall back to the alternative name, then
+ // to the partial name.
QPDF_DLL
std::string getMappingName();
QPDF_DLL
QPDFObjectHandle getValue();
- // Return the field's value as a string. If this is called with a
- // field whose value is not a string, the empty string will be
- // silently returned.
+ // Return the field's value as a string. If this is called with a field whose value is not a
+ // string, the empty string will be silently returned.
QPDF_DLL
std::string getValueAsString();
QPDF_DLL
QPDFObjectHandle getDefaultValue();
- // Return the field's default value as a string. If this is called
- // with a field whose value is not a string, the empty string will
- // be silently returned.
+ // Return the field's default value as a string. If this is called with a field whose value is
+ // not a string, the empty string will be silently returned.
QPDF_DLL
std::string getDefaultValueAsString();
- // Return the default appearance string, taking inheritance from
- // the field tree into account. Returns the empty string if the
- // default appearance string is not available (because it's
- // erroneously absent or because this is not a variable text
- // field). If not found in the field hierarchy, look in /AcroForm.
+ // Return the default appearance string, taking inheritance from the field tree into account.
+ // Returns the empty string if the default appearance string is not available (because it's
+ // erroneously absent or because this is not a variable text field). If not found in the field
+ // hierarchy, look in /AcroForm.
QPDF_DLL
std::string getDefaultAppearance();
- // Return the default resource dictionary for the field. This
- // comes not from the field but from the document-level /AcroForm
- // dictionary. While several PDF generates put a /DR key in the
- // form field's dictionary, experimentation suggests that many
- // popular readers, including Adobe Acrobat and Acrobat Reader,
- // ignore any /DR item on the field.
+ // Return the default resource dictionary for the field. This comes not from the field but from
+ // the document-level /AcroForm dictionary. While several PDF generators put a /DR key in the
+ // form field's dictionary, experimentation suggests that many popular readers, including Adobe
+ // Acrobat and Acrobat Reader, ignore any /DR item on the field.
QPDF_DLL
QPDFObjectHandle getDefaultResources();
- // Return the quadding value, taking inheritance from the field
- // tree into account. Returns 0 if quadding is not specified. Look
- // in /AcroForm if not found in the field hierarchy.
+ // Return the quadding value, taking inheritance from the field tree into account. Returns 0 if
+ // quadding is not specified. Look in /AcroForm if not found in the field hierarchy.
QPDF_DLL
int getQuadding();
- // Return field flags from /Ff. The value is a logical or of
- // pdf_form_field_flag_e as defined in qpdf/Constants.h
+ // Return field flags from /Ff. The value is a logical or of pdf_form_field_flag_e as defined in
+ // qpdf/Constants.h
QPDF_DLL
int getFlags();
@@ -148,19 +132,16 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
// Returns true if field is of type /Tx
QPDF_DLL
bool isText();
- // Returns true if field is of type /Btn and flags do not indicate
- // some other type of button.
+ // Returns true if field is of type /Btn and flags do not indicate some other type of button.
QPDF_DLL
bool isCheckbox();
// Returns true if field is a checkbox and is checked.
QPDF_DLL
bool isChecked();
- // Returns true if field is of type /Btn and flags indicate that
- // it is a radio button
+ // Returns true if field is of type /Btn and flags indicate that it is a radio button
QPDF_DLL
bool isRadioButton();
- // Returns true if field is of type /Btn and flags indicate that
- // it is a pushbutton
+ // Returns true if field is of type /Btn and flags indicate that it is a pushbutton
QPDF_DLL
bool isPushbutton();
// Returns true if field is of type /Ch
@@ -170,45 +151,36 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
QPDF_DLL
std::vector<std::string> getChoices();
- // Set an attribute to the given value. If you have a
- // QPDFAcroFormDocumentHelper and you want to set the name of a
- // field, use QPDFAcroFormDocumentHelper::setFormFieldName
- // instead.
+ // Set an attribute to the given value. If you have a QPDFAcroFormDocumentHelper and you want to
+ // set the name of a field, use QPDFAcroFormDocumentHelper::setFormFieldName instead.
QPDF_DLL
void setFieldAttribute(std::string const& key, QPDFObjectHandle value);
- // Set an attribute to the given value as a Unicode string (UTF-16
- // BE encoded). The input string should be UTF-8 encoded. If you
- // have a QPDFAcroFormDocumentHelper and you want to set the name
- // of a field, use QPDFAcroFormDocumentHelper::setFormFieldName
- // instead.
+ // Set an attribute to the given value as a Unicode string (UTF-16 BE encoded). The input string
+ // should be UTF-8 encoded. If you have a QPDFAcroFormDocumentHelper and you want to set the
+ // name of a field, use QPDFAcroFormDocumentHelper::setFormFieldName instead.
QPDF_DLL
void setFieldAttribute(std::string const& key, std::string const& utf8_value);
- // Set /V (field value) to the given value. If need_appearances is
- // true and the field type is either /Tx (text) or /Ch (choice),
- // set /NeedAppearances to true. You can explicitly tell this
- // method not to set /NeedAppearances if you are going to generate
- // an appearance stream yourself. Starting with qpdf 8.3.0, this
- // method handles fields of type /Btn (checkboxes, radio buttons,
- // pushbuttons) specially.
+ // Set /V (field value) to the given value. If need_appearances is true and the field type is
+ // either /Tx (text) or /Ch (choice), set /NeedAppearances to true. You can explicitly tell this
+ // method not to set /NeedAppearances if you are going to generate an appearance stream
+ // yourself. Starting with qpdf 8.3.0, this method handles fields of type /Btn (checkboxes,
+ // radio buttons, pushbuttons) specially.
QPDF_DLL
void setV(QPDFObjectHandle value, bool need_appearances = true);
- // Set /V (field value) to the given string value encoded as a
- // Unicode string. The input value should be UTF-8 encoded. See
- // comments above about /NeedAppearances.
+ // Set /V (field value) to the given string value encoded as a Unicode string. The input value
+ // should be UTF-8 encoded. See comments above about /NeedAppearances.
QPDF_DLL
void setV(std::string const& utf8_value, bool need_appearances = true);
- // Update the appearance stream for this field. Note that qpdf's
- // ability to generate appearance streams is limited. We only
- // generate appearance streams for streams of type text or choice.
- // The appearance uses the default parameters provided in the
- // file, and it only supports ASCII characters. Quadding is
- // currently ignored. While this functionality is limited, it
- // should do a decent job on properly constructed PDF files when
- // field values are restricted to ASCII characters.
+ // Update the appearance stream for this field. Note that qpdf's ability to generate appearance
+ // streams is limited. We only generate appearance streams for fields of type text or choice.
+ // The appearance uses the default parameters provided in the file, and it only supports ASCII
+ // characters. Quadding is currently ignored. While this functionality is limited, it should do
+ // a decent job on properly constructed PDF files when field values are restricted to ASCII
+ // characters.
QPDF_DLL
void generateAppearance(QPDFAnnotationObjectHelper&);
diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh
index 4f90990f..37067108 100644
--- a/include/qpdf/QPDFJob.hh
+++ b/include/qpdf/QPDFJob.hh
@@ -2,22 +2,19 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
#ifndef QPDFJOB_HH
#define QPDFJOB_HH
@@ -55,99 +52,80 @@ class QPDFJob
static int constexpr EXIT_IS_NOT_ENCRYPTED = qpdf_exit_is_not_encrypted;
static int constexpr EXIT_CORRECT_PASSWORD = qpdf_exit_correct_password;
- // QPDFUsage is thrown if there are any usage-like errors when
- // calling Config methods.
+ // QPDFUsage is thrown if there are any usage-like errors when calling Config methods.
QPDF_DLL
QPDFJob();
// SETUP FUNCTIONS
- // Initialize a QPDFJob object from argv, which must be a
- // null-terminated array of null-terminated UTF-8-encoded C
- // strings. The progname_env argument is the name of an
- // environment variable which, if set, overrides the name of the
- // executable for purposes of generating the --completion options.
- // See QPDFArgParser for details. If a null pointer is passed in,
- // the default value of "QPDF_EXECUTABLE" is used. This is used by
- // the QPDF cli, which just initializes a QPDFJob from argv, calls
- // run(), and handles errors and exit status issues. You can
- // perform much of the cli functionality programmatically in this
- // way rather than using the regular API. This is exposed in the C
- // API, which makes it easier to get certain high-level qpdf
- // functionality from other languages. If there are any
- // command-line errors, this method will throw QPDFUsage which is
- // derived from std::runtime_error. Other exceptions may be thrown
- // in some cases. Note that argc, and argv should be UTF-8
- // encoded. If you are calling this from a Windows Unicode-aware
- // main (wmain), see QUtil::call_main_from_wmain for information
- // about converting arguments to UTF-8. This method will mutate
- // arguments that are passed to it.
+ // Initialize a QPDFJob object from argv, which must be a null-terminated array of
+ // null-terminated UTF-8-encoded C strings. The progname_env argument is the name of an
+ // environment variable which, if set, overrides the name of the executable for purposes of
+ // generating the --completion options. See QPDFArgParser for details. If a null pointer is
+ // passed in, the default value of "QPDF_EXECUTABLE" is used. This is used by the QPDF cli,
+ // which just initializes a QPDFJob from argv, calls run(), and handles errors and exit status
+ // issues. You can perform much of the cli functionality programmatically in this way rather
+ // than using the regular API. This is exposed in the C API, which makes it easier to get
+ // certain high-level qpdf functionality from other languages. If there are any command-line
+ // errors, this method will throw QPDFUsage which is derived from std::runtime_error. Other
+ // exceptions may be thrown in some cases. Note that argv should be UTF-8 encoded. If
+ // you are calling this from a Windows Unicode-aware main (wmain), see
+ // QUtil::call_main_from_wmain for information about converting arguments to UTF-8. This method
+ // will mutate arguments that are passed to it.
QPDF_DLL
void initializeFromArgv(char const* const argv[], char const* progname_env = nullptr);
- // Initialize a QPDFJob from json. Passing partial = true prevents
- // this method from doing the final checks (calling
- // checkConfiguration) after processing the json file. This makes
- // it possible to initialize QPDFJob in stages using multiple json
- // files or to have a json file that can be processed from the CLI
- // with --job-json-file and be combined with other arguments. For
- // example, you might include only encryption parameters, leaving
- // it up to the rest of the command-line arguments to provide
- // input and output files. initializeFromJson is called with
- // partial = true when invoked from the command line. To make sure
- // that the json file is fully valid on its own, just don't
- // specify any other command-line flags. If there are any
- // configuration errors, QPDFUsage is thrown. Some error messages
- // may be CLI-centric. If an an exception tells you to use the
- // "--some-option" option, set the "someOption" key in the JSON
+ // Initialize a QPDFJob from json. Passing partial = true prevents this method from doing the
+ // final checks (calling checkConfiguration) after processing the json file. This makes it
+ // possible to initialize QPDFJob in stages using multiple json files or to have a json file
+ // that can be processed from the CLI with --job-json-file and be combined with other arguments.
+ // For example, you might include only encryption parameters, leaving it up to the rest of the
+ // command-line arguments to provide input and output files. initializeFromJson is called with
+ // partial = true when invoked from the command line. To make sure that the json file is fully
+ // valid on its own, just don't specify any other command-line flags. If there are any
+ // configuration errors, QPDFUsage is thrown. Some error messages may be CLI-centric. If an
+ // exception tells you to use the "--some-option" option, set the "someOption" key in the JSON
// object instead.
QPDF_DLL
void initializeFromJson(std::string const& json, bool partial = false);
- // Set name that is used to prefix verbose messages, progress
- // messages, and other things that the library writes to output
- // and error streams on the caller's behalf. Defaults to "qpdf".
+ // Set name that is used to prefix verbose messages, progress messages, and other things that
+ // the library writes to output and error streams on the caller's behalf. Defaults to "qpdf".
QPDF_DLL
void setMessagePrefix(std::string const&);
QPDF_DLL
std::string getMessagePrefix() const;
- // To capture or redirect output, configure the logger returned by
- // getLogger(). By default, all QPDF and QPDFJob objects share the
- // global logger. If you need a private logger for some reason,
- // pass a new one to setLogger(). See comments in QPDFLogger.hh
- // for details on configuring the logger.
+ // To capture or redirect output, configure the logger returned by getLogger(). By default, all
+ // QPDF and QPDFJob objects share the global logger. If you need a private logger for some
+ // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on
+ // configuring the logger.
//
- // If you set a custom logger here, the logger will be passed to
- // all subsequent QPDF objects created by this QPDFJob object.
+ // If you set a custom logger here, the logger will be passed to all subsequent QPDF objects
+ // created by this QPDFJob object.
QPDF_DLL
std::shared_ptr<QPDFLogger> getLogger();
QPDF_DLL
void setLogger(std::shared_ptr<QPDFLogger>);
- // This deprecated method is the old way to capture output, but it
- // didn't capture all output. See comments above for getLogger and
- // setLogger. This will be removed in QPDF 12. For now, it
- // configures a private logger, separating this object from the
- // default logger, and calls setOutputStreams on that logger. See
- // QPDFLogger.hh for additional details.
+ // This deprecated method is the old way to capture output, but it didn't capture all output.
+ // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it
+ // configures a private logger, separating this object from the default logger, and calls
+ // setOutputStreams on that logger. See QPDFLogger.hh for additional details.
[[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void
setOutputStreams(std::ostream* out_stream, std::ostream* err_stream);
- // You can register a custom progress reporter to be called by
- // QPDFWriter (see QPDFWriter::registerProgressReporter). This is
- // only called if you also request progress reporting through
- // normal configuration methods (e.g., pass --progress, call
+ // You can register a custom progress reporter to be called by QPDFWriter (see
+ // QPDFWriter::registerProgressReporter). This is only called if you also request progress
+ // reporting through normal configuration methods (e.g., pass --progress, call
// config()->progress, etc.)
QPDF_DLL
void registerProgressReporter(std::function<void(int)>);
- // Check to make sure no contradictory options have been
- // specified. This is called automatically after initializing from
- // argv or json and is also called by run, but you can call it
- // manually as well. It throws a QPDFUsage exception if there are
- // any errors. This Config object (see CONFIGURATION) also has a
- // checkConfiguration method which calls this one.
+ // Check to make sure no contradictory options have been specified. This is called automatically
+ // after initializing from argv or json and is also called by run, but you can call it manually
+ // as well. It throws a QPDFUsage exception if there are any errors. The Config object (see
+ // CONFIGURATION) also has a checkConfiguration method which calls this one.
QPDF_DLL
void checkConfiguration();
@@ -157,8 +135,7 @@ class QPDFJob
// SEE BELOW FOR MORE PUBLIC METHODS AND CLASSES
private:
- // These structures are private but we need to define them before
- // the public Config classes.
+ // These structures are private but we need to define them before the public Config classes.
struct CopyAttachmentFrom
{
std::string path;
@@ -197,33 +174,27 @@ class QPDFJob
// Configuration classes are implemented in QPDFJob_config.cc.
- // The config() method returns a shared pointer to a Config
- // object. The Config object contains methods that correspond with
- // qpdf command-line arguments. You can use a fluent interface to
- // configure a QPDFJob object that would do exactly the same thing
- // as a specific qpdf command. The example qpdf-job.cc contains an
- // example of this usage. You can also use initializeFromJson or
- // initializeFromArgv to initialize a QPDFJob object.
+ // The config() method returns a shared pointer to a Config object. The Config object contains
+ // methods that correspond with qpdf command-line arguments. You can use a fluent interface to
+ // configure a QPDFJob object that would do exactly the same thing as a specific qpdf command.
+ // The example qpdf-job.cc contains an example of this usage. You can also use
+ // initializeFromJson or initializeFromArgv to initialize a QPDFJob object.
// Notes about the Config methods:
//
- // * Most of the method declarations are automatically generated
- // in header files that are included within the class
- // definitions. They correspond in predictable ways to the
- // command-line arguments and are generated from the same code
- // that generates the command-line argument parsing code.
+ // * Most of the method declarations are automatically generated in header files that are
+ // included within the class definitions. They correspond in predictable ways to the
+ // command-line arguments and are generated from the same code that generates the command-line
+ // argument parsing code.
//
- // * Methods return pointers, rather than references, to
- // configuration objects. References might feel more familiar to
- // users of fluent interfaces, so why do we use pointers? The
- // main methods that create them return smart pointers so that
- // users can initialize them when needed, which you can't do
- // with references. Returning pointers instead of references
- // makes for a more uniform interface.
-
- // Maintainer documentation: see the section in README-maintainer
- // called "HOW TO ADD A COMMAND-LINE ARGUMENT", which contains
- // references to additional places in the documentation.
+ // * Methods return pointers, rather than references, to configuration objects. References
+ // might feel more familiar to users of fluent interfaces, so why do we use pointers? The
+ // main methods that create them return smart pointers so that users can initialize them when
+ // needed, which you can't do with references. Returning pointers instead of references makes
+ // for a more uniform interface.
+
+ // Maintainer documentation: see the section in README-maintainer called "HOW TO ADD A
+ // COMMAND-LINE ARGUMENT", which contains references to additional places in the documentation.
class Config;
@@ -374,13 +345,11 @@ class QPDFJob
QPDFJob& o;
};
- // Return a top-level configuration item. See CONFIGURATION above
- // for details. If an invalid configuration is created (such as
- // supplying contradictory options, omitting an input file, etc.),
- // QPDFUsage is thrown. Note that error messages are CLI-centric,
- // but you can map them into config calls. For example, if an
- // exception tells you to use the --some-option flag, you should
- // call config()->someOption() instead.
+ // Return a top-level configuration item. See CONFIGURATION above for details. If an invalid
+ // configuration is created (such as supplying contradictory options, omitting an input file,
+ // etc.), QPDFUsage is thrown. Note that error messages are CLI-centric, but you can map them
+ // into config calls. For example, if an exception tells you to use the --some-option flag, you
+ // should call config()->someOption() instead.
QPDF_DLL
std::shared_ptr<Config> config();
@@ -388,33 +357,27 @@ class QPDFJob
QPDF_DLL
void run();
- // The following two methods allow a job to be run in two stages - creation
- // of a QPDF object and writing of the QPDF object. This allows the QPDF
- // object to be modified prior to writing it out. See
- // examples/qpdfjob-remove-annotations for an illustration of its use.
+ // The following two methods allow a job to be run in two stages - creation of a QPDF object and
+ // writing of the QPDF object. This allows the QPDF object to be modified prior to writing it
+ // out. See examples/qpdfjob-remove-annotations for an illustration of its use.
- // Run the first stage of the job. Return a nullptr if the configuration is
- // not valid.
+ // Run the first stage of the job. Return a nullptr if the configuration is not valid.
QPDF_DLL
std::unique_ptr<QPDF> createQPDF();
- // Run the second stage of the job. Do nothing if a nullptr is passed as
- // parameter.
+ // Run the second stage of the job. Do nothing if a nullptr is passed as parameter.
QPDF_DLL
void writeQPDF(QPDF& qpdf);
- // CHECK STATUS -- these methods provide information known after
- // run() is called.
+ // CHECK STATUS -- these methods provide information known after run() is called.
QPDF_DLL
bool hasWarnings() const;
- // Return one of the EXIT_* constants defined at the top of the
- // class declaration. This may be called after run() when run()
- // did not throw an exception. Takes into consideration whether
- // isEncrypted or requiresPassword was called. Note that this
- // function does not know whether run() threw an exception, so
- // code that uses this to determine how to exit should explicitly
+ // Return one of the EXIT_* constants defined at the top of the class declaration. This may be
+ // called after run() when run() did not throw an exception. Takes into consideration whether
+ // isEncrypted or requiresPassword was called. Note that this function does not know whether
+ // run() threw an exception, so code that uses this to determine how to exit should explicitly
// use EXIT_ERROR if run() threw an exception.
QPDF_DLL
int getExitCode() const;
@@ -423,24 +386,22 @@ class QPDFJob
QPDF_DLL
unsigned long getEncryptionStatus();
- // HELPER FUNCTIONS -- methods useful for calling in handlers that
- // interact with QPDFJob during run or initialization.
+ // HELPER FUNCTIONS -- methods useful for calling in handlers that interact with QPDFJob during
+ // run or initialization.
- // If in verbose mode, call the given function, passing in the
- // output stream and message prefix.
+ // If in verbose mode, call the given function, passing in the output stream and message prefix.
QPDF_DLL
void doIfVerbose(std::function<void(Pipeline&, std::string const& prefix)> fn);
- // Provide a string that is the help information ("schema" for the
- // qpdf-specific JSON object) for the specified version of JSON
- // output.
+ // Provide a string that is the help information ("schema" for the qpdf-specific JSON object)
+ // for the specified version of JSON output.
QPDF_DLL
static std::string json_out_schema(int version);
[[deprecated("use json_out_schema(version)")]] static std::string QPDF_DLL json_out_schema_v1();
- // Provide a string that is the help information for specified
- // version of JSON format for QPDFJob.
+ // Provide a string that is the help information for the specified version of the JSON format
+ // for QPDFJob.
QPDF_DLL
static std::string job_json_schema(int version);
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index 358f0465..45ad1079 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -66,9 +66,8 @@ class QPDFObjectHandle
friend class QPDFParser;
public:
- // This class is used by replaceStreamData. It provides an
- // alternative way of associating stream data with a stream. See
- // comments on replaceStreamData and newStream for additional
+ // This class is used by replaceStreamData. It provides an alternative way of associating
+ // stream data with a stream. See comments on replaceStreamData and newStream for additional
// details.
class QPDF_DLL_CLASS StreamDataProvider
{
@@ -78,55 +77,40 @@ class QPDFObjectHandle
QPDF_DLL
virtual ~StreamDataProvider();
- // The implementation of this function must write stream data
- // to the given pipeline. The stream data must conform to
- // whatever filters are explicitly associated with the stream.
- // QPDFWriter may, in some cases, add compression, but if it
- // does, it will update the filters as needed. Every call to
- // provideStreamData for a given stream must write the same
- // data. Note that, when writing linearized files, qpdf will
- // call your provideStreamData twice, and if it generates
- // different output, you risk generating invalid output or
- // having qpdf throw an exception. The object ID and
- // generation passed to this method are those that belong to
- // the stream on behalf of which the provider is called. They
- // may be ignored or used by the implementation for indexing
- // or other purposes. This information is made available just
- // to make it more convenient to use a single
- // StreamDataProvider object to provide data for multiple
- // streams.
+ // The implementation of this function must write stream data to the given pipeline. The
+ // stream data must conform to whatever filters are explicitly associated with the stream.
+ // QPDFWriter may, in some cases, add compression, but if it does, it will update the
+ // filters as needed. Every call to provideStreamData for a given stream must write the same
+ // data. Note that, when writing linearized files, qpdf will call your provideStreamData
+ // twice, and if it generates different output, you risk generating invalid output or having
+ // qpdf throw an exception. The object ID and generation passed to this method are those
+ // that belong to the stream on behalf of which the provider is called. They may be ignored
+ // or used by the implementation for indexing or other purposes. This information is made
+ // available just to make it more convenient to use a single StreamDataProvider object to
+ // provide data for multiple streams.
// A few things to keep in mind:
//
- // * Stream data providers must not modify any objects since
- // they may be called after some parts of the file have
- // already been written.
+ // * Stream data providers must not modify any objects since they may be called after some
+ // parts of the file have already been written.
//
- // * Since qpdf may call provideStreamData multiple times when
- // writing linearized files, if the work done by your stream
- // data provider is slow or computationally intensive, you
+ // * Since qpdf may call provideStreamData multiple times when writing linearized files, if
+ // the work done by your stream data provider is slow or computationally intensive, you
// might want to implement your own cache.
//
- // * Once you have called replaceStreamData, the original
- // stream data is no longer directly accessible from the
- // stream, but this is easy to work around by copying the
- // stream to a separate QPDF object. The qpdf library
- // implements this very efficiently without actually making
- // a copy of the stream data. You can find examples of this
- // pattern in some of the examples, including
- // pdf-custom-filter.cc and pdf-invert-images.cc.
-
- // Prior to qpdf 10.0.0, it was not possible to handle errors
- // the way pipeStreamData does or to pass back success.
- // Starting in qpdf 10.0.0, those capabilities have been added
- // by allowing an alternative provideStreamData to be
- // implemented. You must implement at least one of the
- // versions of provideStreamData below. If you implement the
- // version that supports retry and returns a value, you should
- // pass true as the value of supports_retry in the base class
- // constructor. This will cause the library to call that
- // version of the method, which should also return a boolean
- // indicating whether it ran without errors.
+ // * Once you have called replaceStreamData, the original stream data is no longer directly
+ // accessible from the stream, but this is easy to work around by copying the stream to
+ // a separate QPDF object. The qpdf library implements this very efficiently without
+ // actually making a copy of the stream data. You can find examples of this pattern in
+ // some of the examples, including pdf-custom-filter.cc and pdf-invert-images.cc.
+
+ // Prior to qpdf 10.0.0, it was not possible to handle errors the way pipeStreamData does or
+ // to pass back success. Starting in qpdf 10.0.0, those capabilities have been added by
+ // allowing an alternative provideStreamData to be implemented. You must implement at least
+ // one of the versions of provideStreamData below. If you implement the version that
+ // supports retry and returns a value, you should pass true as the value of supports_retry
+ // in the base class constructor. This will cause the library to call that version of the
+ // method, which should also return a boolean indicating whether it ran without errors.
QPDF_DLL
virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline);
QPDF_DLL
@@ -142,41 +126,31 @@ class QPDFObjectHandle
bool supports_retry;
};
- // The TokenFilter class provides a way to filter content streams
- // in a lexically aware fashion. TokenFilters can be attached to
- // streams using the addTokenFilter or addContentTokenFilter
- // methods or can be applied on the spot by filterPageContents.
- // You may also use Pl_QPDFTokenizer directly if you need full
- // control.
+ // The TokenFilter class provides a way to filter content streams in a lexically aware fashion.
+ // TokenFilters can be attached to streams using the addTokenFilter or addContentTokenFilter
+ // methods or can be applied on the spot by filterPageContents. You may also use
+ // Pl_QPDFTokenizer directly if you need full control.
//
- // The handleToken method is called for each token, including the
- // eof token, and then handleEOF is called at the very end.
- // Handlers may call write (or writeToken) to pass data
- // downstream. Please see examples/pdf-filter-tokens.cc and
- // examples/pdf-count-strings.cc for examples of using
- // TokenFilters.
+ // The handleToken method is called for each token, including the eof token, and then handleEOF
+ // is called at the very end. Handlers may call write (or writeToken) to pass data downstream.
+ // Please see examples/pdf-filter-tokens.cc and examples/pdf-count-strings.cc for examples of
+ // using TokenFilters.
//
- // Please note that when you call token.getValue() on a token of
- // type tt_string or tt_name, you get the canonical, "parsed"
- // representation of the token. For a string, this means that
- // there are no delimiters, and for a name, it means that all
- // escaping (# followed by two hex digits) has been resolved.
- // qpdf's internal representation of a name includes the leading
- // slash. As such, you can't write the value of token.getValue()
- // directly to output that is supposed to be valid PDF syntax. If
- // you want to do that, you need to call writeToken() instead, or
- // you can retrieve the token as it appeared in the input with
- // token.getRawValue(). To construct a new string or name token
- // from a canonical representation, use
+ // Please note that when you call token.getValue() on a token of type tt_string or tt_name, you
+ // get the canonical, "parsed" representation of the token. For a string, this means that there
+ // are no delimiters, and for a name, it means that all escaping (# followed by two hex digits)
+ // has been resolved. qpdf's internal representation of a name includes the leading slash. As
+ // such, you can't write the value of token.getValue() directly to output that is supposed to be
+ // valid PDF syntax. If you want to do that, you need to call writeToken() instead, or you can
+ // retrieve the token as it appeared in the input with token.getRawValue(). To construct a new
+ // string or name token from a canonical representation, use
// QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or
// QPDFTokenizer::Token(QPDFTokenizer::tt_name,
- // "/Canonical-Name"). Tokens created this way won't have a
- // PDF-syntax raw value, but you can still write them with
- // writeToken(). Example:
+ // "/Canonical-Name"). Tokens created this way won't have a PDF-syntax raw value, but you can
+ // still write them with writeToken(). Example:
// writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain"))
// would write `/text#2fplain`, and
- // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b"))
- // would write `(a\(b)`.
+ // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) would write `(a\(b)`.
class QPDF_DLL_CLASS TokenFilter
{
public:
@@ -215,8 +189,8 @@ class QPDFObjectHandle
Pipeline* pipeline;
};
- // This class is used by parse to decrypt strings when reading an
- // object that contains encrypted strings.
+ // This class is used by parse to decrypt strings when reading an object that contains encrypted
+ // strings.
class StringDecrypter
{
public:
@@ -225,9 +199,8 @@ class QPDFObjectHandle
virtual void decryptString(std::string& val) = 0;
};
- // This class is used by parsePageContents. Callers must
- // instantiate a subclass of this with handlers defined to accept
- // QPDFObjectHandles that are parsed from the stream.
+ // This class is used by parsePageContents. Callers must instantiate a subclass of this with
+ // handlers defined to accept QPDFObjectHandles that are parsed from the stream.
class QPDF_DLL_CLASS ParserCallbacks
{
public:
@@ -241,17 +214,14 @@ class QPDFObjectHandle
virtual void handleEOF() = 0;
- // Override this if you want to know the full size of the
- // contents, possibly after concatenation of multiple streams.
- // This is called before the first call to handleObject.
+ // Override this if you want to know the full size of the contents, possibly after
+ // concatenation of multiple streams. This is called before the first call to handleObject.
QPDF_DLL
virtual void contentSize(size_t);
protected:
- // Implementors may call this method during parsing to
- // terminate parsing early. This method throws an exception
- // that is caught by parsePageContents, so its effect is
- // immediate.
+ // Implementors may call this method during parsing to terminate parsing early. This method
+ // throws an exception that is caught by parsePageContents, so its effect is immediate.
QPDF_DLL
void terminateParsing();
};
@@ -281,9 +251,8 @@ class QPDFObjectHandle
double ury;
};
- // Convenience object for transformation matrices. See also
- // QPDFMatrix. Unfortunately we can't replace this with QPDFMatrix
- // because QPDFMatrix's default constructor creates the identity
+ // Convenience object for transformation matrices. See also QPDFMatrix. Unfortunately we can't
+ // replace this with QPDFMatrix because QPDFMatrix's default constructor creates the identity
// transform matrix and this one is all zeroes.
class Matrix
{
@@ -324,25 +293,22 @@ class QPDFObjectHandle
QPDF_DLL
inline bool isInitialized() const;
- // This method returns true if the QPDFObjectHandle objects point
- // to exactly the same underlying object, meaning that changes to
- // one are reflected in the other, or "if you paint one, the other
- // one changes color." This does not perform a structural
- // comparison of the contents of the objects.
+ // This method returns true if the QPDFObjectHandle objects point to exactly the same underlying
+ // object, meaning that changes to one are reflected in the other, or "if you paint one, the
+ // other one changes color." This does not perform a structural comparison of the contents of
+ // the objects.
QPDF_DLL
bool isSameObjectAs(QPDFObjectHandle const&) const;
- // Return type code and type name of underlying object. These are
- // useful for doing rapid type tests (like switch statements) or
- // for testing and debugging.
+ // Return type code and type name of underlying object. These are useful for doing rapid type
+ // tests (like switch statements) or for testing and debugging.
QPDF_DLL
qpdf_object_type_e getTypeCode();
QPDF_DLL
char const* getTypeName();
- // Exactly one of these will return true for any initialized
- // object. Operator and InlineImage are only allowed in content
- // streams.
+ // Exactly one of these will return true for any initialized object. Operator and InlineImage
+ // are only allowed in content streams.
QPDF_DLL
bool isBool();
QPDF_DLL
@@ -368,26 +334,22 @@ class QPDFObjectHandle
QPDF_DLL
bool isReserved();
- // True for objects that are direct nulls. Does not attempt to
- // resolve objects. This is intended for internal use, but it can
- // be used as an efficient way to check for nulls that are not
+ // True for objects that are direct nulls. Does not attempt to resolve objects. This is intended
+ // for internal use, but it can be used as an efficient way to check for nulls that are not
// indirect objects.
QPDF_DLL
bool isDirectNull() const;
- // This returns true in addition to the query for the specific
- // type for indirect objects.
+ // This returns true in addition to the query for the specific type for indirect objects.
QPDF_DLL
inline bool isIndirect() const;
- // This returns true for indirect objects from a QPDF that has
- // been destroyed. Trying unparse such an object will throw a
- // logic_error.
+ // This returns true for indirect objects from a QPDF that has been destroyed. Trying to unparse
+ // such an object will throw a logic_error.
QPDF_DLL
bool isDestroyed();
- // True for everything except array, dictionary, stream, word, and
- // inline image.
+ // True for everything except array, dictionary, stream, word, and inline image.
QPDF_DLL
bool isScalar();
@@ -395,53 +357,44 @@ class QPDFObjectHandle
QPDF_DLL
bool isNameAndEquals(std::string const& name);
- // True if the object is a dictionary of the specified type and
- // subtype, if any.
+ // True if the object is a dictionary of the specified type and subtype, if any.
QPDF_DLL
bool isDictionaryOfType(std::string const& type, std::string const& subtype = "");
- // True if the object is a stream of the specified type and
- // subtype, if any.
+ // True if the object is a stream of the specified type and subtype, if any.
QPDF_DLL
bool isStreamOfType(std::string const& type, std::string const& subtype = "");
// Public factory methods
- // Wrap an object in an array if it is not already an array. This
- // is a helper for cases in which something in a PDF may either be
- // a single item or an array of items, which is a common idiom.
+ // Wrap an object in an array if it is not already an array. This is a helper for cases in which
+ // something in a PDF may either be a single item or an array of items, which is a common idiom.
QPDF_DLL
QPDFObjectHandle wrapInArray();
- // Construct an object of any type from a string representation of
- // the object. Throws QPDFExc with an empty filename and an
- // offset into the string if there is an error. Any indirect
- // object syntax (obj gen R) will cause a logic_error exception to
- // be thrown. If object_description is provided, it will appear
- // in the message of any QPDFExc exception thrown for invalid
- // syntax. See also the global `operator ""_qpdf` defined below.
+ // Construct an object of any type from a string representation of the object. Throws QPDFExc
+ // with an empty filename and an offset into the string if there is an error. Any indirect
+ // object syntax (obj gen R) will cause a logic_error exception to be thrown. If
+ // object_description is provided, it will appear in the message of any QPDFExc exception thrown
+ // for invalid syntax. See also the global `operator ""_qpdf` defined below.
QPDF_DLL
static QPDFObjectHandle
parse(std::string const& object_str, std::string const& object_description = "");
- // Construct an object of any type from a string representation of
- // the object. Indirect object syntax (obj gen R) is allowed and
- // will create indirect references within the passed-in context.
- // If object_description is provided, it will appear in the
- // message of any QPDFExc exception thrown for invalid syntax.
- // Note that you can't parse an indirect object reference all by
- // itself as parse will stop at the end of the first complete
- // object, which will just be the first number and will report
- // that there is trailing data at the end of the string.
+ // Construct an object of any type from a string representation of the object. Indirect object
+ // syntax (obj gen R) is allowed and will create indirect references within the passed-in
+ // context. If object_description is provided, it will appear in the message of any QPDFExc
+ // exception thrown for invalid syntax. Note that you can't parse an indirect object reference
+ // all by itself as parse will stop at the end of the first complete object, which will just be
+ // the first number and will report that there is trailing data at the end of the string.
QPDF_DLL
static QPDFObjectHandle
parse(QPDF* context, std::string const& object_str, std::string const& object_description = "");
- // Construct an object as above by reading from the given
- // InputSource at its current position and using the tokenizer you
- // supply. Indirect objects and encrypted strings are permitted.
- // This method was intended to be called by QPDF for parsing
- // objects that are ready from the object's input stream.
+ // Construct an object as above by reading from the given InputSource at its current position
+ // and using the tokenizer you supply. Indirect objects and encrypted strings are permitted.
+ // This method was intended to be called by QPDF for parsing objects that are read from the
+ // object's input stream.
QPDF_DLL
static QPDFObjectHandle parse(
std::shared_ptr<InputSource> input,
@@ -451,60 +404,46 @@ class QPDFObjectHandle
StringDecrypter* decrypter,
QPDF* context);
- // Return the offset where the object was found when parsed. A
- // negative value means that the object was created without
- // parsing. If the object is in a stream, the offset is from the
- // beginning of the stream. Otherwise, the offset is from the
- // beginning of the file.
+ // Return the offset where the object was found when parsed. A negative value means that the
+ // object was created without parsing. If the object is in a stream, the offset is from the
+ // beginning of the stream. Otherwise, the offset is from the beginning of the file.
QPDF_DLL
qpdf_offset_t getParsedOffset();
- // Older method: stream_or_array should be the value of /Contents
- // from a page object. It's more convenient to just call
- // QPDFPageObjectHelper::parsePageContents on the page object, and
- // error messages will also be more useful because the page object
- // information will be known.
+ // Older method: stream_or_array should be the value of /Contents from a page object. It's more
+ // convenient to just call QPDFPageObjectHelper::parsePageContents on the page object, and error
+ // messages will also be more useful because the page object information will be known.
QPDF_DLL
static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks);
- // When called on a stream or stream array that is some page's
- // content streams, do the same as pipePageContents. This method
- // is a lower level way to do what
- // QPDFPageObjectHelper::pipePageContents does, but it allows you
- // to perform this operation on a contents object that is
- // disconnected from a page object. The description argument
- // should describe the containing page and is used in error
- // messages. The all_description argument is initialized to
- // something that could be used to describe the result of the
- // pipeline. It is the description amended with the identifiers of
- // the underlying objects. Please note that if there is an array
- // of content streams, p->finish() is called after each stream. If
- // you pass a pipeline that doesn't allow write() to be called
- // after finish(), you can wrap it in an instance of
- // Pl_Concatenate and then call manualFinish() on the
- // Pl_Concatenate pipeline at the end.
+ // When called on a stream or stream array that is some page's content streams, do the same as
+ // pipePageContents. This method is a lower level way to do what
+ // QPDFPageObjectHelper::pipePageContents does, but it allows you to perform this operation on a
+ // contents object that is disconnected from a page object. The description argument should
+ // describe the containing page and is used in error messages. The all_description argument is
+ // initialized to something that could be used to describe the result of the pipeline. It is the
+ // description amended with the identifiers of the underlying objects. Please note that if there
+ // is an array of content streams, p->finish() is called after each stream. If you pass a
+ // pipeline that doesn't allow write() to be called after finish(), you can wrap it in an
+ // instance of Pl_Concatenate and then call manualFinish() on the Pl_Concatenate pipeline at the
+ // end.
QPDF_DLL
void
pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description);
- // As of qpdf 8, it is possible to add custom token filters to a
- // stream. The tokenized stream data is passed through the token
- // filter after all original filters but before content stream
- // normalization if requested. This is a low-level interface to
- // add it to a stream. You will usually want to call
- // QPDFPageObjectHelper::addContentTokenFilter instead, which can
- // be applied to a page object, and which will automatically
- // handle the case of pages whose contents are split across
- // multiple streams.
+ // As of qpdf 8, it is possible to add custom token filters to a stream. The tokenized stream
+ // data is passed through the token filter after all original filters but before content stream
+ // normalization if requested. This is a low-level interface to add it to a stream. You will
+ // usually want to call QPDFPageObjectHelper::addContentTokenFilter instead, which can be
+ // applied to a page object, and which will automatically handle the case of pages whose
+ // contents are split across multiple streams.
QPDF_DLL
void addTokenFilter(std::shared_ptr<TokenFilter> token_filter);
- // Legacy helpers for parsing content streams. These methods are
- // not going away, but newer code should call the correspond
- // methods in QPDFPageObjectHelper instead. The specification and
- // behavior of these methods are the same as the identically named
- // methods in that class, but newer functionality will be added
- // there.
+ // Legacy helpers for parsing content streams. These methods are not going away, but newer code
+ // should call the corresponding methods in QPDFPageObjectHelper instead. The specification and
+ // behavior of these methods are the same as the identically named methods in that class, but
+ // newer functionality will be added there.
QPDF_DLL
void parsePageContents(ParserCallbacks* callbacks);
QPDF_DLL
@@ -516,13 +455,12 @@ class QPDFObjectHandle
void addContentTokenFilter(std::shared_ptr<TokenFilter> token_filter);
// End legacy content stream helpers
- // Called on a stream to filter the stream as if it were page
- // contents. This can be used to apply a TokenFilter to a form
- // XObject, whose data is in the same format as a content stream.
+ // Called on a stream to filter the stream as if it were page contents. This can be used to
+ // apply a TokenFilter to a form XObject, whose data is in the same format as a content stream.
QPDF_DLL
void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr);
- // Called on a stream to parse the stream as page contents. This
- // can be used to parse a form XObject.
+ // Called on a stream to parse the stream as page contents. This can be used to parse a form
+ // XObject.
QPDF_DLL
void parseAsContents(ParserCallbacks* callbacks);
@@ -538,32 +476,25 @@ class QPDFObjectHandle
QPDF_DLL
static QPDFObjectHandle
newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true);
- // Note about name objects: qpdf's internal representation of a
- // PDF name is a sequence of bytes, excluding the NUL character,
- // and starting with a slash. Name objects as represented in the
- // PDF specification can contain characters escaped with #, but
- // such escaping is not of concern when calling QPDFObjectHandle
- // methods not directly relating to parsing. For example,
- // newName("/text/plain").getName() and
- // parse("/text#2fplain").getName() both return "/text/plain",
- // while newName("/text/plain").unparse() and
- // parse("/text#2fplain").unparse() both return "/text#2fplain".
- // When working with the qpdf API for creating, retrieving, and
- // modifying objects, you want to work with the internal,
- // canonical representation. For names containing alphanumeric
- // characters, dashes, and underscores, there is no difference
- // between the two representations. For a lengthy discussion, see
+ // Note about name objects: qpdf's internal representation of a PDF name is a sequence of bytes,
+ // excluding the NUL character, and starting with a slash. Name objects as represented in the
+ // PDF specification can contain characters escaped with #, but such escaping is not of concern
+ // when calling QPDFObjectHandle methods not directly relating to parsing. For example,
+ // newName("/text/plain").getName() and parse("/text#2fplain").getName() both return
+ // "/text/plain", while newName("/text/plain").unparse() and parse("/text#2fplain").unparse()
+ // both return "/text#2fplain". When working with the qpdf API for creating, retrieving, and
+ // modifying objects, you want to work with the internal, canonical representation. For names
+ // containing alphanumeric characters, dashes, and underscores, there is no difference between
+ // the two representations. For a lengthy discussion, see
// https://github.com/qpdf/qpdf/discussions/625.
QPDF_DLL
static QPDFObjectHandle newName(std::string const& name);
QPDF_DLL
static QPDFObjectHandle newString(std::string const& str);
- // Create a string encoded from the given utf8-encoded string
- // appropriately encoded to appear in PDF files outside of content
- // streams, such as in document metadata form field values, page
- // labels, outlines, and similar locations. We try ASCII first,
- // then PDFDocEncoding, then UTF-16 as needed to successfully
- // encode all the characters.
+ // Create a string encoded from the given utf8-encoded string appropriately encoded to appear in
+ // PDF files outside of content streams, such as in document metadata, form field values, page
+ // labels, outlines, and similar locations. We try ASCII first, then PDFDocEncoding, then UTF-16
+ // as needed to successfully encode all the characters.
QPDF_DLL
static QPDFObjectHandle newUnicodeString(std::string const& utf8_str);
QPDF_DLL
@@ -585,86 +516,67 @@ class QPDFObjectHandle
QPDF_DLL
static QPDFObjectHandle newDictionary(std::map<std::string, QPDFObjectHandle> const& items);
- // Create an array from a rectangle. Equivalent to the rectangle
- // form of newArray.
+ // Create an array from a rectangle. Equivalent to the rectangle form of newArray.
QPDF_DLL
static QPDFObjectHandle newFromRectangle(Rectangle const&);
- // Create an array from a matrix. Equivalent to the matrix
- // form of newArray.
+ // Create an array from a matrix. Equivalent to the matrix form of newArray.
QPDF_DLL
static QPDFObjectHandle newFromMatrix(Matrix const&);
QPDF_DLL
static QPDFObjectHandle newFromMatrix(QPDFMatrix const&);
- // Note: new stream creation methods have were added to the QPDF
- // class starting with version 11.2.0. The ones in this class are
- // here for backward compatibility.
+ // Note: new stream creation methods were added to the QPDF class starting with
+ // version 11.2.0. The ones in this class are here for backward compatibility.
- // Create a new stream and associate it with the given qpdf
- // object. A subsequent call must be made to replaceStreamData()
- // to provide data for the stream. The stream's dictionary may be
- // retrieved by calling getDict(), and the resulting dictionary
- // may be modified. Alternatively, you can create a new dictionary
- // and call replaceDict to install it. From QPDF 11.2, you can
+ // Create a new stream and associate it with the given qpdf object. A subsequent call must be
+ // made to replaceStreamData() to provide data for the stream. The stream's dictionary may be
+ // retrieved by calling getDict(), and the resulting dictionary may be modified. Alternatively,
+ // you can create a new dictionary and call replaceDict to install it. From QPDF 11.2, you can
// call QPDF::newStream() instead.
QPDF_DLL
static QPDFObjectHandle newStream(QPDF* qpdf);
- // Create a new stream and associate it with the given qpdf
- // object. Use the given buffer as the stream data. The stream
- // dictionary's /Length key will automatically be set to the size
- // of the data buffer. If additional keys are required, the
- // stream's dictionary may be retrieved by calling getDict(), and
- // the resulting dictionary may be modified. This method is just a
- // convenient wrapper around the newStream() and
- // replaceStreamData(). It is a convenience methods for streams
- // that require no parameters beyond the stream length. Note that
- // you don't have to deal with compression yourself if you use
- // QPDFWriter. By default, QPDFWriter will automatically compress
- // uncompressed stream data. Example programs are provided that
+ // Create a new stream and associate it with the given qpdf object. Use the given buffer as the
+ // stream data. The stream dictionary's /Length key will automatically be set to the size of the
+ // data buffer. If additional keys are required, the stream's dictionary may be retrieved by
+ // calling getDict(), and the resulting dictionary may be modified. This method is just a
+ // convenient wrapper around newStream() and replaceStreamData(). It is a convenience
+ // method for streams that require no parameters beyond the stream length. Note that you don't
+ // have to deal with compression yourself if you use QPDFWriter. By default, QPDFWriter will
+ // automatically compress uncompressed stream data. Example programs are provided that
// illustrate this. From QPDF 11.2, you can call QPDF::newStream()
// instead.
QPDF_DLL
static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr<Buffer> data);
- // Create new stream with data from string. This method will
- // create a copy of the data rather than using the user-provided
- // buffer as in the std::shared_ptr<Buffer> version of newStream.
+ // Create new stream with data from string. This method will create a copy of the data rather
+ // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream.
// From QPDF 11.2, you can call QPDF::newStream() instead.
QPDF_DLL
static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data);
- // A reserved object is a special sentinel used for qpdf to
- // reserve a spot for an object that is going to be added to the
- // QPDF object. Normally you don't have to use this type since
- // you can just call QPDF::makeIndirectObject. However, in some
- // cases, if you have to create objects with circular references,
- // you may need to create a reserved object so that you can have a
- // reference to it and then replace the object later. Reserved
- // objects have the special property that they can't be resolved
- // to direct objects. This makes it possible to replace a
- // reserved object with a new object while preserving existing
- // references to them. When you are ready to replace a reserved
- // object with its replacement, use QPDF::replaceReserved for this
- // purpose rather than the more general QPDF::replaceObject. It
- // is an error to try to write a QPDF with QPDFWriter if it has
- // any reserved objects in it. From QPDF 11.4, you can
- // call QPDF::newReserved() instead.
+ // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is
+ // going to be added to the QPDF object. Normally you don't have to use this type since you can
+ // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects
+ // with circular references, you may need to create a reserved object so that you can have a
+ // reference to it and then replace the object later. Reserved objects have the special
+ // property that they can't be resolved to direct objects. This makes it possible to replace a
+ // reserved object with a new object while preserving existing references to them. When you are
+ // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this
+ // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a
+ // QPDF with QPDFWriter if it has any reserved objects in it. From QPDF 11.4, you can call
+ // QPDF::newReserved() instead.
QPDF_DLL
static QPDFObjectHandle newReserved(QPDF* qpdf);
- // Provide an owning qpdf and object description. The library does
- // this automatically with objects that are read from the input
- // PDF and with objects that are created programmatically and
- // inserted into the QPDF as a new indirect object. Most end user
- // code will not need to call this. If an object has an owning
- // qpdf and object description, it enables qpdf to give warnings
- // with proper context in some cases where it would otherwise
- // raise exceptions. It is okay to add objects without an
- // owning_qpdf to objects that have one, but it is an error to
- // have a QPDF contain objects with owning_qpdf set to something
- // else. To add objects from another qpdf, use copyForeignObject
- // instead.
+ // Provide an owning qpdf and object description. The library does this automatically with
+ // objects that are read from the input PDF and with objects that are created programmatically
+ // and inserted into the QPDF as a new indirect object. Most end user code will not need to call
+ // this. If an object has an owning qpdf and object description, it enables qpdf to give
+ // warnings with proper context in some cases where it would otherwise raise exceptions. It is
+ // okay to add objects without an owning_qpdf to objects that have one, but it is an error to
+ // have a QPDF contain objects with owning_qpdf set to something else. To add objects from
+ // another qpdf, use copyForeignObject instead.
QPDF_DLL
void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description);
QPDF_DLL
@@ -674,62 +586,47 @@ class QPDFObjectHandle
//
// (Note: this comment is referenced in qpdf-c.h and the manual.)
//
- // In PDF files, objects have specific types, but there is nothing
- // that prevents PDF files from containing objects of types that
- // aren't expected by the specification.
+ // In PDF files, objects have specific types, but there is nothing that prevents PDF files from
+ // containing objects of types that aren't expected by the specification.
//
// There are two flavors of accessor methods:
//
- // * getSomethingValue() returns the value and issues a type
- // warning if the type is incorrect.
+ // * getSomethingValue() returns the value and issues a type warning if the type is incorrect.
//
- // * getValueAsSomething() returns false if the value is the wrong
- // type. Otherwise, it returns true and initializes a reference
- // of the appropriate type. These methods never issue type
+ // * getValueAsSomething() returns false if the value is the wrong type. Otherwise, it returns
+ // true and initializes a reference of the appropriate type. These methods never issue type
// warnings.
//
- // The getSomethingValue() accessors and some of the other methods
- // expect objects of a particular type. Prior to qpdf 8, calling
- // an accessor on a method of the wrong type, such as trying to
- // get a dictionary key from an array, trying to get the string
- // value of a number, etc., would throw an exception, but since
- // qpdf 8, qpdf issues a warning and recovers using the following
- // behavior:
+ // The getSomethingValue() accessors and some of the other methods expect objects of a
+ // particular type. Prior to qpdf 8, calling an accessor on an object of the wrong type, such as
+ // trying to get a dictionary key from an array, trying to get the string value of a number,
+ // etc., would throw an exception, but since qpdf 8, qpdf issues a warning and recovers using
+ // the following behavior:
//
- // * Requesting a value of the wrong type (int value from string,
- // array item from a scalar or dictionary, etc.) will return a
- // zero-like value for that type: false for boolean, 0 for
- // number, the empty string for string, or the null object for
- // an object handle.
+ // * Requesting a value of the wrong type (int value from string, array item from a scalar or
+ // dictionary, etc.) will return a zero-like value for that type: false for boolean, 0 for
+ // number, the empty string for string, or the null object for an object handle.
//
- // * Accessing an array item that is out of bounds will return a
- // null object.
+ // * Accessing an array item that is out of bounds will return a null object.
//
- // * Attempts to mutate an object of the wrong type (e.g.,
- // attempting to add a dictionary key to a scalar or array) will
- // be ignored.
+ // * Attempts to mutate an object of the wrong type (e.g., attempting to add a dictionary key to
+ // a scalar or array) will be ignored.
//
- // When any of these fallback behaviors are used, qpdf issues a
- // warning. Starting in qpdf 10.5, these warnings have the error
- // code qpdf_e_object. Prior to 10.5, they had the error code
- // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with
- // a QPDF object (as is the case for all objects whose origin was
- // a PDF file), the warning is issued using the normal warning
- // mechanism (as described in QPDF.hh), making it possible to
- // suppress or otherwise detect them. If the QPDFObjectHandle is
- // not associated with a QPDF object (meaning it was created
+ // When any of these fallback behaviors are used, qpdf issues a warning. Starting in qpdf 10.5,
+ // these warnings have the error code qpdf_e_object. Prior to 10.5, they had the error code
+ // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with a QPDF object (as is the case
+ // for all objects whose origin was a PDF file), the warning is issued using the normal warning
+ // mechanism (as described in QPDF.hh), making it possible to suppress or otherwise detect them.
+ // If the QPDFObjectHandle is not associated with a QPDF object (meaning it was created
// programmatically), an exception will be thrown.
//
- // The way to avoid getting any type warnings or exceptions, even
- // when working with malformed PDF files, is to always check the
- // type of a QPDFObjectHandle before accessing it (for example,
- // make sure that isString() returns true before calling
- // getStringValue()) and to always be sure that any array indices
- // are in bounds.
+ // The way to avoid getting any type warnings or exceptions, even when working with malformed
+ // PDF files, is to always check the type of a QPDFObjectHandle before accessing it (for
+ // example, make sure that isString() returns true before calling getStringValue()) and to
+ // always be sure that any array indices are in bounds.
//
- // For additional discussion and rationale for this behavior, see
- // the section in the QPDF manual entitled "Object Accessor
- // Methods".
+ // For additional discussion and rationale for this behavior, see the section in the QPDF manual
+ // entitled "Object Accessor Methods".
// Methods for bool objects
QPDF_DLL
@@ -737,12 +634,10 @@ class QPDFObjectHandle
QPDF_DLL
bool getValueAsBool(bool&);
- // Methods for integer objects. Note: if an integer value is too
- // big (too far away from zero in either direction) to fit in the
- // requested return type, the maximum or minimum value for that
- // return type may be returned. For example, on a system with
- // 32-bit int, a numeric object with a value of 2^40 (or anything
- // too big for 32 bits) will be returned as INT_MAX.
+ // Methods for integer objects. Note: if an integer value is too big (too far away from zero in
+ // either direction) to fit in the requested return type, the maximum or minimum value for that
+ // return type may be returned. For example, on a system with 32-bit int, a numeric object with
+ // a value of 2^40 (or anything too big for 32 bits) will be returned as INT_MAX.
QPDF_DLL
long long getIntValue();
QPDF_DLL
@@ -774,9 +669,8 @@ class QPDFObjectHandle
QPDF_DLL
bool getValueAsNumber(double&);
- // Methods for name objects. The returned name value is in qpdf's
- // canonical form with all escaping resolved. See comments for
- // newName() for details.
+ // Methods for name objects. The returned name value is in qpdf's canonical form with all
+ // escaping resolved. See comments for newName() for details.
QPDF_DLL
std::string getName();
QPDF_DLL
@@ -788,12 +682,10 @@ class QPDFObjectHandle
QPDF_DLL
bool getValueAsString(std::string&);
- // If a string starts with the UTF-16 marker, it is converted from
- // UTF-16 to UTF-8. Otherwise, it is treated as a string encoded
- // with PDF Doc Encoding. PDF Doc Encoding is identical to
- // ISO-8859-1 except in the range from 0200 through 0240, where
- // there is a mapping of characters to Unicode. QPDF versions
- // prior to version 8.0.0 erroneously left characters in that range
+ // If a string starts with the UTF-16 marker, it is converted from UTF-16 to UTF-8. Otherwise,
+ // it is treated as a string encoded with PDF Doc Encoding. PDF Doc Encoding is identical to
+ // ISO-8859-1 except in the range from 0200 through 0240, where there is a mapping of characters
+ // to Unicode. QPDF versions prior to version 8.0.0 erroneously left characters in that range
// unmapped.
QPDF_DLL
std::string getUTF8Value();
@@ -812,8 +704,7 @@ class QPDFObjectHandle
// Methods for array objects; see also name and array objects.
- // Return an object that enables iteration over members. You can
- // do
+ // Return an object that enables iteration over members. You can do
//
// for (auto iter: obj.aitems())
// {
@@ -827,32 +718,29 @@ class QPDFObjectHandle
int getArrayNItems();
QPDF_DLL
QPDFObjectHandle getArrayItem(int n);
- // Note: QPDF arrays internally optimize memory for arrays
- // containing lots of nulls. Calling getArrayAsVector may cause a
- // lot of memory to be allocated for very large arrays with lots
- // of nulls.
+ // Note: QPDF arrays internally optimize memory for arrays containing lots of nulls. Calling
+ // getArrayAsVector may cause a lot of memory to be allocated for very large arrays with lots of
+ // nulls.
QPDF_DLL
std::vector<QPDFObjectHandle> getArrayAsVector();
QPDF_DLL
bool isRectangle();
- // If the array is an array of four numeric values, return as a
- // rectangle. Otherwise, return the rectangle [0, 0, 0, 0]
+ // If the array is an array of four numeric values, return as a rectangle. Otherwise, return the
+ // rectangle [0, 0, 0, 0].
QPDF_DLL
Rectangle getArrayAsRectangle();
QPDF_DLL
bool isMatrix();
- // If the array is an array of six numeric values, return as a
- // matrix. Otherwise, return the matrix [1, 0, 0, 1, 0, 0]
+ // If the array is an array of six numeric values, return as a matrix. Otherwise, return the
+ // matrix [1, 0, 0, 1, 0, 0].
QPDF_DLL
Matrix getArrayAsMatrix();
- // Methods for dictionary objects. In all dictionary methods, keys
- // are specified/represented as canonical name strings starting
- // with a leading slash and not containing any PDF syntax
+ // Methods for dictionary objects. In all dictionary methods, keys are specified/represented as
+ // canonical name strings starting with a leading slash and not containing any PDF syntax
// escaping. See comments for getName() for details.
- // Return an object that enables iteration over members. You can
- // do
+ // Return an object that enables iteration over members. You can do
//
// for (auto iter: obj.ditems())
// {
@@ -863,185 +751,149 @@ class QPDFObjectHandle
QPDF_DLL
QPDFDictItems ditems();
- // Return true if key is present. Keys with null values are treated as if
- // they are not present. This is as per the PDF spec.
+ // Return true if key is present. Keys with null values are treated as if they are not present.
+ // This is as per the PDF spec.
QPDF_DLL
bool hasKey(std::string const&);
- // Return the value for the key. If the key is not present, null is
- // returned.
+ // Return the value for the key. If the key is not present, null is returned.
QPDF_DLL
QPDFObjectHandle getKey(std::string const&);
- // If the object is null, return null. Otherwise, call getKey().
- // This makes it easier to access lower-level dictionaries, as in
+ // If the object is null, return null. Otherwise, call getKey(). This makes it easier to access
+ // lower-level dictionaries, as in
// auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font");
QPDF_DLL
QPDFObjectHandle getKeyIfDict(std::string const&);
- // Return all keys. Keys with null values are treated as if
- // they are not present. This is as per the PDF spec.
+ // Return all keys. Keys with null values are treated as if they are not present. This is as
+ // per the PDF spec.
QPDF_DLL
std::set<std::string> getKeys();
// Return dictionary as a map. Entries with null values are included.
QPDF_DLL
std::map<std::string, QPDFObjectHandle> getDictAsMap();
- // Methods for name and array objects. The name value is in qpdf's
- // canonical form with all escaping resolved. See comments for
- // newName() for details.
+ // Methods for name and array objects. The name value is in qpdf's canonical form with all
+ // escaping resolved. See comments for newName() for details.
QPDF_DLL
bool isOrHasName(std::string const&);
- // Make all resources in a resource dictionary indirect. This just
- // goes through all entries of top-level subdictionaries and
- // converts any direct objects to indirect objects. This can be
- // useful to call before mergeResources if it is going to be
- // called multiple times to prevent resources from being copied
- // multiple times.
+ // Make all resources in a resource dictionary indirect. This just goes through all entries of
+ // top-level subdictionaries and converts any direct objects to indirect objects. This can be
+ // useful to call before mergeResources if it is going to be called multiple times to prevent
+ // resources from being copied multiple times.
QPDF_DLL
void makeResourcesIndirect(QPDF& owning_qpdf);
- // Merge resource dictionaries. If the "conflicts" parameter is
- // provided, conflicts in dictionary subitems are resolved, and
- // "conflicts" is initialized to a map such that
+ // Merge resource dictionaries. If the "conflicts" parameter is provided, conflicts in
+ // dictionary subitems are resolved, and "conflicts" is initialized to a map such that
// conflicts[resource_type][old_key] == [new_key]
//
- // See also makeResourcesIndirect, which can be useful to call
- // before calling this.
+ // See also makeResourcesIndirect, which can be useful to call before calling this.
//
- // This method does nothing if both this object and the other
- // object are not dictionaries. Otherwise, it has following
- // behavior, where "object" refers to the object whose method is
+ // This method does nothing if both this object and the other object are not dictionaries.
+ // Otherwise, it has the following behavior, where "object" refers to the object whose method is
// invoked, and "other" refers to the argument:
//
// * For each key in "other" whose value is an array:
// * If "object" does not have that entry, shallow copy it.
- // * Otherwise, if "object" has an array in the same place,
- // append to that array any objects in "other"'s array that
- // are not already present.
+ // * Otherwise, if "object" has an array in the same place, append to that array any objects
+ // in "other"'s array that are not already present.
// * For each key in "other" whose value is a dictionary:
// * If "object" does not have that entry, shallow copy it.
// * Otherwise, for each key in the subdictionary:
- // * If key is not present in "object"'s entry, shallow copy
- // it if direct or just add it if indirect.
+ // * If key is not present in "object"'s entry, shallow copy it if direct or just add it if
+ // indirect.
// * Otherwise, if conflicts are being detected:
- // * If there is a key (oldkey) already in the dictionary
- // that points to the same indirect destination as key,
- // indicate that key was replaced by oldkey. This would
- // happen if these two resource dictionaries have
- // previously been merged.
- // * Otherwise pick a new key (newkey) that is unique within
- // the resource dictionary, store that in the resource
- // dictionary with key's destination as its destination,
- // and indicate that key was replaced by newkey.
+ // * If there is a key (oldkey) already in the dictionary that points to the same indirect
+ // destination as key, indicate that key was replaced by oldkey. This would happen if
+ // these two resource dictionaries have previously been merged.
+ // * Otherwise pick a new key (newkey) that is unique within the resource dictionary,
+ // store that in the resource dictionary with key's destination as its destination, and
+ // indicate that key was replaced by newkey.
//
- // The primary purpose of this method is to facilitate merging of
- // resource dictionaries that are supposed to have the same scope
- // as each other. For example, this can be used to merge a form
- // XObject's /Resources dictionary with a form field's /DR or to
- // merge two /DR dictionaries. The "conflicts" parameter may be
- // previously initialized. This method adds to whatever is already
+ // The primary purpose of this method is to facilitate merging of resource dictionaries that are
+ // supposed to have the same scope as each other. For example, this can be used to merge a form
+ // XObject's /Resources dictionary with a form field's /DR or to merge two /DR dictionaries. The
+ // "conflicts" parameter may be previously initialized. This method adds to whatever is already
// there, which can be useful when merging with multiple things.
QPDF_DLL
void mergeResources(
QPDFObjectHandle other,
std::map<std::string, std::map<std::string, std::string>>* conflicts = nullptr);
- // Get all resource names from a resource dictionary. If this
- // object is a dictionary, this method returns a set of all the
- // keys in all top-level subdictionaries. For resources
- // dictionaries, this is the collection of names that may be
- // referenced in the content stream.
+ // Get all resource names from a resource dictionary. If this object is a dictionary, this
+ // method returns a set of all the keys in all top-level subdictionaries. For resource
+ // dictionaries, this is the collection of names that may be referenced in the content stream.
QPDF_DLL
std::set<std::string> getResourceNames();
- // Find a unique name within a resource dictionary starting with a
- // given prefix. This method works by appending a number to the
- // given prefix. It searches starting with min_suffix and sets
- // min_suffix to selected value upon return. This can be used to
- // increase efficiency if adding multiple items with the same
- // prefix. (Why doesn't it set min_suffix to the next number?
- // Well, maybe you aren't going to actually use the name it
- // returns.) If you are calling this multiple times on the same
- // resource dictionary, you can initialize resource_names by
- // calling getResourceNames(), incrementally update it as you add
- // resources, and keep passing it in so that getUniqueResourceName
- // doesn't have to traverse the resource dictionary each time it's
- // called.
+ // Find a unique name within a resource dictionary starting with a given prefix. This method
+ // works by appending a number to the given prefix. It searches starting with min_suffix and
+ // sets min_suffix to the selected value upon return. This can be used to increase efficiency if
+ // adding multiple items with the same prefix. (Why doesn't it set min_suffix to the next
+ // number? Well, maybe you aren't going to actually use the name it returns.) If you are calling
+ // this multiple times on the same resource dictionary, you can initialize resource_names by
+ // calling getResourceNames(), incrementally update it as you add resources, and keep passing it
+ // in so that getUniqueResourceName doesn't have to traverse the resource dictionary each time
+ // it's called.
QPDF_DLL
std::string getUniqueResourceName(
std::string const& prefix,
int& min_suffix,
std::set<std::string>* resource_names = nullptr);
- // A QPDFObjectHandle has an owning QPDF if it is associated with
- // ("owned by") a specific QPDF object. Indirect objects always
- // have an owning QPDF. Direct objects that are read from the
- // input source will also have an owning QPDF. Programmatically
- // created objects will only have one if setObjectDescription was
- // called.
+ // A QPDFObjectHandle has an owning QPDF if it is associated with ("owned by") a specific QPDF
+ // object. Indirect objects always have an owning QPDF. Direct objects that are read from the
+ // input source will also have an owning QPDF. Programmatically created objects will only have
+ // one if setObjectDescription was called.
//
- // When the QPDF object that owns an object is destroyed, the
- // object is changed into a null, and its owner is cleared.
- // Therefore you should not retain the value of an owning QPDF
- // beyond the life of the QPDF. If in doubt, ask for it each time
- // you need it.
+ // When the QPDF object that owns an object is destroyed, the object is changed into a null, and
+ // its owner is cleared. Therefore you should not retain the value of an owning QPDF beyond the
+ // life of the QPDF. If in doubt, ask for it each time you need it.
- // getOwningQPDF returns a pointer to the owning QPDF is the
- // object has one. Otherwise, it returns a null pointer. Use this
- // when you are able to handle the case of an object that doesn't
- // have an owning QPDF.
+ // getOwningQPDF returns a pointer to the owning QPDF if the object has one. Otherwise, it
+ // returns a null pointer. Use this when you are able to handle the case of an object that
+ // doesn't have an owning QPDF.
QPDF_DLL
QPDF* getOwningQPDF() const;
- // getQPDF, new in qpdf 11, returns a reference owning QPDF. If
- // there is none, it throws a runtime_error. Use this when you
- // know the object has to have an owning QPDF, such as when it's a
- // known indirect object. Since streams are always indirect
- // objects, this method can be used safely for streams. If
- // error_msg is specified, it will be used at the contents of the
+ // getQPDF, new in qpdf 11, returns a reference to the owning QPDF. If there is none, it throws
+ // a runtime_error. Use this when you know the object has to have an owning QPDF, such as when
+ // it's a known indirect object. Since streams are always indirect objects, this method can be
+ // used safely for streams. If error_msg is specified, it will be used as the contents of the
// runtime_error if there is no owner.
QPDF_DLL
QPDF& getQPDF(std::string const& error_msg = "") const;
- // Create a shallow copy of an object as a direct object, but do not
- // traverse across indirect object boundaries. That means that,
- // for dictionaries and arrays, any keys or items that were
- // indirect objects will still be indirect objects that point to
- // the same place. In the strictest sense, this is not a shallow
- // copy because it recursively descends arrays and dictionaries;
- // it just doesn't cross over indirect objects. See also
- // unsafeShallowCopy(). You can't copy a stream this way. See
- // copyStream() instead.
+ // Create a shallow copy of an object as a direct object, but do not traverse across indirect
+ // object boundaries. That means that, for dictionaries and arrays, any keys or items that were
+ // indirect objects will still be indirect objects that point to the same place. In the
+ // strictest sense, this is not a shallow copy because it recursively descends arrays and
+ // dictionaries; it just doesn't cross over indirect objects. See also unsafeShallowCopy(). You
+ // can't copy a stream this way. See copyStream() instead.
QPDF_DLL
QPDFObjectHandle shallowCopy();
- // Create a true shallow copy of an array or dictionary, just
- // copying the immediate items (array) or keys (dictionary). This
- // is "unsafe" because, if you *modify* any of the items in the
- // copy, you are modifying the original, which is almost never
- // what you want. However, if your intention is merely to
- // *replace* top-level items or keys and not to modify lower-level
- // items in the copy, this method is much faster than
- // shallowCopy().
+ // Create a true shallow copy of an array or dictionary, just copying the immediate items
+ // (array) or keys (dictionary). This is "unsafe" because, if you *modify* any of the items in
+ // the copy, you are modifying the original, which is almost never what you want. However, if
+ // your intention is merely to *replace* top-level items or keys and not to modify lower-level
+ // items in the copy, this method is much faster than shallowCopy().
QPDF_DLL
QPDFObjectHandle unsafeShallowCopy();
- // Create a copy of this stream. The new stream and the old stream
- // are independent: after the copy, either the original or the
- // copy's dictionary or data can be modified without affecting the
- // other. This uses StreamDataProvider internally, so no
- // unnecessary copies of the stream's data are made. If the source
- // stream's data is already being provided by a
- // StreamDataProvider, the new stream will use the same one, so
- // you have to make sure your StreamDataProvider can handle that
- // case. But if you're already using a StreamDataProvider, you
- // probably don't need to call this method.
+ // Create a copy of this stream. The new stream and the old stream are independent: after the
+ // copy, either the original or the copy's dictionary or data can be modified without affecting
+ // the other. This uses StreamDataProvider internally, so no unnecessary copies of the stream's
+ // data are made. If the source stream's data is already being provided by a StreamDataProvider,
+ // the new stream will use the same one, so you have to make sure your StreamDataProvider can
+ // handle that case. But if you're already using a StreamDataProvider, you probably don't need
+ // to call this method.
QPDF_DLL
QPDFObjectHandle copyStream();
// Mutator methods.
- // Since qpdf 11: for mutators that may add or remove an item,
- // there are additional versions whose names contain "AndGet" that
- // return the added or removed item. For example:
+ // Since qpdf 11: for mutators that may add or remove an item, there are additional versions
+ // whose names contain "AndGet" that return the added or removed item. For example:
//
// auto new_dict = dict.replaceKeyAndGetNew(
// "/New", QPDFObjectHandle::newDictionary());
@@ -1049,15 +901,12 @@ class QPDFObjectHandle
// auto old_value = dict.replaceKeyAndGetOld(
// "/New", "(something)"_qpdf);
- // Recursively copy this object, making it direct. An exception is
- // thrown if a loop is detected. With allow_streams true, keep
- // indirect object references to streams. Otherwise, throw an
- // exception if any sub-object is a stream. Note that, when
- // allow_streams is true and a stream is found, the resulting
- // object is still associated with the containing qpdf. When
- // allow_streams is false, the object will no longer be connected
- // to the original QPDF object after this call completes
- // successfully.
+ // Recursively copy this object, making it direct. An exception is thrown if a loop is detected.
+ // With allow_streams true, keep indirect object references to streams. Otherwise, throw an
+ // exception if any sub-object is a stream. Note that, when allow_streams is true and a stream
+ // is found, the resulting object is still associated with the containing qpdf. When
+ // allow_streams is false, the object will no longer be connected to the original QPDF object
+ // after this call completes successfully.
QPDF_DLL
void makeDirect(bool allow_streams = false);
@@ -1066,9 +915,8 @@ class QPDFObjectHandle
void setArrayItem(int, QPDFObjectHandle const&);
QPDF_DLL
void setArrayFromVector(std::vector<QPDFObjectHandle> const& items);
- // Insert an item before the item at the given position ("at") so
- // that it has that position after insertion. If "at" is equal to
- // the size of the array, insert the item at the end.
+ // Insert an item before the item at the given position ("at") so that it has that position
+ // after insertion. If "at" is equal to the size of the array, insert the item at the end.
QPDF_DLL
void insertItem(int at, QPDFObjectHandle const& item);
// Like insertItem but return the item that was inserted.
@@ -1080,8 +928,7 @@ class QPDFObjectHandle
// Append an item, and return the newly added item.
QPDF_DLL
QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item);
- // Remove the item at that position, reducing the size of the
- // array by one.
+ // Remove the item at that position, reducing the size of the array by one.
QPDF_DLL
void eraseItem(int at);
// Erase an item and return the item that was removed.
@@ -1090,22 +937,19 @@ class QPDFObjectHandle
// Mutator methods for dictionary objects
- // Replace value of key, adding it if it does not exist. If value
- // is null, remove the key.
+ // Replace value of key, adding it if it does not exist. If value is null, remove the key.
QPDF_DLL
void replaceKey(std::string const& key, QPDFObjectHandle const& value);
// Replace value of key and return the value.
QPDF_DLL
QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value);
- // Replace value of key and return the old value, or null if the
- // key was previously not present.
+ // Replace value of key and return the old value, or null if the key was previously not present.
QPDF_DLL
QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value);
// Remove key, doing nothing if key does not exist.
QPDF_DLL
void removeKey(std::string const& key);
- // Remove key and return the old value. If the old value didn't
- // exist, return a null object.
+ // Remove key and return the old value. If the old value didn't exist, return a null object.
QPDF_DLL
QPDFObjectHandle removeKeyAndGetOld(std::string const& key);
@@ -1117,31 +961,26 @@ class QPDFObjectHandle
QPDF_DLL
QPDFObjectHandle getDict();
- // By default, or if true passed, QPDFWriter will attempt to
- // filter a stream based on decode level, whether compression is
- // enabled, and its ability to filter. Passing false will prevent
- // QPDFWriter from attempting to filter the stream even if it can.
- // This includes both decoding and compressing. This makes it
- // possible for you to prevent QPDFWriter from uncompressing and
- // recompressing a stream that it knows how to operate on for any
- // application-specific reason, such as that you have already
- // optimized its filtering. Note that this doesn't affect any
- // other ways to get the stream's data, such as pipeStreamData or
- // getStreamData.
+ // By default, or if true passed, QPDFWriter will attempt to filter a stream based on decode
+ // level, whether compression is enabled, and its ability to filter. Passing false will prevent
+ // QPDFWriter from attempting to filter the stream even if it can. This includes both decoding
+ // and compressing. This makes it possible for you to prevent QPDFWriter from uncompressing and
+ // recompressing a stream that it knows how to operate on for any application-specific reason,
+ // such as that you have already optimized its filtering. Note that this doesn't affect any
+ // other ways to get the stream's data, such as pipeStreamData or getStreamData.
QPDF_DLL
void setFilterOnWrite(bool);
QPDF_DLL
bool getFilterOnWrite();
- // If addTokenFilter has been called for this stream, then the
- // original data should be considered to be modified. This means we
- // should avoid optimizations such as not filtering a stream that
- // is already compressed.
+ // If addTokenFilter has been called for this stream, then the original data should be
+ // considered to be modified. This means we should avoid optimizations such as not filtering a
+ // stream that is already compressed.
QPDF_DLL
bool isDataModified();
- // Returns filtered (uncompressed) stream data. Throws an
- // exception if the stream is filtered and we can't decode it.
+ // Returns filtered (uncompressed) stream data. Throws an exception if the stream is filtered
+ // and we can't decode it.
QPDF_DLL
std::shared_ptr<Buffer> getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized);
@@ -1149,17 +988,15 @@ class QPDFObjectHandle
QPDF_DLL
std::shared_ptr<Buffer> getRawStreamData();
- // Write stream data through the given pipeline. A null pipeline
- // value may be used if all you want to do is determine whether a
- // stream is filterable and would be filtered based on the
- // provided flags. If flags is 0, write raw stream data and return
- // false. Otherwise, the flags alter the behavior in the following
- // way:
+ // Write stream data through the given pipeline. A null pipeline value may be used if all you
+ // want to do is determine whether a stream is filterable and would be filtered based on the
+ // provided flags. If flags is 0, write raw stream data and return false. Otherwise, the flags
+ // alter the behavior in the following way:
//
// encode_flags:
//
- // qpdf_sf_compress -- compress data with /FlateDecode if no other
- // compression filters are applied.
+ // qpdf_sf_compress -- compress data with /FlateDecode if no other compression filters are
+ // applied.
//
// qpdf_sf_normalize -- tokenize as content stream and normalize tokens
//
@@ -1167,45 +1004,33 @@ class QPDFObjectHandle
//
// qpdf_dl_none -- do not decode any streams.
//
- // qpdf_dl_generalized -- decode supported general-purpose
- // filters. This includes /ASCIIHexDecode, /ASCII85Decode,
- // /LZWDecode, and /FlateDecode.
+ // qpdf_dl_generalized -- decode supported general-purpose filters. This includes
+ // /ASCIIHexDecode, /ASCII85Decode, /LZWDecode, and /FlateDecode.
//
- // qpdf_dl_specialized -- in addition to generalized filters, also
- // decode supported non-lossy specialized filters. This includes
- // /RunLengthDecode.
+ // qpdf_dl_specialized -- in addition to generalized filters, also decode supported non-lossy
+ // specialized filters. This includes /RunLengthDecode.
//
- // qpdf_dl_all -- in addition to generalized and non-lossy
- // specialized filters, decode supported lossy filters. This
- // includes /DCTDecode.
+ // qpdf_dl_all -- in addition to generalized and non-lossy specialized filters, decode supported
+ // lossy filters. This includes /DCTDecode.
//
- // If, based on the flags and the filters and decode parameters,
- // we determine that we know how to apply all requested filters,
- // do so and return true if we are successful.
+ // If, based on the flags and the filters and decode parameters, we determine that we know how
+ // to apply all requested filters, do so and return true if we are successful.
//
- // The exact meaning of the return value differs the different
- // versions of this function, but for any version, the meaning has
- // been the same. For the main version, added in qpdf 10, the
- // return value indicates whether the overall operation succeeded.
- // The filter parameter, if specified, will be set to whether or
- // not filtering was attempted. If filtering was not requested,
- // this value will be false even if the overall operation
- // succeeded.
+ // The exact meaning of the return value differs between the versions of this function, but
+ // for any version, the meaning has been the same. For the main version, added in qpdf 10, the
+ // return value indicates whether the overall operation succeeded. The filter parameter, if
+ // specified, will be set to whether or not filtering was attempted. If filtering was not
+ // requested, this value will be false even if the overall operation succeeded.
//
- // If filtering is requested but this method returns false, it
- // means there was some error in the filtering, in which case the
- // resulting data is likely partially filtered and/or incomplete
- // and may not be consistent with the configured filters.
- // QPDFWriter handles this by attempting to get the stream data
- // without filtering, but callers should consider a false return
- // value when decode_level is not qpdf_dl_none to be a potential
- // loss of data. If you intend to retry in that case, pass true as
- // the value of will_retry. This changes the warning issued by the
- // library to indicate that the operation will be retried without
- // filtering to avoid data loss.
-
- // Return value is overall success, even if filtering is not
- // requested.
+ // If filtering is requested but this method returns false, it means there was some error in the
+ // filtering, in which case the resulting data is likely partially filtered and/or incomplete
+ // and may not be consistent with the configured filters. QPDFWriter handles this by attempting
+ // to get the stream data without filtering, but callers should consider a false return value
+ // when decode_level is not qpdf_dl_none to be a potential loss of data. If you intend to retry
+ // in that case, pass true as the value of will_retry. This changes the warning issued by the
+ // library to indicate that the operation will be retried without filtering to avoid data loss.
+
+ // Return value is overall success, even if filtering is not requested.
QPDF_DLL
bool pipeStreamData(
Pipeline*,
@@ -1215,9 +1040,8 @@ class QPDFObjectHandle
bool suppress_warnings = false,
bool will_retry = false);
- // Legacy version. Return value is whether filtering was
- // attempted. There is no way to determine success if filtering
- // was not attempted.
+ // Legacy version. Return value is whether filtering was attempted. There is no way to determine
+ // success if filtering was not attempted.
QPDF_DLL
bool pipeStreamData(
Pipeline*,
@@ -1226,8 +1050,7 @@ class QPDFObjectHandle
bool suppress_warnings = false,
bool will_retry = false);
- // Legacy pipeStreamData. This maps to the the flags-based
- // pipeStreamData as follows:
+ // Legacy pipeStreamData. This maps to the flags-based pipeStreamData as follows:
// filter = false -> encode_flags = 0
// filter = true -> decode_level = qpdf_dl_generalized
// normalize = true -> encode_flags |= qpdf_sf_normalize
@@ -1236,70 +1059,57 @@ class QPDFObjectHandle
QPDF_DLL
bool pipeStreamData(Pipeline*, bool filter, bool normalize, bool compress);
- // Replace a stream's dictionary. The new dictionary must be
- // consistent with the stream's data. This is most appropriately
- // used when creating streams from scratch that will use a stream
- // data provider and therefore start with an empty dictionary. It
- // may be more convenient in this case than calling getDict and
- // modifying it for each key. The pdf-create example does this.
+ // Replace a stream's dictionary. The new dictionary must be consistent with the stream's data.
+ // This is most appropriately used when creating streams from scratch that will use a stream
+ // data provider and therefore start with an empty dictionary. It may be more convenient in
+ // this case than calling getDict and modifying it for each key. The pdf-create example does
+ // this.
QPDF_DLL
void replaceDict(QPDFObjectHandle const&);
// REPLACING STREAM DATA
- // Note about all replaceStreamData methods: whatever values are
- // passed as filter and decode_parms will overwrite /Filter and
- // /DecodeParms in the stream. Passing a null object
- // (QPDFObjectHandle::newNull()) will remove those values from the
- // stream dictionary. From qpdf 11, passing an *uninitialized*
- // QPDFObjectHandle (QPDFObjectHandle()) will leave any existing
+ // Note about all replaceStreamData methods: whatever values are passed as filter and
+ // decode_parms will overwrite /Filter and /DecodeParms in the stream. Passing a null object
+ // (QPDFObjectHandle::newNull()) will remove those values from the stream dictionary. From qpdf
+ // 11, passing an *uninitialized* QPDFObjectHandle (QPDFObjectHandle()) will leave any existing
// values untouched.
- // Replace this stream's stream data with the given data buffer.
- // The stream's /Length key is replaced with the length of the
- // data buffer. The stream is interpreted as if the data read from
- // the file, after any decryption filters have been applied, is as
- // presented.
+ // Replace this stream's stream data with the given data buffer. The stream's /Length key is
+ // replaced with the length of the data buffer. The stream is interpreted as if the data read
+ // from the file, after any decryption filters have been applied, is as presented.
QPDF_DLL
void replaceStreamData(
std::shared_ptr<Buffer> data,
QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms);
- // Replace the stream's stream data with the given string.
- // This method will create a copy of the data rather than using
- // the user-provided buffer as in the std::shared_ptr<Buffer> version
- // of replaceStreamData.
+ // Replace the stream's stream data with the given string. This method will create a copy of the
+ // data rather than using the user-provided buffer as in the std::shared_ptr<Buffer> version of
+ // replaceStreamData.
QPDF_DLL
void replaceStreamData(
std::string const& data,
QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms);
- // As above, replace this stream's stream data. Instead of
- // directly providing a buffer with the stream data, call the
- // given provider's provideStreamData method. See comments on the
- // StreamDataProvider class (defined above) for details on the
- // method. The data must be consistent with filter and
- // decode_parms as provided. Although it is more complex to use
- // this form of replaceStreamData than the one that takes a
- // buffer, it makes it possible to avoid allocating memory for the
- // stream data. Example programs are provided that use both forms
- // of replaceStreamData.
-
- // Note about stream length: for any given stream, the provider
- // must provide the same amount of data each time it is called.
- // This is critical for making linearization work properly.
- // Versions of qpdf before 3.0.0 required a length to be specified
- // here. Starting with version 3.0.0, this is no longer necessary
- // (or permitted). The first time the stream data provider is
- // invoked for a given stream, the actual length is stored.
- // Subsequent times, it is enforced that the length be the same as
- // the first time.
-
- // If you have gotten a compile error here while building code
- // that worked with older versions of qpdf, just omit the length
- // parameter. You can also simplify your code by not having to
+ // As above, replace this stream's stream data. Instead of directly providing a buffer with the
+ // stream data, call the given provider's provideStreamData method. See comments on the
+ // StreamDataProvider class (defined above) for details on the method. The data must be
+ // consistent with filter and decode_parms as provided. Although it is more complex to use this
+ // form of replaceStreamData than the one that takes a buffer, it makes it possible to avoid
+ // allocating memory for the stream data. Example programs are provided that use both forms of
+ // replaceStreamData.
+
+ // Note about stream length: for any given stream, the provider must provide the same amount of
+ // data each time it is called. This is critical for making linearization work properly.
+ // Versions of qpdf before 3.0.0 required a length to be specified here. Starting with
+ // version 3.0.0, this is no longer necessary (or permitted). The first time the stream data
+ // provider is invoked for a given stream, the actual length is stored. Subsequent times, it is
+ // enforced that the length be the same as the first time.
+
+ // If you have gotten a compile error here while building code that worked with older versions
+ // of qpdf, just omit the length parameter. You can also simplify your code by not having to
// compute the length in advance.
QPDF_DLL
void replaceStreamData(
@@ -1307,33 +1117,28 @@ class QPDFObjectHandle
QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms);
- // Starting in qpdf 10.2, you can use C++-11 function objects
- // instead of StreamDataProvider.
+ // Starting in qpdf 10.2, you can use C++11 function objects instead of StreamDataProvider.
- // The provider should write the stream data to the pipeline. For
- // a one-liner to replace stream data with the contents of a file,
- // pass QUtil::file_provider(filename) as provider.
+ // The provider should write the stream data to the pipeline. For a one-liner to replace stream
+ // data with the contents of a file, pass QUtil::file_provider(filename) as provider.
QPDF_DLL
void replaceStreamData(
std::function<void(Pipeline*)> provider,
QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms);
- // The provider should write the stream data to the pipeline,
- // returning true if it succeeded without errors.
+ // The provider should write the stream data to the pipeline, returning true if it succeeded
+ // without errors.
QPDF_DLL
void replaceStreamData(
std::function<bool(Pipeline*, bool suppress_warnings, bool will_retry)> provider,
QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms);
- // Access object ID and generation. For direct objects, return
- // object ID 0.
+ // Access object ID and generation. For direct objects, return object ID 0.
- // NOTE: Be careful about calling getObjectID() and
- // getGeneration() directly as this can lead to the pattern of
- // depending on object ID or generation without the other. In
- // general, when keeping track of object IDs, it's better to use
- // QPDFObjGen instead.
+ // NOTE: Be careful about calling getObjectID() and getGeneration() directly as this can lead to
+ // the pattern of depending on object ID or generation without the other. In general, when
+ // keeping track of object IDs, it's better to use QPDFObjGen instead.
QPDF_DLL
QPDFObjGen getObjGen() const;
@@ -1346,51 +1151,40 @@ class QPDFObjectHandle
std::string unparse();
QPDF_DLL
std::string unparseResolved();
- // For strings only, force binary representation. Otherwise, same
- // as unparse.
+ // For strings only, force binary representation. Otherwise, same as unparse.
QPDF_DLL
std::string unparseBinary();
- // Return encoded as JSON. The constant JSON::LATEST can be used
- // to specify the latest available JSON version. The JSON is
- // generated as follows:
- // * Arrays, dictionaries, booleans, nulls, integers, and real
- // numbers are represented by their native JSON types.
- // * Names are encoded as strings representing the canonical
- // representation (after parsing #xx) and preceded by a slash,
- // just as unparse() returns. For example, the JSON for the
+ // Return encoded as JSON. The constant JSON::LATEST can be used to specify the latest available
+ // JSON version. The JSON is generated as follows:
+ // * Arrays, dictionaries, booleans, nulls, integers, and real numbers are represented by their
+ // native JSON types.
+ // * Names are encoded as strings representing the canonical representation (after parsing #xx)
+ // and preceded by a slash, just as unparse() returns. For example, the JSON for the
// PDF-syntax name /Text#2fPlain would be "/Text/Plain".
// * Indirect references are encoded as strings containing "obj gen R"
// * Strings
- // * JSON v1: Strings are encoded as UTF-8 strings with
- // unrepresentable binary characters encoded as \uHHHH.
- // Characters in PDF Doc encoding that don't have
- // bidirectional unicode mappings are not reversible. There is
- // no way to tell the difference between a string that looks
- // like a name or indirect object from an actual name or
- // indirect object.
+ // * JSON v1: Strings are encoded as UTF-8 strings with unrepresentable binary characters
+ // encoded as \uHHHH. Characters in PDF Doc encoding that don't have bidirectional unicode
+ // mappings are not reversible. There is no way to tell the difference between a string that
+ // looks like a name or indirect object and an actual name or indirect object.
// * JSON v2:
- // * Unicode strings and strings encoded with PDF Doc encoding
- // that can be bidrectionally mapped two Unicode (which is
- // all strings without undefined characters) are represented
+ // * Unicode strings and strings encoded with PDF Doc encoding that can be bidirectionally
+ // mapped to Unicode (which is all strings without undefined characters) are represented
// as "u:" followed by the UTF-8 encoded string. Example:
// "u:potato".
- // * All other strings are represented as "b:" followed by a
- // hexadecimal encoding of the string. Example: "b:0102cacb"
+ // * All other strings are represented as "b:" followed by a hexadecimal encoding of the
+ // string. Example: "b:0102cacb"
// * Streams
- // * JSON v1: Only the stream's dictionary is encoded. There is
- // no way tell a stream from a dictionary other than context.
- // * JSON v2: A stream is encoded as {"dict": {...}} with the
- // value being the encoding of the stream's dictionary. Since
- // "dict" does not otherwise represent anything, this is
- // unambiguous. The getStreamJSON() call can be used to add
- // encoding of the stream's data.
- // * Object types that are only valid in content streams (inline
- // image, operator) are serialized as "null". Attempting to
- // serialize a "reserved" object is an error.
- // If dereference_indirect is true and this is an indirect object,
- // show the actual contents of the object. The effect of
- // dereference_indirect applies only to this object. It is not
+ // * JSON v1: Only the stream's dictionary is encoded. There is no way to tell a stream from
+ // a dictionary other than context.
+ // * JSON v2: A stream is encoded as {"dict": {...}} with the value being the encoding of the
+ // stream's dictionary. Since "dict" does not otherwise represent anything, this is
+ // unambiguous. The getStreamJSON() call can be used to add encoding of the stream's data.
+ // * Object types that are only valid in content streams (inline image, operator) are serialized
+ // as "null". Attempting to serialize a "reserved" object is an error.
+ // If dereference_indirect is true and this is an indirect object, show the actual contents of
+ // the object. The effect of dereference_indirect applies only to this object. It is not
// recursive.
QPDF_DLL
JSON getJSON(int json_version, bool dereference_indirect = false);
@@ -1400,36 +1194,28 @@ class QPDFObjectHandle
[[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON
getJSON(bool dereference_indirect = false);
- // This method can be called on a stream to get a more extended
- // JSON representation of the stream that includes the stream's
- // data. The JSON object returned is always a dictionary whose
- // "dict" key is an encoding of the stream's dictionary. The
- // representation of the data is determined by the json_data
- // field.
+ // This method can be called on a stream to get a more extended JSON representation of the
+ // stream that includes the stream's data. The JSON object returned is always a dictionary whose
+ // "dict" key is an encoding of the stream's dictionary. The representation of the data is
+ // determined by the json_data field.
//
- // The json_data field may have the value qpdf_sj_none,
- // qpdf_sj_inline, or qpdf_sj_file.
+ // The json_data field may have the value qpdf_sj_none, qpdf_sj_inline, or qpdf_sj_file.
//
// If json_data is qpdf_sj_none, stream data is not represented.
//
- // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream
- // data is filtered or not based on the value of decode_level,
- // which has the same meaning as with pipeStreamData.
+ // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream data is filtered or not based on
+ // the value of decode_level, which has the same meaning as with pipeStreamData.
//
- // If json_data is qpdf_sj_inline, the base64-encoded stream data
- // is included in the "data" field of the dictionary that is
- // returned.
+ // If json_data is qpdf_sj_inline, the base64-encoded stream data is included in the "data"
+ // field of the dictionary that is returned.
//
- // If json_data is qpdf_sj_file, then the Pipeline ("p") and
- // data_filename argument must be supplied. The value of
- // data_filename is stored in the resulting json in the "datafile"
- // key but is not otherwise use. The stream data itself (raw or
- // filtered depending on decode level), is written to the pipeline
- // via pipeStreamData().
+ // If json_data is qpdf_sj_file, then the Pipeline ("p") and data_filename argument must be
+ // supplied. The value of data_filename is stored in the resulting json in the "datafile" key
+ // but is not otherwise used. The stream data itself (raw or filtered depending on decode
+ // level) is written to the pipeline via pipeStreamData().
//
- // NOTE: When json_data is qpdf_sj_inline, the QPDF object from
- // which the stream originates must remain valid until after the
- // JSON object is written.
+ // NOTE: When json_data is qpdf_sj_inline, the QPDF object from which the stream originates must
+ // remain valid until after the JSON object is written.
QPDF_DLL
JSON getStreamJSON(
int json_version,
@@ -1438,11 +1224,9 @@ class QPDFObjectHandle
Pipeline* p,
std::string const& data_filename);
- // Legacy helper methods for commonly performed operations on
- // pages. Newer code should use QPDFPageObjectHelper instead. The
- // specification and behavior of these methods are the same as the
- // identically named methods in that class, but newer
- // functionality will be added there.
+ // Legacy helper methods for commonly performed operations on pages. Newer code should use
+ // QPDFPageObjectHelper instead. The specification and behavior of these methods are the same as
+ // the identically named methods in that class, but newer functionality will be added there.
QPDF_DLL
std::map<std::string, QPDFObjectHandle> getPageImages();
QPDF_DLL
@@ -1455,18 +1239,15 @@ class QPDFObjectHandle
void coalesceContentStreams();
// End legacy page helpers
- // Issue a warning about this object if possible. If the object
- // has a description, a warning will be issued using the owning
- // QPDF as context. Otherwise, a message will be written to the
- // default logger's error stream, which is standard error if not
- // overridden. Objects read normally from the file have
- // descriptions. See comments on setObjectDescription for
- // additional details.
+ // Issue a warning about this object if possible. If the object has a description, a warning
+ // will be issued using the owning QPDF as context. Otherwise, a message will be written to the
+ // default logger's error stream, which is standard error if not overridden. Objects read
+ // normally from the file have descriptions. See comments on setObjectDescription for additional
+ // details.
QPDF_DLL
void warnIfPossible(std::string const& warning);
- // Provide access to specific classes for recursive
- // disconnected().
+ // Provide access to specific classes for recursive disconnected().
class DisconnectAccess
{
friend class QPDF_Dictionary;
@@ -1480,9 +1261,8 @@ class QPDFObjectHandle
}
};
- // Convenience routine: Throws if the assumption is violated. Your
- // code will be better if you call one of the isType methods and
- // handle the case of the type being wrong, but these can be
+ // Convenience routine: Throws if the assumption is violated. Your code will be better if you
+ // call one of the isType methods and handle the case of the type being wrong, but these can be
// convenient if you have already verified the type.
QPDF_DLL
void assertInitialized() const;
@@ -1519,11 +1299,10 @@ class QPDFObjectHandle
QPDF_DLL
void assertNumber();
- // The isPageObject method checks the /Type key of the object.
- // This is not completely reliable as there are some otherwise
- // valid files whose /Type is wrong for page objects. qpdf is
- // slightly more accepting but may still return false here when
- // treating the object as a page would work. Use this sparingly.
+ // The isPageObject method checks the /Type key of the object. This is not completely reliable
+ // as there are some otherwise valid files whose /Type is wrong for page objects. qpdf is
+ // slightly more accepting but may still return false here when treating the object as a page
+ // would work. Use this sparingly.
QPDF_DLL
bool isPageObject();
QPDF_DLL
@@ -1534,13 +1313,12 @@ class QPDFObjectHandle
QPDF_DLL
bool isFormXObject();
- // Indicate if this is an image. If exclude_imagemask is true,
- // don't count image masks as images.
+ // Indicate if this is an image. If exclude_imagemask is true, don't count image masks as
+ // images.
QPDF_DLL
bool isImage(bool exclude_imagemask = true);
- // The following methods do not form part of the public API and are for
- // internal use only.
+ // The following methods do not form part of the public API and are for internal use only.
QPDFObjectHandle(std::shared_ptr<QPDFObject> const& obj) :
obj(obj)
@@ -1600,9 +1378,8 @@ class QPDFObjectHandle
static void warn(QPDF*, QPDFExc const&);
void checkOwnership(QPDFObjectHandle const&) const;
- // Moving members of QPDFObjectHandle into a smart pointer incurs
- // a substantial performance penalty since QPDFObjectHandle
- // objects are copied around so frequently.
+ // Moving members of QPDFObjectHandle into a smart pointer incurs a substantial performance
+ // penalty since QPDFObjectHandle objects are copied around so frequently.
std::shared_ptr<QPDFObject> obj;
};
@@ -1611,13 +1388,12 @@ class QPDFObjectHandle
// auto oh = "<< /Key (value) >>"_qpdf;
-// If this is causing problems in your code, define
-// QPDF_NO_QPDF_STRING to prevent the declaration from being here.
+// If this is causing problems in your code, define QPDF_NO_QPDF_STRING to prevent the declaration
+// from being here.
/* clang-format off */
-// Disable formatting for this declaration: emacs font-lock in cc-mode
-// (as of 28.1) treats the rest of the file as a string if
-// clang-format removes the space after "operator", and as of
+// Disable formatting for this declaration: emacs font-lock in cc-mode (as of 28.1) treats the rest
+// of the file as a string if clang-format removes the space after "operator", and as of
// clang-format 15, there's no way to prevent it from doing so.
QPDF_DLL
QPDFObjectHandle operator ""_qpdf(char const* v, size_t len);
@@ -1627,8 +1403,8 @@ QPDFObjectHandle operator ""_qpdf(char const* v, size_t len);
class QPDFObjectHandle::QPDFDictItems
{
- // This class allows C++-style iteration, including range-for
- // iteration, around dictionaries. You can write
+ // This class allows C++-style iteration, including range-for iteration, around dictionaries.
+ // You can write
// for (auto iter: QPDFDictItems(dictionary_obj))
// {
@@ -1636,8 +1412,7 @@ class QPDFObjectHandle::QPDFDictItems
// // iter.second is a QPDFObjectHandle
// }
- // See examples/pdf-name-number-tree.cc for a demonstration of
- // using this API.
+ // See examples/pdf-name-number-tree.cc for a demonstration of using this API.
public:
QPDF_DLL
@@ -1727,16 +1502,15 @@ class QPDFObjectHandle::QPDFDictItems
class QPDFObjectHandle::QPDFArrayItems
{
- // This class allows C++-style iteration, including range-for
- // iteration, around arrays. You can write
+ // This class allows C++-style iteration, including range-for iteration, around arrays. You can
+ // write
// for (auto iter: QPDFArrayItems(array_obj))
// {
// // iter is a QPDFObjectHandle
// }
- // See examples/pdf-name-number-tree.cc for a demonstration of
- // using this API.
+ // See examples/pdf-name-number-tree.cc for a demonstration of using this API.
public:
QPDF_DLL
diff --git a/include/qpdf/QPDFPageObjectHelper.hh b/include/qpdf/QPDFPageObjectHelper.hh
index 05a57b5c..98bb078e 100644
--- a/include/qpdf/QPDFPageObjectHelper.hh
+++ b/include/qpdf/QPDFPageObjectHelper.hh
@@ -2,22 +2,19 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
#ifndef QPDFPAGEOBJECTHELPER_HH
#define QPDFPAGEOBJECTHELPER_HH
@@ -35,9 +32,8 @@ class QPDFAcroFormDocumentHelper;
class QPDFPageObjectHelper: public QPDFObjectHelper
{
- // This is a helper class for page objects, but as of qpdf 10.1,
- // many of the methods also work for form XObjects. When this is
- // the case, it is noted in the comment.
+ // This is a helper class for page objects, but as of qpdf 10.1, many of the methods also work
+ // for form XObjects. When this is the case, it is noted in the comment.
public:
QPDF_DLL
@@ -47,35 +43,30 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
// PAGE ATTRIBUTES
- // The getAttribute method works with pages and form XObjects. It
- // return the value of the requested attribute from the page/form
- // XObject's dictionary, taking inheritance from the pages tree
- // into consideration. For pages, the attributes /MediaBox,
- // /CropBox, /Resources, and /Rotate are inheritable, meaning that
- // if they are not present directly on the page node, they may be
- // inherited from ancestor nodes in the pages tree.
+ // The getAttribute method works with pages and form XObjects. It returns the value of the
+ // requested attribute from the page/form XObject's dictionary, taking inheritance from the
+ // pages tree into consideration. For pages, the attributes /MediaBox, /CropBox, /Resources, and
+ // /Rotate are inheritable, meaning that if they are not present directly on the page node, they
+ // may be inherited from ancestor nodes in the pages tree.
//
// There are two ways that an attribute can be "shared":
//
- // * For inheritable attributes on pages, it may appear in a
- // higher level node of the pages tree
+ // * For inheritable attributes on pages, it may appear in a higher level node of the pages tree
//
- // * For any attribute, the attribute may be an indirect object
- // which may be referenced by more than one page/form XObject.
+ // * For any attribute, the attribute may be an indirect object which may be referenced by more
+ // than one page/form XObject.
//
- // If copy_if_shared is true, then this method will replace the
- // attribute with a shallow copy if it is indirect or inherited
- // and return the copy. You should do this if you are going to
- // modify the returned object and want the modifications to apply
- // to the current page/form XObject only.
+ // If copy_if_shared is true, then this method will replace the attribute with a shallow copy if
+ // it is indirect or inherited and return the copy. You should do this if you are going to
+ // modify the returned object and want the modifications to apply to the current page/form
+ // XObject only.
QPDF_DLL
QPDFObjectHandle getAttribute(std::string const& name, bool copy_if_shared);
// PAGE BOXES
//
- // Pages have various types of boundary boxes. These are described
- // in detail in the PDF specification (section 14.11.2 Page
- // boundaries). They are, by key in the page dictionary:
+ // Pages have various types of boundary boxes. These are described in detail in the PDF
+ // specification (section 14.11.2 Page boundaries). They are, by key in the page dictionary:
//
// * /MediaBox -- boundaries of physical page
// * /CropBox -- clipping region of what is displayed
@@ -87,114 +78,90 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
// fallback value for /CropBox is /MediaBox, and the fallback
// values for the other boxes are /CropBox.
//
- // As noted above (PAGE ATTRIBUTES), /MediaBox and /CropBox can be
- // inherited from parent nodes in the pages tree. The other boxes
- // can't be inherited.
+ // As noted above (PAGE ATTRIBUTES), /MediaBox and /CropBox can be inherited from parent nodes
+ // in the pages tree. The other boxes can't be inherited.
//
- // When the comments below refer to the "effective value" of an
- // box, this takes into consideration both inheritance through the
- // pages tree (in the case of /MediaBox and /CropBox) and fallback
- // values for missing attributes (for all except /MediaBox).
+ // When the comments below refer to the "effective value" of a box, this takes into
+ // consideration both inheritance through the pages tree (in the case of /MediaBox and /CropBox)
+ // and fallback values for missing attributes (for all except /MediaBox).
//
- // For the methods below, copy_if_shared is passed to getAttribute
- // and therefore refers only to indirect objects and values that
- // are inherited through the pages tree.
+ // For the methods below, copy_if_shared is passed to getAttribute and therefore refers only to
+ // indirect objects and values that are inherited through the pages tree.
//
- // If copy_if_fallback is true, a copy is made if the object's
- // value was obtained by falling back to a different box.
+ // If copy_if_fallback is true, a copy is made if the object's value was obtained by falling
+ // back to a different box.
//
- // The copy_if_shared and copy_if_fallback parameters carry across
- // multiple layers. This is explained below.
+ // The copy_if_shared and copy_if_fallback parameters carry across multiple layers. This is
+ // explained below.
//
- // You should set copy_if_shared to true if you want to modify a
- // bounding box for the current page without affecting other pages
- // but you don't want to change the fallback behavior. For
- // example, if you want to modify the /TrimBox for the current
- // page only but have it continue to fall back to the value of
- // /CropBox or /MediaBox if they are not defined, you could set
+ // You should set copy_if_shared to true if you want to modify a bounding box for the current
+ // page without affecting other pages but you don't want to change the fallback behavior. For
+ // example, if you want to modify the /TrimBox for the current page only but have it continue to
+ // fall back to the value of /CropBox or /MediaBox if they are not defined, you could set
// copy_if_shared to true.
//
- // You should set copy_if_fallback to true if you want to modify a
- // specific box as distinct from any other box. For example, if
- // you want to make /TrimBox differ from /CropBox, then you should
- // set copy_if_fallback to true.
+ // You should set copy_if_fallback to true if you want to modify a specific box as distinct from
+ // any other box. For example, if you want to make /TrimBox differ from /CropBox, then you
+ // should set copy_if_fallback to true.
//
// The copy_if_fallback flags were added in qpdf 11.
//
- // For example, suppose that neither /CropBox nor /TrimBox is
- // present on a page but /CropBox is present in the page's parent
- // node in the page tree.
+ // For example, suppose that neither /CropBox nor /TrimBox is present on a page but /CropBox is
+ // present in the page's parent node in the page tree.
//
- // * getTrimBox(false, false) would return the /CropBox from the
- // parent node.
+ // * getTrimBox(false, false) would return the /CropBox from the parent node.
//
- // * getTrimBox(true, false) would make a shallow copy of the
- // /CropBox from the parent node into the current node and
- // return it.
+ // * getTrimBox(true, false) would make a shallow copy of the /CropBox from the parent node into
+ // the current node and return it.
//
- // * getTrimBox(false, true) would make a shallow copy of the
- // /CropBox from the parent node into /TrimBox of the current
- // node and return it.
+ // * getTrimBox(false, true) would make a shallow copy of the /CropBox from the parent node into
+ // /TrimBox of the current node and return it.
//
- // * getTrimBox(true, true) would make a shallow copy of the
- // /CropBox from the parent node into the current node, then
- // make a shallow copy of the resulting copy to /TrimBox of the
- // current node, and then return that.
+ // * getTrimBox(true, true) would make a shallow copy of the /CropBox from the parent node into
+ // the current node, then make a shallow copy of the resulting copy to /TrimBox of the current
+ // node, and then return that.
//
- // To illustrate how these parameters carry across multiple
- // layers, suppose that neither /MediaBox, /CropBox, nor /TrimBox
- // is present on a page but /MediaBox is present on the parent. In
- // this case:
+ // To illustrate how these parameters carry across multiple layers, suppose that neither
+ // /MediaBox, /CropBox, nor /TrimBox is present on a page but /MediaBox is present on the
+ // parent. In this case:
//
- // * getTrimBox(false, false) would return the value of /MediaBox
- // from the parent node.
+ // * getTrimBox(false, false) would return the value of /MediaBox from the parent node.
//
- // * getTrimBox(true, false) would copy /MediaBox to the current
- // node and return it.
+ // * getTrimBox(true, false) would copy /MediaBox to the current node and return it.
//
- // * getTrimBox(false, true) would first copy /MediaBox from the
- // parent to /CropBox, then copy /CropBox to /TrimBox, and then
- // return the result.
+ // * getTrimBox(false, true) would first copy /MediaBox from the parent to /CropBox, then copy
+ // /CropBox to /TrimBox, and then return the result.
//
- // * getTrimBox(true, true) would first copy /MediaBox from the
- // parent to the current page, then copy it to /CropBox, then
- // copy /CropBox to /TrimBox, and then return the result.
+ // * getTrimBox(true, true) would first copy /MediaBox from the parent to the current page, then
+ // copy it to /CropBox, then copy /CropBox to /TrimBox, and then return the result.
//
- // If you need different behavior, call getAttribute directly and
- // take care of your own copying.
+ // If you need different behavior, call getAttribute directly and take care of your own copying.
// Return the effective MediaBox
QPDF_DLL
QPDFObjectHandle getMediaBox(bool copy_if_shared = false);
- // Return the effective CropBox. If not defined, fall back to
- // MediaBox
+ // Return the effective CropBox. If not defined, fall back to MediaBox
QPDF_DLL
QPDFObjectHandle getCropBox(bool copy_if_shared = false, bool copy_if_fallback = false);
- // Return the effective BleedBox. If not defined, fall back to
- // CropBox.
+ // Return the effective BleedBox. If not defined, fall back to CropBox.
QPDF_DLL
QPDFObjectHandle getBleedBox(bool copy_if_shared = false, bool copy_if_fallback = false);
- // Return the effective TrimBox. If not defined, fall back to
- // CropBox.
+ // Return the effective TrimBox. If not defined, fall back to CropBox.
QPDF_DLL
QPDFObjectHandle getTrimBox(bool copy_if_shared = false, bool copy_if_fallback = false);
- // Return the effective ArtBox. If not defined, fall back to
- // CropBox.
+ // Return the effective ArtBox. If not defined, fall back to CropBox.
QPDF_DLL
QPDFObjectHandle getArtBox(bool copy_if_shared = false, bool copy_if_fallback = false);
- // Iterate through XObjects, possibly recursing into form
- // XObjects. This works with pages or form XObjects. Call action
- // on each XObject for which selector, if specified, returns true.
- // With no selector, calls action for every object. In addition to
- // the object being passed to action, the containing XObject
- // dictionary and key are passed in. Remember that the XObject
- // dictionary may be shared, and the object may appear in multiple
- // XObject dictionaries.
+ // Iterate through XObjects, possibly recursing into form XObjects. This works with pages or
+ // form XObjects. Call action on each XObject for which selector, if specified, returns true.
+ // With no selector, calls action for every object. In addition to the object being passed to
+ // action, the containing XObject dictionary and key are passed in. Remember that the XObject
+ // dictionary may be shared, and the object may appear in multiple XObject dictionaries.
QPDF_DLL
void forEachXObject(
bool recursive,
@@ -214,12 +181,10 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
std::function<void(
QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)> action);
- // Returns an empty map if there are no images or no resources.
- // Prior to qpdf 8.4.0, this function did not support inherited
- // resources, but it does now. Return value is a map from XObject
- // name to the image object, which is always a stream. Works with
- // form XObjects as well as pages. This method does not recurse
- // into nested form XObjects. For that, use forEachImage.
+ // Returns an empty map if there are no images or no resources. Prior to qpdf 8.4.0, this
+ // function did not support inherited resources, but it does now. Return value is a map from
+ // XObject name to the image object, which is always a stream. Works with form XObjects as well
+ // as pages. This method does not recurse into nested form XObjects. For that, use forEachImage.
QPDF_DLL
std::map<std::string, QPDFObjectHandle> getImages();
@@ -227,59 +192,48 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
QPDF_DLL
std::map<std::string, QPDFObjectHandle> getPageImages();
- // Returns an empty map if there are no form XObjects or no
- // resources. Otherwise, returns a map of keys to form XObjects
- // directly referenced from this page or form XObjects. This does
- // not recurse into nested form XObjects. For that, use
- // forEachFormXObject.
+ // Returns an empty map if there are no form XObjects or no resources. Otherwise, returns a map
+ // of keys to form XObjects directly referenced from this page or form XObjects. This does not
+ // recurse into nested form XObjects. For that, use forEachFormXObject.
QPDF_DLL
std::map<std::string, QPDFObjectHandle> getFormXObjects();
- // Converts each inline image to an external (normal) image if the
- // size is at least the specified number of bytes. This method
- // works with pages or form XObjects. By default, it recursively
- // processes nested form XObjects. Pass true as shallow to avoid
- // this behavior. Prior to qpdf 10.1, form XObjects were ignored,
- // but this was considered a bug.
+ // Converts each inline image to an external (normal) image if the size is at least the
+ // specified number of bytes. This method works with pages or form XObjects. By default, it
+ // recursively processes nested form XObjects. Pass true as shallow to avoid this behavior.
+ // Prior to qpdf 10.1, form XObjects were ignored, but this was considered a bug.
QPDF_DLL
void externalizeInlineImages(size_t min_size = 0, bool shallow = false);
- // Return the annotations in the page's "/Annots" list, if any. If
- // only_subtype is non-empty, only include annotations of the
- // given subtype.
+ // Return the annotations in the page's "/Annots" list, if any. If only_subtype is non-empty,
+ // only include annotations of the given subtype.
QPDF_DLL
std::vector<QPDFAnnotationObjectHelper> getAnnotations(std::string const& only_subtype = "");
- // Returns a vector of stream objects representing the content
- // streams for the given page. This routine allows the caller to
- // not care whether there are one or more than one content streams
+ // Returns a vector of stream objects representing the content streams for the given page. This
+ // routine allows the caller to not care whether there are one or more than one content streams
// for a page.
QPDF_DLL
std::vector<QPDFObjectHandle> getPageContents();
- // Add the given object as a new content stream for this page. If
- // parameter 'first' is true, add to the beginning. Otherwise, add
- // to the end. This routine automatically converts the page
- // contents to an array if it is a scalar, allowing the caller not
- // to care what the initial structure is. You can call
- // coalesceContentStreams() afterwards if you want to force it to
- // be a single stream.
+ // Add the given object as a new content stream for this page. If parameter 'first' is true, add
+ // to the beginning. Otherwise, add to the end. This routine automatically converts the page
+ // contents to an array if it is a scalar, allowing the caller not to care what the initial
+ // structure is. You can call coalesceContentStreams() afterwards if you want to force it to be
+ // a single stream.
QPDF_DLL
void addPageContents(QPDFObjectHandle contents, bool first);
- // Rotate a page. If relative is false, set the rotation of the
- // page to angle. Otherwise, add angle to the rotation of the
- // page. Angle must be a multiple of 90. Adding 90 to the rotation
+ // Rotate a page. If relative is false, set the rotation of the page to angle. Otherwise, add
+ // angle to the rotation of the page. Angle must be a multiple of 90. Adding 90 to the rotation
// rotates clockwise by 90 degrees.
QPDF_DLL
void rotatePage(int angle, bool relative);
- // Coalesce a page's content streams. A page's content may be a
- // stream or an array of streams. If this page's content is an
- // array, concatenate the streams into a single stream. This can
- // be useful when working with files that split content streams in
- // arbitrary spots, such as in the middle of a token, as that can
- // confuse some software. You could also call this after calling
+ // Coalesce a page's content streams. A page's content may be a stream or an array of streams.
+ // If this page's content is an array, concatenate the streams into a single stream. This can be
+ // useful when working with files that split content streams in arbitrary spots, such as in the
+ // middle of a token, as that can confuse some software. You could also call this after calling
// addPageContents.
QPDF_DLL
void coalesceContentStreams();
@@ -288,25 +242,21 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
// Content stream handling
//
- // Parse a page's contents through ParserCallbacks, described
- // above. This method works whether the contents are a single
- // stream or an array of streams. Call on a page object. Also
- // works for form XObjects.
+ // Parse a page's contents through ParserCallbacks, described above. This method works whether
+ // the contents are a single stream or an array of streams. Call on a page object. Also works
+ // for form XObjects.
QPDF_DLL
void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks);
// Old name
QPDF_DLL
void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks);
- // Pass a page's or form XObject's contents through the given
- // TokenFilter. If a pipeline is also provided, it will be the
- // target of the write methods from the token filter. If a
- // pipeline is not specified, any output generated by the token
- // filter will be discarded. Use this interface if you need to
- // pass a page's contents through filter for work purposes without
- // having that filter automatically applied to the page's
- // contents, as happens with addContentTokenFilter. See
- // examples/pdf-count-strings.cc for an example.
+ // Pass a page's or form XObject's contents through the given TokenFilter. If a pipeline is also
+ // provided, it will be the target of the write methods from the token filter. If a pipeline is
+ // not specified, any output generated by the token filter will be discarded. Use this interface
+ // if you need to pass a page's contents through a filter for work purposes without having that
+ // filter automatically applied to the page's contents, as happens with addContentTokenFilter.
+ // See examples/pdf-count-strings.cc for an example.
QPDF_DLL
void filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr);
@@ -314,95 +264,74 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
QPDF_DLL
void filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr);
- // Pipe a page's contents through the given pipeline. This method
- // works whether the contents are a single stream or an array of
- // streams. Also works on form XObjects.
+ // Pipe a page's contents through the given pipeline. This method works whether the contents are
+ // a single stream or an array of streams. Also works on form XObjects.
QPDF_DLL
void pipeContents(Pipeline* p);
// Old name
QPDF_DLL
void pipePageContents(Pipeline* p);
- // Attach a token filter to a page's contents. If the page's
- // contents is an array of streams, it is automatically coalesced.
- // The token filter is applied to the page's contents as a single
+ // Attach a token filter to a page's contents. If the page's contents is an array of streams, it
+ // is automatically coalesced. The token filter is applied to the page's contents as a single
// stream. Also works on form XObjects.
QPDF_DLL
void addContentTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
- // A page's resources dictionary maps names to objects elsewhere
- // in the file. This method walks through a page's contents and
- // keeps tracks of which resources are referenced somewhere in the
- // contents. Then it removes from the resources dictionary any
- // object that is not referenced in the contents. This operation
- // is most useful after calling
- // QPDFPageDocumentHelper::pushInheritedAttributesToPage(). This
- // method is used by page splitting code to avoid copying unused
- // objects in files that used shared resource dictionaries across
- // multiple pages. This method recurses into form XObjects and can
- // be called with a form XObject as well as a page.
+ // A page's resources dictionary maps names to objects elsewhere in the file. This method walks
+ // through a page's contents and keeps track of which resources are referenced somewhere in the
+ // contents. Then it removes from the resources dictionary any object that is not referenced in
+ // the contents. This operation is most useful after calling
+ // QPDFPageDocumentHelper::pushInheritedAttributesToPage(). This method is used by page
+ // splitting code to avoid copying unused objects in files that used shared resource
+ // dictionaries across multiple pages. This method recurses into form XObjects and can be called
+ // with a form XObject as well as a page.
QPDF_DLL
void removeUnreferencedResources();
- // Return a new QPDFPageObjectHelper that is a duplicate of the
- // page. The returned object is an indirect object that is ready
- // to be inserted into the same or a different QPDF object using
- // any of the addPage methods in QPDFPageDocumentHelper or QPDF.
- // Without calling one of those methods, the page will not be
- // added anywhere. The new page object shares all content streams
- // and indirect object resources with the original page, so if you
- // are going to modify the contents or other aspects of the page,
- // you will need to handling copying of the component parts
- // separately.
+ // Return a new QPDFPageObjectHelper that is a duplicate of the page. The returned object is an
+ // indirect object that is ready to be inserted into the same or a different QPDF object using
+ // any of the addPage methods in QPDFPageDocumentHelper or QPDF. Without calling one of those
+ // methods, the page will not be added anywhere. The new page object shares all content streams
+ // and indirect object resources with the original page, so if you are going to modify the
+ // contents or other aspects of the page, you will need to handle copying of the component
+ // parts separately.
QPDF_DLL
QPDFPageObjectHelper shallowCopyPage();
- // Return a transformation matrix whose effect is the same as the
- // page's /Rotate and /UserUnit parameters. If invert is true,
- // return a matrix whose effect is the opposite. The regular
- // matrix is suitable for taking something from this page to put
- // elsewhere, and the second one is suitable for putting something
- // else onto this page. The page's TrimBox is used as the bounding
- // box for purposes of computing the matrix.
+ // Return a transformation matrix whose effect is the same as the page's /Rotate and /UserUnit
+ // parameters. If invert is true, return a matrix whose effect is the opposite. The regular
+ // matrix is suitable for taking something from this page to put elsewhere, and the second one
+ // is suitable for putting something else onto this page. The page's TrimBox is used as the
+ // bounding box for purposes of computing the matrix.
QPDF_DLL
QPDFObjectHandle::Matrix getMatrixForTransformations(bool invert = false);
- // Return a form XObject that draws this page. This is useful for
- // n-up operations, underlay, overlay, thumbnail generation, or
- // any other case in which it is useful to replicate the contents
- // of a page in some other context. The dictionaries are shallow
- // copies of the original page dictionary, and the contents are
- // coalesced from the page's contents. The resulting object handle
- // is not referenced anywhere. If handle_transformations is true,
- // the resulting form XObject's /Matrix will be set to replicate
- // rotation (/Rotate) and scaling (/UserUnit) in the page's
- // dictionary. In this way, the page's transformations will be
- // preserved when placing this object on another page.
+ // Return a form XObject that draws this page. This is useful for n-up operations, underlay,
+ // overlay, thumbnail generation, or any other case in which it is useful to replicate the
+ // contents of a page in some other context. The dictionaries are shallow copies of the original
+ // page dictionary, and the contents are coalesced from the page's contents. The resulting
+ // object handle is not referenced anywhere. If handle_transformations is true, the resulting
+ // form XObject's /Matrix will be set to replicate rotation (/Rotate) and scaling (/UserUnit) in
+ // the page's dictionary. In this way, the page's transformations will be preserved when placing
+ // this object on another page.
QPDF_DLL
QPDFObjectHandle getFormXObjectForPage(bool handle_transformations = true);
- // Return content stream text that will place the given form
- // XObject (fo) using the resource name "name" on this page
- // centered within the given rectangle. If invert_transformations
- // is true, the effect of any rotation (/Rotate) and scaling
- // (/UserUnit) applied to the current page will be inverted in the
- // form XObject placement. This will cause the form XObject's
- // absolute orientation to be preserved. You could overlay one
- // page on another by calling getFormXObjectForPage on the
- // original page, QPDFObjectHandle::getUniqueResourceName on the
- // destination page's Resources dictionary to generate a name for
- // the resulting object, and calling placeFormXObject on the
- // destination page. Then insert the new fo (or, if it comes from
- // a different file, the result of calling copyForeignObject on
- // it) into the resources dictionary using name, and append or
- // prepend the content to the page's content streams. See the
- // overlay/underlay code in qpdf.cc or
- // examples/pdf-overlay-page.cc for an example. From qpdf 10.0.0,
- // the allow_shrink and allow_expand parameters control whether
- // the form XObject is allowed to be shrunk or expanded to stay
- // within or maximally fill the destination rectangle. The default
- // values are for backward compatibility with the pre-10.0.0
- // behavior.
+ // Return content stream text that will place the given form XObject (fo) using the resource
+ // name "name" on this page centered within the given rectangle. If invert_transformations is
+ // true, the effect of any rotation (/Rotate) and scaling (/UserUnit) applied to the current
+ // page will be inverted in the form XObject placement. This will cause the form XObject's
+ // absolute orientation to be preserved. You could overlay one page on another by calling
+ // getFormXObjectForPage on the original page, QPDFObjectHandle::getUniqueResourceName on the
+ // destination page's Resources dictionary to generate a name for the resulting object, and
+ // calling placeFormXObject on the destination page. Then insert the new fo (or, if it comes
+ // from a different file, the result of calling copyForeignObject on it) into the resources
+ // dictionary using name, and append or prepend the content to the page's content streams. See
+ // the overlay/underlay code in qpdf.cc or examples/pdf-overlay-page.cc for an example. From
+ // qpdf 10.0.0, the allow_shrink and allow_expand parameters control whether the form XObject is
+ // allowed to be shrunk or expanded to stay within or maximally fill the destination rectangle.
+ // The default values are for backward compatibility with the pre-10.0.0 behavior.
QPDF_DLL
std::string placeFormXObject(
QPDFObjectHandle fo,
@@ -412,8 +341,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
bool allow_shrink = true,
bool allow_expand = false);
- // Alternative version that also fills in the transformation
- // matrix that was used.
+ // Alternative version that also fills in the transformation matrix that was used.
QPDF_DLL
std::string placeFormXObject(
QPDFObjectHandle fo,
@@ -424,10 +352,9 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
bool allow_shrink = true,
bool allow_expand = false);
- // Return the transformation matrix that translates from the given
- // form XObject's coordinate system into the given rectangular
- // region on the page. The parameters have the same meaning as for
- // placeFormXObject.
+ // Return the transformation matrix that translates from the given form XObject's coordinate
+ // system into the given rectangular region on the page. The parameters have the same meaning as
+ // for placeFormXObject.
QPDF_DLL
QPDFMatrix getMatrixForFormXObjectPlacement(
QPDFObjectHandle fo,
@@ -436,43 +363,32 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
bool allow_shrink = true,
bool allow_expand = false);
- // If a page is rotated using /Rotate in the page's dictionary,
- // instead rotate the page by the same amount by altering the
- // contents and removing the /Rotate key. This method adjusts the
- // various page bounding boxes (/MediaBox, etc.) so that the page
- // will have the same semantics. This can be useful to work around
- // problems with PDF applications that can't properly handle
- // rotated pages. If a QPDFAcroFormDocumentHelper is provided, it
- // will be used for resolving any form fields that have to be
- // rotated. If not, one will be created inside the function, which
+ // If a page is rotated using /Rotate in the page's dictionary, instead rotate the page by the
+ // same amount by altering the contents and removing the /Rotate key. This method adjusts the
+ // various page bounding boxes (/MediaBox, etc.) so that the page will have the same semantics.
+ // This can be useful to work around problems with PDF applications that can't properly handle
+ // rotated pages. If a QPDFAcroFormDocumentHelper is provided, it will be used for resolving any
+ // form fields that have to be rotated. If not, one will be created inside the function, which
// is less efficient.
QPDF_DLL
void flattenRotation(QPDFAcroFormDocumentHelper* afdh = nullptr);
- // Copy annotations from another page into this page. The other
- // page may be from the same QPDF or from a different QPDF. Each
- // annotation's rectangle is transformed by the given matrix. If
- // the annotation is a widget annotation that is associated with a
- // form field, the form field is copied into this document's
- // AcroForm dictionary as well. You can use this to copy
- // annotations from a page that was converted to a form XObject
- // and added to another page. For example of this, see
- // examples/pdf-overlay-page.cc. This method calls
- // QPDFAcroFormDocumentHelper::transformAnnotations, which will
- // copy annotations and form fields so that you can copy
- // annotations from a source page to any number of other pages,
- // even with different matrices, and maintain independence from
- // the original annotations. See also
- // QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be
- // used if you copy a page and want to repair the annotations on
- // the destination page to make them independent from the original
- // page's annotations.
+ // Copy annotations from another page into this page. The other page may be from the same QPDF
+ // or from a different QPDF. Each annotation's rectangle is transformed by the given matrix. If
+ // the annotation is a widget annotation that is associated with a form field, the form field is
+ // copied into this document's AcroForm dictionary as well. You can use this to copy annotations
+ // from a page that was converted to a form XObject and added to another page. For an example of
+ // this, see examples/pdf-overlay-page.cc. This method calls
+ // QPDFAcroFormDocumentHelper::transformAnnotations, which will copy annotations and form fields
+ // so that you can copy annotations from a source page to any number of other pages, even with
+ // different matrices, and maintain independence from the original annotations. See also
+ // QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be used if you copy a page and
+ // want to repair the annotations on the destination page to make them independent from the
+ // original page's annotations.
//
- // If you pass in a QPDFAcroFormDocumentHelper*, the method will
- // use that instead of creating one in the function. Creating
- // QPDFAcroFormDocumentHelper objects is expensive, so if you're
- // doing a lot of copying, it can be more efficient to create
- // these outside and pass them in.
+ // If you pass in a QPDFAcroFormDocumentHelper*, the method will use that instead of creating
+ // one in the function. Creating QPDFAcroFormDocumentHelper objects is expensive, so if you're
+ // doing a lot of copying, it can be more efficient to create these outside and pass them in.
QPDF_DLL
void copyAnnotations(
QPDFPageObjectHelper from_page,
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index b07b7c0b..d8bc43b6 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -2,22 +2,19 @@
//
// This file is part of qpdf.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
//
-// Versions of qpdf prior to version 7 were released under the terms
-// of version 2.0 of the Artistic License. At your option, you may
-// continue to consider qpdf to be licensed under those terms. Please
-// see the manual for additional information.
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
+// License. At your option, you may continue to consider qpdf to be licensed under those terms.
+// Please see the manual for additional information.
#ifndef QPDFTOKENIZER_HH
#define QPDFTOKENIZER_HH
@@ -34,9 +31,8 @@
class QPDFTokenizer
{
public:
- // Token type tt_eof is only returned of allowEOF() is called on
- // the tokenizer. tt_eof was introduced in QPDF version 4.1.
- // tt_space, tt_comment, and tt_inline_image were added in QPDF
+ // Token type tt_eof is only returned if allowEOF() is called on the tokenizer. tt_eof was
+ // introduced in QPDF version 4.1. tt_space, tt_comment, and tt_inline_image were added in QPDF
// version 8.
enum token_type_e {
tt_bad,
@@ -132,72 +128,65 @@ class QPDFTokenizer
QPDF_DLL
QPDFTokenizer();
- // If called, treat EOF as a separate token type instead of an
- // error. This was introduced in QPDF 4.1 to facilitate
- // tokenizing content streams.
+ // If called, treat EOF as a separate token type instead of an error. This was introduced in
+ // QPDF 4.1 to facilitate tokenizing content streams.
QPDF_DLL
void allowEOF();
- // If called, readToken will return "ignorable" tokens for space
- // and comments. This was added in QPDF 8.
+ // If called, readToken will return "ignorable" tokens for space and comments. This was added in
+ // QPDF 8.
QPDF_DLL
void includeIgnorable();
- // There are two modes of operation: push and pull. The pull
- // method is easier but requires an input source. The push method
- // is more complicated but can be used to tokenize a stream of
+ // There are two modes of operation: push and pull. The pull method is easier but requires an
+ // input source. The push method is more complicated but can be used to tokenize a stream of
// incoming characters in a pipeline.
// Push mode:
- // Keep presenting characters with presentCharacter() and
- // presentEOF() and calling getToken() until getToken() returns
- // true. When it does, be sure to check unread_ch and to unread ch
- // if it is true.
+ // Keep presenting characters with presentCharacter() and presentEOF() and calling getToken()
+ // until getToken() returns true. When it does, be sure to check unread_ch and to unread ch if
+ // it is true.
- // It these are called when a token is available, an exception
- // will be thrown.
+ // If these are called when a token is available, an exception will be thrown.
QPDF_DLL
void presentCharacter(char ch);
QPDF_DLL
void presentEOF();
- // If a token is available, return true and initialize token with
- // the token, unread_char with whether or not we have to unread
- // the last character, and if unread_char, ch with the character
- // to unread.
+ // If a token is available, return true and initialize token with the token, unread_char with
+ // whether or not we have to unread the last character, and if unread_char, ch with the
+ // character to unread.
QPDF_DLL
bool getToken(Token& token, bool& unread_char, char& ch);
- // This function returns true of the current character is between
- // tokens (i.e., white space that is not part of a string) or is
- // part of a comment. A tokenizing filter can call this to
+ // This function returns true if the current character is between tokens (i.e., white space that
+ // is not part of a string) or is part of a comment. A tokenizing filter can call this to
// determine whether to output the character.
QPDF_DLL
bool betweenTokens();
// Pull mode:
- // Read a token from an input source. Context describes the
- // context in which the token is being read and is used in the
- // exception thrown if there is an error. After a token is read,
- // the position of the input source returned by input->tell()
- // points to just after the token, and the input source's "last
- // offset" as returned by input->getLastOffset() points to the
+ // Read a token from an input source. Context describes the context in which the token is being
+ // read and is used in the exception thrown if there is an error. After a token is read, the
+ // position of the input source returned by input->tell() points to just after the token, and
+ // the input source's "last offset" as returned by input->getLastOffset() points to the
// beginning of the token.
QPDF_DLL
Token readToken(
+ InputSource& input, std::string const& context, bool allow_bad = false, size_t max_len = 0);
+ QPDF_DLL
+ Token readToken(
std::shared_ptr<InputSource> input,
std::string const& context,
bool allow_bad = false,
size_t max_len = 0);
- // Calling this method puts the tokenizer in a state for reading
- // inline images. You should call this method after reading the
- // character following the ID operator. In that state, it will
- // return all data up to BUT NOT INCLUDING the next EI token.
- // After you call this method, the next call to readToken (or the
- // token created next time getToken returns true) will either be
+ // Calling this method puts the tokenizer in a state for reading inline images. You should call
+ // this method after reading the character following the ID operator. In that state, it will
+ // return all data up to BUT NOT INCLUDING the next EI token. After you call this method, the
+ // next call to readToken (or the token created next time getToken returns true) will either be
// tt_inline_image or tt_bad. This is the only way readToken
// returns a tt_inline_image token.
QPDF_DLL
@@ -206,21 +195,18 @@ class QPDFTokenizer
private:
friend class QPDFParser;
- // Read a token from an input source. Context describes the
- // context in which the token is being read and is used in the
- // exception thrown if there is an error. After a token is read,
- // the position of the input source returned by input->tell()
- // points to just after the token, and the input source's "last
- // offset" as returned by input->getLastOffset() points to the
- // beginning of the token. Returns false if the token is bad
- // or if scanning produced an error message for any reason.
+ // Read a token from an input source. Context describes the context in which the token is being
+ // read and is used in the exception thrown if there is an error. After a token is read, the
+ // position of the input source returned by input->tell() points to just after the token, and
+ // the input source's "last offset" as returned by input->getLastOffset() points to the
+ // beginning of the token. Returns false if the token is bad or if scanning produced an error
+ // message for any reason.
bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0);
- // The following methods are only valid after nextToken has been called
- // and until another QPDFTokenizer method is called. They allow the results
- // of calling nextToken to be accessed without creating a Token, thus
- // avoiding copying information that may not be needed.
+ // The following methods are only valid after nextToken has been called and until another
+ // QPDFTokenizer method is called. They allow the results of calling nextToken to be accessed
+ // without creating a Token, thus avoiding copying information that may not be needed.
inline token_type_e getType() const noexcept;
inline std::string const& getValue() const noexcept;
inline std::string const& getRawValue() const noexcept;
diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc
index e144c7a4..a11d06f2 100644
--- a/libqpdf/JSON.cc
+++ b/libqpdf/JSON.cc
@@ -218,13 +218,12 @@ JSON::encode_string(std::string const& str)
while (iter != end) {
auto c = static_cast<unsigned char>(*iter);
if ((c > 34 && c != '\\') || c == ' ' || c == 33) {
- // Optimistically check that no char in str requires escaping.
- // Hopefully we can just return the input str.
+ // Optimistically check that no char in str requires escaping. Hopefully we can just
+ // return the input str.
++iter;
} else {
- // We found a char that requires escaping. Initialize result to the
- // chars scanned so far, append/replace the rest of str one char at
- // a time, and return the result.
+ // We found a char that requires escaping. Initialize result to the chars scanned so
+ // far, append/replace the rest of str one char at a time, and return the result.
std::string result{begin, iter};
for (; iter != end; ++iter) {
@@ -532,12 +531,10 @@ JSON::checkSchemaInternal(
} else if (sch_arr) {
auto n_elements = sch_arr->elements.size();
if (n_elements == 1) {
- // A single-element array in the schema allows a single
- // element in the object or a variable-length array, each
- // of whose items must conform to the single element of
- // the schema array. This doesn't apply to arrays of
- // arrays -- we fall back to the behavior of allowing a
- // single item only when the object is not an array.
+ // A single-element array in the schema allows a single element in the object or a
+ // variable-length array, each of whose items must conform to the single element of the
+ // schema array. This doesn't apply to arrays of arrays -- we fall back to the behavior
+ // of allowing a single item only when the object is not an array.
if (this_arr) {
int i = 0;
for (auto const& element: this_arr->elements) {
@@ -560,10 +557,9 @@ JSON::checkSchemaInternal(
err_prefix + " is supposed to be an array of length " + std::to_string(n_elements));
return false;
} else {
- // A multi-element array in the schema must correspond to
- // an element of the same length in the object. Each
- // element in the object is validated against the
- // corresponding element in the schema.
+ // A multi-element array in the schema must correspond to an element of the same length
+ // in the object. Each element in the object is validated against the corresponding
+ // element in the schema.
size_t i = 0;
for (auto const& element: this_arr->elements) {
checkSchemaInternal(
@@ -701,8 +697,7 @@ JSONParser::handle_u_code(
QTC::TC("libtests", "JSON 16 high high");
throw std::runtime_error(
"JSON: offset " + std::to_string(new_high_offset) +
- ": UTF-16 high surrogate found after previous high surrogate"
- " at offset " +
+ ": UTF-16 high surrogate found after previous high surrogate at offset " +
std::to_string(high_offset));
}
high_offset = new_high_offset;
@@ -713,8 +708,7 @@ JSONParser::handle_u_code(
QTC::TC("libtests", "JSON 16 low not after high");
throw std::runtime_error(
"JSON: offset " + std::to_string(offset) +
- ": UTF-16 low surrogate found not immediately after high"
- " surrogate");
+ ": UTF-16 low surrogate found not immediately after high surrogate");
}
high_offset = 0;
codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF);
@@ -797,8 +791,8 @@ JSONParser::append()
++offset;
}
-// Append current character to token, advance to next input character and
-// transition to 'next' lexer state.
+// Append current character to token, advance to next input character and transition to 'next' lexer
+// state.
inline void
JSONParser::append(lex_state_e next)
{
@@ -808,8 +802,7 @@ JSONParser::append(lex_state_e next)
++offset;
}
-// Advance to next input character without appending the current character to
-// token.
+// Advance to next input character without appending the current character to token.
inline void
JSONParser::ignore()
{
@@ -817,8 +810,8 @@ JSONParser::ignore()
++offset;
}
-// Advance to next input character without appending the current character to
-// token and transition to 'next' lexer state.
+// Advance to next input character without appending the current character to token and transition
+// to 'next' lexer state.
inline void
JSONParser::ignore(lex_state_e next)
{
@@ -848,9 +841,8 @@ JSONParser::getToken()
if ((*p < 32 && *p >= 0)) {
if (*p == '\t' || *p == '\n' || *p == '\r') {
- // Legal white space not permitted in strings. This will always
- // end the current token (unless we are still before the start
- // of the token).
+ // Legal white space not permitted in strings. This will always end the current
+ // token (unless we are still before the start of the token).
if (lex_state == ls_top) {
ignore();
} else {
@@ -1044,8 +1036,7 @@ JSONParser::getToken()
QTC::TC("libtests", "JSON 16 dangling high");
throw std::runtime_error(
"JSON: offset " + std::to_string(high_offset) +
- ": UTF-16 high surrogate not followed by low "
- "surrogate");
+ ": UTF-16 high surrogate not followed by low surrogate");
}
ignore();
return;
@@ -1062,8 +1053,7 @@ JSONParser::getToken()
case '\\':
case '\"':
case '/':
- // \/ is allowed in json input, but so is /, so we
- // don't map / to \/ in output.
+ // \/ is allowed in json input, but so is /, so we don't map / to \/ in output.
token += *p;
break;
case 'b':
@@ -1113,8 +1103,8 @@ JSONParser::getToken()
}
}
- // We only get here if on end of input or if the last character was a
- // control character or other delimiter.
+ // We only get here if on end of input or if the last character was a control character or other
+ // delimiter.
if (!token.empty()) {
switch (lex_state) {
@@ -1189,8 +1179,7 @@ JSONParser::handleToken()
} else if (parser_state == ps_array_after_item) {
parser_state = ps_array_after_comma;
} else {
- throw std::logic_error("JSONParser::handleToken: unexpected parser"
- " state for comma");
+ throw std::logic_error("JSONParser::handleToken: unexpected parser state for comma");
}
return;
@@ -1323,10 +1312,9 @@ JSONParser::handleToken()
if (item.isDictionary() || item.isArray()) {
stack.push_back({parser_state, item});
- // Calling container start method is postponed until after
- // adding the containers to their parent containers, if any.
- // This makes it much easier to keep track of the current
- // nesting level.
+ // Calling container start method is postponed until after adding the containers to their
+ // parent containers, if any. This makes it much easier to keep track of the current nesting
+ // level.
if (item.isDictionary()) {
if (reactor) {
reactor->dictionaryStart();
diff --git a/libqpdf/Pl_Buffer.cc b/libqpdf/Pl_Buffer.cc
index 76c8a5d5..c3184104 100644
--- a/libqpdf/Pl_Buffer.cc
+++ b/libqpdf/Pl_Buffer.cc
@@ -13,8 +13,7 @@ Pl_Buffer::Pl_Buffer(char const* identifier, Pipeline* next) :
Pl_Buffer::~Pl_Buffer()
{
- // Must be explicit and not inline -- see QPDF_DLL_CLASS in
- // README-maintainer
+ // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
}
void
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 806a8cb2..64ff4715 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -32,8 +32,8 @@
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
-// This must be a fixed value. This API returns a const reference to
-// it, and the C API relies on its being static as well.
+// This must be a fixed value. This API returns a const reference to it, and the C API relies on its
+// being static as well.
std::string const QPDF::qpdf_version(QPDF_VERSION);
static char const* EMPTY_PDF = (
@@ -212,33 +212,26 @@ QPDF::QPDF() :
m(new Members())
{
m->tokenizer.allowEOF();
- // Generate a unique ID. It just has to be unique among all QPDF
- // objects allocated throughout the lifetime of this running
- // application.
+ // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout
+ // the lifetime of this running application.
static std::atomic<unsigned long long> unique_id{0};
m->unique_id = unique_id.fetch_add(1ULL);
}
QPDF::~QPDF()
{
- // If two objects are mutually referential (through each object
- // having an array or dictionary that contains an indirect
- // reference to the other), the circular references in the
- // std::shared_ptr objects will prevent the objects from being
- // deleted. Walk through all objects in the object cache, which is
- // those objects that we read from the file, and break all
- // resolved indirect references by replacing them with an internal
- // object type representing that they have been destroyed. Note
- // that we can't break references like this at any time when the
- // QPDF object is active. The call to reset also causes all direct
- // QPDFObjectHandle objects that are reachable from this object to
- // release their association with this QPDF. Direct objects are
- // not destroyed since they can be moved to other QPDF objects
- // safely.
-
- // At this point, obviously no one is still using the QPDF object,
- // but we'll explicitly clear the xref table anyway just to
- // prevent any possibility of resolve() succeeding.
+ // If two objects are mutually referential (through each object having an array or dictionary
+ // that contains an indirect reference to the other), the circular references in the
+ // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects
+ // in the object cache, which is those objects that we read from the file, and break all
+ // resolved indirect references by replacing them with an internal object type representing that
+ // they have been destroyed. Note that we can't break references like this at any time when the
+ // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that
+ // are reachable from this object to release their association with this QPDF. Direct objects
+ // are not destroyed since they can be moved to other QPDF objects safely.
+
+ // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear
+ // the xref table anyway just to prevent any possibility of resolve() succeeding.
m->xref_table.clear();
for (auto const& iter: m->obj_cache) {
iter.second.object->disconnect();
@@ -406,18 +399,15 @@ QPDF::findHeader()
}
p += 5;
std::string version;
- // Note: The string returned by line.c_str() is always
- // null-terminated. The code below never overruns the buffer
- // because a null character always short-circuits further
- // advancement.
+ // Note: The string returned by line.c_str() is always null-terminated. The code below never
+ // overruns the buffer because a null character always short-circuits further advancement.
bool valid = validatePDFVersion(p, version);
if (valid) {
m->pdf_version = version;
if (global_offset != 0) {
- // Empirical evidence strongly suggests that when there is
- // leading material prior to the PDF header, all explicit
- // offsets in the file are such that 0 points to the
- // beginning of the header.
+ // Empirical evidence strongly suggests that when there is leading material prior to the
+ // PDF header, all explicit offsets in the file are such that 0 points to the beginning
+ // of the header.
QTC::TC("qpdf", "QPDF global offset");
m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
}
@@ -448,14 +438,12 @@ QPDF::parse(char const* password)
if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
QTC::TC("qpdf", "QPDF not a pdf file");
warn(damagedPDF("", 0, "can't find PDF header"));
- // QPDFWriter writes files that usually require at least
- // version 1.2 for /FlateDecode
+ // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
m->pdf_version = "1.2";
}
- // PDF spec says %%EOF must be found within the last 1024 bytes of
- // the file. We add an extra 30 characters to leave room for the
- // startxref stuff.
+ // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
+ // 30 characters to leave room for the startxref stuff.
m->file->seek(0, SEEK_END);
qpdf_offset_t end_offset = m->file->tell();
qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
@@ -494,8 +482,8 @@ void
QPDF::inParse(bool v)
{
if (m->in_parse == v) {
- // This happens if QPDFParser::parse tries to
- // resolve an indirect object while it is parsing.
+ // This happens if QPDFParser::parse tries to resolve an indirect object while it is
+ // parsing.
throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug."
" Please report at https://github.com/qpdf/qpdf/issues.");
}
@@ -518,7 +506,7 @@ QPDF::warn(
qpdf_offset_t offset,
std::string const& message)
{
- warn(QPDFExc(error_code, this->getFilename(), object, offset, message));
+ warn(QPDFExc(error_code, getFilename(), object, offset, message));
}
void
@@ -534,9 +522,8 @@ void
QPDF::reconstruct_xref(QPDFExc& e)
{
if (m->reconstructed_xref) {
- // Avoid xref reconstruction infinite loops. This is getting
- // very hard to reproduce because qpdf is throwing many fewer
- // exceptions while parsing. Most situations are warnings now.
+ // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
+ // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.
throw e;
}
@@ -572,8 +559,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN);
qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length());
if (token_start >= next_line_start) {
- // don't process yet -- wait until we get to the line
- // containing this token
+ // don't process yet -- wait until we get to the line containing this token
} else if (t1.isInteger()) {
QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN);
if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) {
@@ -594,22 +580,18 @@ QPDF::reconstruct_xref(QPDFExc& e)
}
if (!m->trailer.isInitialized()) {
- // We could check the last encountered object to see if it was
- // an xref stream. If so, we could try to get the trailer
- // from there. This may make it possible to recover files
- // with bad startxref pointers even when they have object
- // streams.
+ // We could check the last encountered object to see if it was an xref stream. If so, we
+ // could try to get the trailer from there. This may make it possible to recover files with
+ // bad startxref pointers even when they have object streams.
throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file");
}
- // We could iterate through the objects looking for streams and
- // try to find objects inside of them, but it's probably not worth
- // the trouble. Acrobat can't recover files with any errors in an
- // xref stream, and this would be a real long shot anyway. If we
- // wanted to do anything that involved looking at stream contents,
- // we'd also have to call initializeEncryption() here. It's safe
- // to call it more than once.
+ // We could iterate through the objects looking for streams and try to find objects inside of
+ // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
+ // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
+ // that involved looking at stream contents, we'd also have to call initializeEncryption() here.
+ // It's safe to call it more than once.
}
void
@@ -622,12 +604,10 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
char buf[7];
memset(buf, 0, sizeof(buf));
m->file->seek(xref_offset, SEEK_SET);
- // Some files miss the mark a little with startxref. We could
- // do a better job of searching in the neighborhood for
- // something that looks like either an xref table or stream,
- // but the simple heuristic of skipping whitespace can help
- // with the xref table case and is harmless with the stream
- // case.
+ // Some files miss the mark a little with startxref. We could do a better job of searching
+ // in the neighborhood for something that looks like either an xref table or stream, but the
+ // simple heuristic of skipping whitespace can help with the xref table case and is harmless
+ // with the stream case.
bool done = false;
bool skipped_space = false;
while (!done) {
@@ -646,9 +626,8 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
}
m->file->read(buf, sizeof(buf) - 1);
- // The PDF spec says xref must be followed by a line
- // terminator, but files exist in the wild where it is
- // terminated by arbitrary whitespace.
+ // The PDF spec says xref must be followed by a line terminator, but files exist in the wild
+ // where it is terminated by arbitrary whitespace.
if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) {
if (skipped_space) {
QTC::TC("qpdf", "QPDF xref skipped space");
@@ -662,8 +641,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
: (buf[4] == ' ') ? 2
: 9999));
int skip = 4;
- // buf is null-terminated, and QUtil::is_space('\0') is
- // false, so this won't overrun.
+ // buf is null-terminated, and QUtil::is_space('\0') is false, so this won't overrun.
while (QUtil::is_space(buf[skip])) {
++skip;
}
@@ -697,16 +675,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
") is not one plus the highest object number (" + std::to_string(max_obj) + ")")));
}
- // We no longer need the deleted_objects table, so go ahead and
- // clear it out to make sure we never depend on its being set.
+ // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
+ // never depend on its being set.
m->deleted_objects.clear();
}
bool
QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
{
- // is_space and is_digit both return false on '\0', so this will
- // not overrun the null-terminated buffer.
+ // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
+ // buffer.
char const* p = line.c_str();
char const* start = line.c_str();
@@ -753,8 +731,8 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
bool
QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type)
{
- // is_space and is_digit both return false on '\0', so this will
- // not overrun the null-terminated buffer.
+ // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
+ // buffer.
char const* p = line.c_str();
// Skip zero or more spaces. There aren't supposed to be any.
@@ -862,8 +840,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
"xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
}
if (type == 'f') {
- // Save deleted items until after we've checked the
- // XRefStm, if any.
+ // Save deleted items until after we've checked the XRefStm, if any.
deleted_items.push_back(QPDFObjGen(toI(i), f2));
} else {
insertXrefEntry(toI(i), 1, f1, f2);
@@ -902,9 +879,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
} else {
if (cur_trailer.getKey("/XRefStm").isInteger()) {
- // Read the xref stream but disregard any return value
- // -- we'll use our trailer's /Prev key instead of the
- // xref stream's.
+ // Read the xref stream but disregard any return value -- we'll use our trailer's
+ // /Prev key instead of the xref stream's.
(void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue());
} else {
throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm");
@@ -1035,8 +1011,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
num_entries += toS(indx.at(i));
}
- // entry_size and num_entries have both been validated to ensure
- // that this multiplication does not cause an overflow.
+ // entry_size and num_entries have both been validated to ensure that this multiplication does
+ // not cause an overflow.
size_t expected_size = entry_size * num_entries;
std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);
@@ -1060,9 +1036,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
bool saw_first_compressed_object = false;
- // Actual size vs. expected size check above ensures that we will
- // not overflow any buffers here. We know that entry_size *
- // num_entries is equal to the size of the buffer.
+ // Actual size vs. expected size check above ensures that we will not overflow any buffers here.
+ // We know that entry_size * num_entries is equal to the size of the buffer.
unsigned char const* data = bp->getBuffer();
for (size_t i = 0; i < num_entries; ++i) {
// Read this entry
@@ -1081,17 +1056,15 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
}
}
- // Get the object and generation number. The object number is
- // based on /Index. The generation number is 0 unless this is
- // an uncompressed object record, in which case the generation
- // number appears as the third field.
+ // Get the object and generation number. The object number is based on /Index. The
+ // generation number is 0 unless this is an uncompressed object record, in which case the
+ // generation number appears as the third field.
int obj = toI(indx.at(cur_chunk));
if ((obj < 0) || ((std::numeric_limits<int>::max() - obj) < chunk_count)) {
std::ostringstream msg;
msg.imbue(std::locale::classic());
msg << "adding " << chunk_count << " to " << obj
- << " while computing index in xref stream would cause"
- << " an integer overflow";
+ << " while computing index in xref stream would cause an integer overflow";
throw std::range_error(msg.str());
}
obj += chunk_count;
@@ -1113,10 +1086,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
m->first_xref_item_offset = xref_offset;
}
if (fields[0] == 0) {
- // Ignore fields[2], which we don't care about in this
- // case. This works around the issue of some PDF files
- // that put invalid values, like -1, here for deleted
- // objects.
+ // Ignore fields[2], which we don't care about in this case. This works around the issue
+ // of some PDF files that put invalid values, like -1, here for deleted objects.
fields[2] = 0;
}
insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
@@ -1143,17 +1114,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
void
QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite)
{
- // Populate the xref table in such a way that the first reference
- // to an object that we see, which is the one in the latest xref
- // table in which it appears, is the one that gets stored. This
- // works because we are reading more recent appends before older
- // ones. Exception: if overwrite is true, then replace any
- // existing object. This is used in xref recovery mode, which
- // reads the file from beginning to end.
-
- // If there is already an entry for this object and generation in
- // the table, it means that a later xref table has registered this
- // object. Disregard this one.
+ // Populate the xref table in such a way that the first reference to an object that we see,
+ // which is the one in the latest xref table in which it appears, is the one that gets stored.
+ // This works because we are reading more recent appends before older ones. Exception: if
+ // overwrite is true, then replace any existing object. This is used in xref recovery mode,
+ // which reads the file from beginning to end.
+
+ // If there is already an entry for this object and generation in the table, it means that a
+ // later xref table has registered this object. Disregard this one.
{ // private scope
int gen = (f0 == 2 ? 0 : f2);
QPDFObjGen og(obj, gen);
@@ -1220,8 +1188,8 @@ QPDF::showXRefTable()
}
}
-// Resolve all objects in the xref table. If this triggers a xref table
-// reconstruction abort and return false. Otherwise return true.
+// Resolve all objects in the xref table. If this triggers an xref table reconstruction, abort and
+// return false. Otherwise return true.
bool
QPDF::resolveXRefTable()
{
@@ -1237,8 +1205,8 @@ QPDF::resolveXRefTable()
return true;
}
-// Ensure all objects in the pdf file, including those in indirect
-// references, appear in the object cache.
+// Ensure all objects in the pdf file, including those in indirect references, appear in the object
+// cache.
void
QPDF::fixDanglingReferences(bool force)
{
@@ -1255,10 +1223,9 @@ QPDF::fixDanglingReferences(bool force)
size_t
QPDF::getObjectCount()
{
- // This method returns the next available indirect object number.
- // makeIndirectObject uses it for this purpose. After
- // fixDanglingReferences is called, all objects in the xref table
- // will also be in obj_cache.
+ // This method returns the next available indirect object number. makeIndirectObject uses it for
+ // this purpose. After fixDanglingReferences is called, all objects in the xref table will also
+ // be in obj_cache.
fixDanglingReferences();
QPDFObjGen og;
if (!m->obj_cache.empty()) {
@@ -1270,8 +1237,7 @@ QPDF::getObjectCount()
std::vector<QPDFObjectHandle>
QPDF::getAllObjects()
{
- // After fixDanglingReferences is called, all objects are in the
- // object cache.
+ // After fixDanglingReferences is called, all objects are in the object cache.
fixDanglingReferences();
std::vector<QPDFObjectHandle> result;
for (auto const& iter: m->obj_cache) {
@@ -1315,34 +1281,27 @@ QPDF::readObject(
auto object = QPDFParser(input, m->last_object_description, m->tokenizer, decrypter, this)
.parse(empty, false);
if (empty) {
- // Nothing in the PDF spec appears to allow empty objects, but
- // they have been encountered in actual PDF files and Adobe
- // Reader appears to ignore them.
+ // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
+ // actual PDF files and Adobe Reader appears to ignore them.
warn(damagedPDF(input, input->getLastOffset(), "empty object treated as null"));
} else if (object.isDictionary() && (!in_object_stream)) {
// check for stream
qpdf_offset_t cur_offset = input->tell();
if (readToken(input).isWord("stream")) {
- // The PDF specification states that the word "stream"
- // should be followed by either a carriage return and
- // a newline or by a newline alone. It specifically
- // disallowed following it by a carriage return alone
- // since, in that case, there would be no way to tell
- // whether the NL in a CR NL sequence was part of the
- // stream data. However, some readers, including
- // Adobe reader, accept a carriage return by itself
- // when followed by a non-newline character, so that's
- // what we do here. We have also seen files that have
- // extraneous whitespace between the stream keyword and
- // the newline.
+ // The PDF specification states that the word "stream" should be followed by either a
+ // carriage return and a newline or by a newline alone. It specifically disallowed
+ // following it by a carriage return alone since, in that case, there would be no way to
+ // tell whether the NL in a CR NL sequence was part of the stream data. However, some
+ // readers, including Adobe reader, accept a carriage return by itself when followed by
+ // a non-newline character, so that's what we do here. We have also seen files that have
+ // extraneous whitespace between the stream keyword and the newline.
bool done = false;
while (!done) {
done = true;
char ch;
if (input->read(&ch, 1) == 0) {
- // A premature EOF here will result in some
- // other problem that will get reported at
- // another time.
+ // A premature EOF here will result in some other problem that will get reported
+ // at another time.
} else if (ch == '\n') {
// ready to read stream data
QTC::TC("qpdf", "QPDF stream with NL only");
@@ -1353,10 +1312,8 @@ QPDF::readObject(
// Ready to read stream data
QTC::TC("qpdf", "QPDF stream with CRNL");
} else {
- // Treat the \r by itself as the
- // whitespace after endstream and
- // start reading stream data in spite
- // of not having seen a newline.
+ // Treat the \r by itself as the whitespace after endstream and start
+ // reading stream data in spite of not having seen a newline.
QTC::TC("qpdf", "QPDF stream with CR only");
input->unreadCh(ch);
warn(damagedPDF(
@@ -1381,9 +1338,8 @@ QPDF::readObject(
}
}
- // Must get offset before accessing any additional
- // objects since resolving a previously unresolved
- // indirect object will change file position.
+ // Must get offset before accessing any additional objects since resolving a previously
+ // unresolved indirect object will change file position.
qpdf_offset_t stream_offset = input->tell();
size_t length = 0;
@@ -1427,8 +1383,7 @@ QPDF::readObject(
}
}
- // Override last_offset so that it points to the beginning of the
- // object we just read
+ // Override last_offset so that it points to the beginning of the object we just read
input->setLastOffset(offset);
return object;
}
@@ -1449,8 +1404,7 @@ size_t
QPDF::recoverStreamLength(
std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset)
{
- // Try to reconstruct stream length by looking for
- // endstream or endobj
+ // Try to reconstruct stream length by looking for endstream or endobj
warn(damagedPDF(input, stream_offset, "attempting to recover stream length"));
PatternFinder ef(*this, &QPDF::findEndstream);
@@ -1481,9 +1435,8 @@ QPDF::recoverStreamLength(
}
}
if (this_obj_offset && (this_og == og)) {
- // Well, we found endstream\nendobj within the space
- // allowed for this object, so we're probably in good
- // shape.
+ // Well, we found endstream\nendobj within the space allowed for this object, so we're
+ // probably in good shape.
} else {
QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
}
@@ -1518,14 +1471,12 @@ QPDF::readObjectAtOffset(
{
bool check_og = true;
if (exp_og.getObj() == 0) {
- // This method uses an expect object ID of 0 to indicate that
- // we don't know or don't care what the actual object ID is at
- // this offset. This is true when we read the xref stream and
- // linearization hint streams. In this case, we don't verify
- // the expect object ID/generation against what was read from
- // the file. There is also no reason to attempt xref recovery
- // if we get a failure in this case since the read attempt was
- // not triggered by an xref lookup.
+ // This method uses an expected object ID of 0 to indicate that we don't know or don't care
+ // what the actual object ID is at this offset. This is true when we read the xref stream
+ // and linearization hint streams. In this case, we don't verify the expected object
+ // ID/generation against what was read from the file. There is also no reason to attempt
+ // xref recovery if we get a failure in this case since the read attempt was not triggered
+ // by an xref lookup.
check_og = false;
try_recovery = false;
}
@@ -1535,11 +1486,9 @@ QPDF::readObjectAtOffset(
try_recovery = false;
}
- // Special case: if offset is 0, just return null. Some PDF
- // writers, in particular "Mac OS X 10.7.5 Quartz PDFContext", may
- // store deleted objects in the xref table as "0000000000 00000
- // n", which is not correct, but it won't hurt anything for to
- // ignore these.
+ // Special case: if offset is 0, just return null. Some PDF writers, in particular
+ // "Mac OS X 10.7.5 Quartz PDFContext", may store deleted objects in the xref table as
+ // "0000000000 00000 n", which is not correct, but it won't hurt anything to ignore these.
if (offset == 0) {
QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
warn(damagedPDF(0, "object has offset 0"));
@@ -1579,8 +1528,7 @@ QPDF::readObjectAtOffset(
// Will be retried below
throw e;
} else {
- // We can try reading the object anyway even if the ID
- // doesn't match.
+ // We can try reading the object anyway even if the ID doesn't match.
warn(e);
}
}
@@ -1617,16 +1565,13 @@ QPDF::readObjectAtOffset(
}
if (isUnresolved(og)) {
- // Store the object in the cache here so it gets cached
- // whether we first know the offset or whether we first know
- // the object ID and generation (in which we case we would get
- // here through resolve).
-
- // Determine the end offset of this object before and after
- // white space. We use these numbers to validate
- // linearization hint tables. Offsets and lengths of objects
- // may imply the end of an object to be anywhere between these
- // values.
+ // Store the object in the cache here so it gets cached whether we first know the offset or
+ // whether we first know the object ID and generation (in which case we would get here
+ // through resolve).
+
+ // Determine the end offset of this object before and after white space. We use these
+ // numbers to validate linearization hint tables. Offsets and lengths of objects may imply
+ // the end of an object to be anywhere between these values.
qpdf_offset_t end_before_space = m->file->tell();
// skip over spaces
@@ -1643,41 +1588,31 @@ QPDF::readObjectAtOffset(
}
qpdf_offset_t end_after_space = m->file->tell();
if (skip_cache_if_in_xref && m->xref_table.count(og)) {
- // Ordinarily, an object gets read here when resolved
- // through xref table or stream. In the special case of
- // the xref stream and linearization hint tables, the
- // offset comes from another source. For the specific case
- // of xref streams, the xref stream is read and loaded
- // into the object cache very early in parsing.
- // Ordinarily, when a file is updated by appending, items
- // inserted into the xref table in later updates take
- // precedence over earlier items. In the special case of
- // reusing the object number previously used as the xref
- // stream, we have the following order of events:
+ // Ordinarily, an object gets read here when resolved through xref table or stream. In
+ // the special case of the xref stream and linearization hint tables, the offset comes
+ // from another source. For the specific case of xref streams, the xref stream is read
+ // and loaded into the object cache very early in parsing. Ordinarily, when a file is
+ // updated by appending, items inserted into the xref table in later updates take
+ // precedence over earlier items. In the special case of reusing the object number
+ // previously used as the xref stream, we have the following order of events:
//
// * reused object gets loaded into the xref table
// * old object is read here while reading xref streams
// * original xref entry is ignored (since already in xref table)
//
- // It is the second step that causes a problem. Even
- // though the xref table is correct in this case, the old
- // object is already in the cache and so effectively
- // prevails over the reused object. To work around this
- // issue, we have a special case for the xref stream (via
- // the skip_cache_if_in_xref): if the object is already in
- // the xref stream, don't cache what we read here.
+ // It is the second step that causes a problem. Even though the xref table is correct in
+ // this case, the old object is already in the cache and so effectively prevails over
+ // the reused object. To work around this issue, we have a special case for the xref
+ // stream (via the skip_cache_if_in_xref): if the object is already in the xref stream,
+ // don't cache what we read here.
//
- // It is likely that the same bug may exist for
- // linearization hint tables, but the existing code uses
- // end_before_space and end_after_space from the cache, so
- // fixing that would require more significant rework. The
- // chances of a linearization hint stream being reused
- // seems smaller because the xref stream is probably the
- // highest object in the file and the linearization hint
- // stream would be some random place in the middle, so I'm
- // leaving that bug unfixed for now. If the bug were to be
- // fixed, we could use !check_og in place of
- // skip_cache_if_in_xref.
+ // It is likely that the same bug may exist for linearization hint tables, but the
+ // existing code uses end_before_space and end_after_space from the cache, so fixing
+ // that would require more significant rework. The chances of a linearization hint
+ // stream being reused seem smaller because the xref stream is probably the highest
+ // object in the file and the linearization hint stream would be some random place in
+ // the middle, so I'm leaving that bug unfixed for now. If the bug were to be fixed, we
+ // could use !check_og in place of skip_cache_if_in_xref.
QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
} else {
updateCache(og, oh.getObj(), end_before_space, end_after_space);
@@ -1695,9 +1630,8 @@ QPDF::resolve(QPDFObjGen og)
}
if (m->resolving.count(og)) {
- // This can happen if an object references itself directly or
- // indirectly in some key that has to be resolved during
- // object parsing, such as stream length.
+ // This can happen if an object references itself directly or indirectly in some key that
+ // has to be resolved during object parsing, such as stream length.
QTC::TC("qpdf", "QPDF recursion loop in resolve");
warn(damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
updateCache(og, QPDF_Null::create(), -1, -1);
@@ -1758,8 +1692,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
"supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
}
- // For linearization data in the object, use the data from the
- // object stream for the objects in the stream.
+ // For linearization data in the object, use the data from the object stream for the objects in
+ // the stream.
QPDFObjGen stream_og(obj_stream_number, 0);
qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space;
qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space;
@@ -1804,11 +1738,10 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
offsets[num] = toI(offset + first);
}
- // To avoid having to read the object stream multiple times, store
- // all objects that would be found here in the cache. Remember
- // that some objects stored here might have been overridden by new
- // objects appended to the file, so it is necessary to recheck the
- // xref table and only cache what would actually be resolved here.
+ // To avoid having to read the object stream multiple times, store all objects that would be
+ // found here in the cache. Remember that some objects stored here might have been overridden
+ // by new objects appended to the file, so it is necessary to recheck the xref table and only
+ // cache what would actually be resolved here.
for (auto const& iter: offsets) {
QPDFObjGen og(iter.first, 0);
QPDFXRefEntry const& entry = m->xref_table[og];
@@ -1936,8 +1869,7 @@ QPDF::reserveStream(QPDFObjGen const& og)
QPDFObjectHandle
QPDF::getObject(QPDFObjGen const& og)
{
- // This method is called by the parser and therefore must not
- // resolve any objects.
+ // This method is called by the parser and therefore must not resolve any objects.
if (!isCached(og)) {
m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
}
@@ -1991,48 +1923,38 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign)
{
// Here's an explanation of what's going on here.
//
- // A QPDFObjectHandle that is an indirect object has an owning
- // QPDF. The object ID and generation refers to an object in the
- // owning QPDF. When we copy the QPDFObjectHandle from a foreign
- // QPDF into the local QPDF, we have to replace all indirect
- // object references with references to the corresponding object
- // in the local file.
+ // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and
+ // generation refer to an object in the owning QPDF. When we copy the QPDFObjectHandle from a
+ // foreign QPDF into the local QPDF, we have to replace all indirect object references with
+ // references to the corresponding object in the local file.
//
- // To do this, we maintain mappings from foreign object IDs to
- // local object IDs for each foreign QPDF that we are copying
- // from. The mapping is stored in an ObjCopier, which contains a
+ // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign
+ // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a
// mapping from the foreign ObjGen to the local QPDFObjectHandle.
//
- // To copy, we do a deep traversal of the foreign object with loop
- // detection to discover all indirect objects that are
- // encountered, stopping at page boundaries. Whenever we encounter
- // an indirect object, we check to see if we have already created
- // a local copy of it. If not, we allocate a "reserved" object
- // (or, for a stream, just a new stream) and store in the map the
+ // To copy, we do a deep traversal of the foreign object with loop detection to discover all
+ // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an
+ // indirect object, we check to see if we have already created a local copy of it. If not, we
+ // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the
// mapping from the foreign object ID to the new object. While we
// do this, we keep a list of objects to copy.
//
- // Once we are done with the traversal, we copy all the objects
- // that we need to copy. However, the copies will contain indirect
- // object IDs that refer to objects in the foreign file. We need
- // to replace them with references to objects in the local file.
- // This is what replaceForeignIndirectObjects does. Once we have
- // created a copy of the foreign object with all the indirect
- // references replaced with new ones in the local context, we can
- // replace the local reserved object with the copy. This mechanism
- // allows us to copy objects with circular references in any
- // order.
-
- // For streams, rather than copying the objects, we set up the
- // stream data to pull from the original stream by using a stream
- // data provider. This is done in a manner that doesn't require
- // the original QPDF object but may require the original source of
- // the stream data with special handling for immediate_copy_from.
- // This logic is also in replaceForeignIndirectObjects.
-
- // Note that we explicitly allow use of copyForeignObject on page
- // objects. It is a documented use case to copy pages this way if
- // the intention is to not update the pages tree.
+ // Once we are done with the traversal, we copy all the objects that we need to copy. However,
+ // the copies will contain indirect object IDs that refer to objects in the foreign file. We
+ // need to replace them with references to objects in the local file. This is what
+ // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with
+ // all the indirect references replaced with new ones in the local context, we can replace the
+ // local reserved object with the copy. This mechanism allows us to copy objects with circular
+ // references in any order.
+
+ // For streams, rather than copying the objects, we set up the stream data to pull from the
+ // original stream by using a stream data provider. This is done in a manner that doesn't
+ // require the original QPDF object but may require the original source of the stream data with
+ // special handling for immediate_copy_from. This logic is also in
+ // replaceForeignIndirectObjects.
+
+ // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented
+ // use case to copy pages this way if the intention is to not update the pages tree.
if (!foreign.isIndirect()) {
QTC::TC("qpdf", "QPDF copyForeign direct");
throw std::logic_error("QPDF::copyForeign called with direct object handle");
@@ -2049,12 +1971,10 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign)
" at the beginning of copyForeignObject");
}
- // Make sure we have an object in this file for every referenced
- // object in the old file. obj_copier.object_map maps foreign
- // QPDFObjGen to local objects. For everything new that we have
- // to copy, the local object will be a reservation, unless it is a
- // stream, in which case the local object will already be a
- // stream.
+ // Make sure we have an object in this file for every referenced object in the old file.
+ // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we
+ // have to copy, the local object will be a reservation, unless it is a stream, in which case
+ // the local object will already be a stream.
reserveObjects(foreign, obj_copier, true);
if (!obj_copier.visiting.empty()) {
@@ -2140,8 +2060,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_cop
QTC::TC("qpdf", "QPDF replace indirect");
auto mapping = obj_copier.object_map.find(foreign.getObjGen());
if (mapping == obj_copier.object_map.end()) {
- // This case would occur if this is a reference to a Page
- // or Pages object that we didn't traverse into.
+ // This case would occur if this is a reference to a Page or Pages object that we didn't
+ // traverse into.
QTC::TC("qpdf", "QPDF replace foreign indirect with null");
result = QPDFObjectHandle::newNull();
} else {
@@ -2192,9 +2112,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_cop
void
QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
{
- // This method was originally written for copying foreign streams,
- // but it is used by QPDFObjectHandle to copy streams from the
- // same QPDF object as well.
+ // This method was originally written for copying foreign streams, but it is used by
+ // QPDFObjectHandle to copy streams from the same QPDF object as well.
QPDFObjectHandle dict = result.getDict();
QPDFObjectHandle old_dict = foreign.getDict();
@@ -2204,8 +2123,8 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
}
QPDFObjGen local_og(result.getObjGen());
- // Copy information from the foreign stream so we can pipe its
- // data later without keeping the original QPDF object around.
+ // Copy information from the foreign stream so we can pipe its data later without keeping the
+ // original QPDF object around.
QPDF& foreign_stream_qpdf =
foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
@@ -2217,10 +2136,9 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
}
std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer();
if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
- // Pull the stream data into a buffer before attempting
- // the copy operation. Do it on the source stream so that
- // if the source stream is copied multiple times, we don't
- // have to keep duplicating the memory.
+ // Pull the stream data into a buffer before attempting the copy operation. Do it on the
+ // source stream so that if the source stream is copied multiple times, we don't have to
+ // keep duplicating the memory.
QTC::TC("qpdf", "QPDF immediate copy stream data");
foreign.replaceStreamData(
foreign.getRawStreamData(),
@@ -2263,8 +2181,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
void
QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
{
- // Force objects to be read from the input source if needed, then
- // swap them in the cache.
+ // Force objects to be read from the input source if needed, then swap them in the cache.
resolve(og1);
resolve(og2);
m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object);
@@ -2338,9 +2255,8 @@ QPDF::getRoot()
if (!root.isDictionary()) {
throw damagedPDF("", 0, "unable to find /Root dictionary");
} else if (
- // Check_mode is an interim solution to request #810 pending a more
- // comprehensive review of the approach to more extensive checks and
- // warning levels.
+ // Check_mode is an interim solution to request #810 pending a more comprehensive review of
+ // the approach to more extensive checks and warning levels.
m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));
root.replaceKey("/Type", "/Catalog"_qpdf);
@@ -2373,14 +2289,11 @@ QPDF::getObjectStreamData(std::map<int, int>& omap)
std::vector<QPDFObjGen>
QPDF::getCompressibleObjGens()
{
- // Return a list of objects that are allowed to be in object
- // streams. Walk through the objects by traversing the document
- // from the root, including a traversal of the pages tree. This
- // makes that objects that are on the same page are more likely to
- // be in the same object stream, which is slightly more efficient,
- // particularly with linearized files. This is better than
- // iterating through the xref table since it avoids preserving
- // orphaned items.
+ // Return a list of objects that are allowed to be in object streams. Walk through the objects
+ // by traversing the document from the root, including a traversal of the pages tree. This
+ // makes objects that are on the same page more likely to be in the same object stream,
+ // which is slightly more efficient, particularly with linearized files. This is better than
+ // iterating through the xref table since it avoids preserving orphaned items.
// Exclude encryption dictionary, if any
QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt");
@@ -2555,9 +2468,8 @@ QPDF::pipeForeignStreamData(
will_retry);
}
-// Throw a generic exception when we lack context for something
-// more specific. New code should not use this. This method exists
-// to improve somewhat from calling assert in very old code.
+// Throw a generic exception when we lack context for something more specific. New code should not
+// use this. This method exists to improve somewhat from calling assert in very old code.
void
QPDF::stopOnError(std::string const& message)
{
@@ -2584,33 +2496,31 @@ QPDF::damagedPDF(
return damagedPDF(input, m->last_object_description, offset, message);
}
-// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from
-// m->file.
+// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file.
QPDFExc
QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
{
return QPDFExc(qpdf_e_damaged_pdf, m->file->getName(), object, offset, message);
}
-// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from
-// m->file and the offset from .m->file->getLastOffset().
+// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the
+// offset from m->file->getLastOffset().
QPDFExc
QPDF::damagedPDF(std::string const& object, std::string const& message)
{
return damagedPDF(object, m->file->getLastOffset(), message);
}
-// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from
-// m->file and the object from .m->last_object_description.
+// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
+// from m->last_object_description.
QPDFExc
QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
{
return damagedPDF(m->last_object_description, offset, message);
}
-// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from
-// m->file, the object from m->last_object_description and the offset from
-// m->file->getLastOffset().
+// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object
+// from m->last_object_description and the offset from m->file->getLastOffset().
QPDFExc
QPDF::damagedPDF(std::string const& message)
{
diff --git a/libqpdf/QPDFAcroFormDocumentHelper.cc b/libqpdf/QPDFAcroFormDocumentHelper.cc
index d210d50a..aca4bf1f 100644
--- a/libqpdf/QPDFAcroFormDocumentHelper.cc
+++ b/libqpdf/QPDFAcroFormDocumentHelper.cc
@@ -15,9 +15,8 @@ QPDFAcroFormDocumentHelper::QPDFAcroFormDocumentHelper(QPDF& qpdf) :
QPDFDocumentHelper(qpdf),
m(new Members())
{
- // We have to analyze up front. Otherwise, when we are adding
- // annotations and fields, we are in a temporarily unstable
- // configuration where some widget annotations are not reachable.
+ // We have to analyze up front. Otherwise, when we are adding annotations and fields, we are in
+ // a temporarily unstable configuration where some widget annotations are not reachable.
analyze();
}
@@ -77,14 +76,11 @@ QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector<QPDFObjectHandle>
}
if (obj.hasKey("/T")) {
- // Find something we can append to the partial name that
- // makes the fully qualified name unique. When we find
- // something, reuse the same suffix for all fields in this
- // group with the same name. We can only change the name
- // of fields that have /T, and this field's /T is always
- // at the end of the fully qualified name, appending to /T
- // has the effect of appending the same thing to the fully
- // qualified name.
+ // Find something we can append to the partial name that makes the fully qualified
+ // name unique. When we find something, reuse the same suffix for all fields in this
+ // group with the same name. We can only change the name of fields that have /T, and
+ // this field's /T is always at the end of the fully qualified name, appending to /T
+ // has the effect of appending the same thing to the fully qualified name.
std::string old_name = QPDFFormFieldObjectHelper(obj).getFullyQualifiedName();
if (renames.count(old_name) == 0) {
std::string new_name = old_name;
@@ -253,8 +249,7 @@ QPDFAcroFormDocumentHelper::analyze()
fields = QPDFObjectHandle::newArray();
}
- // Traverse /AcroForm to find annotations and map them
- // bidirectionally to fields.
+ // Traverse /AcroForm to find annotations and map them bidirectionally to fields.
QPDFObjGen::set visited;
int nfields = fields.getArrayNItems();
@@ -263,12 +258,10 @@ QPDFAcroFormDocumentHelper::analyze()
traverseField(fields.getArrayItem(i), null, 0, visited);
}
- // All Widget annotations should have been encountered by
- // traversing /AcroForm, but in case any weren't, find them by
- // walking through pages, and treat any widget annotation that is
- // not associated with a field as its own field. This just ensures
- // that requesting the field for any annotation we find through a
- // page's /Annots list will have some associated field. Note that
+ // All Widget annotations should have been encountered by traversing /AcroForm, but in case any
+ // weren't, find them by walking through pages, and treat any widget annotation that is not
+ // associated with a field as its own field. This just ensures that requesting the field for any
+ // annotation we find through a page's /Annots list will have some associated field. Note that
// a file that contains this kind of error will probably not
// actually work with most viewers.
@@ -278,13 +271,11 @@ QPDFAcroFormDocumentHelper::analyze()
QPDFObjGen og(annot.getObjGen());
if (m->annotation_to_field.count(og) == 0) {
QTC::TC("qpdf", "QPDFAcroFormDocumentHelper orphaned widget");
- // This is not supposed to happen, but it's easy
- // enough for us to handle this case. Treat the
- // annotation as its own field. This could allow qpdf
- // to sensibly handle a case such as a PDF creator
- // adding a self-contained annotation (merged with the
- // field dictionary) to the page's /Annots array and
- // forgetting to also put it in /AcroForm.
+ // This is not supposed to happen, but it's easy enough for us to handle this case.
+ // Treat the annotation as its own field. This could allow qpdf to sensibly handle a
+ // case such as a PDF creator adding a self-contained annotation (merged with the
+ // field dictionary) to the page's /Annots array and forgetting to also put it in
+ // /AcroForm.
annot.warnIfPossible("this widget annotation is not"
" reachable from /AcroForm in the document catalog");
m->annotation_to_field[og] = QPDFFormFieldObjectHelper(annot);
@@ -299,14 +290,14 @@ QPDFAcroFormDocumentHelper::traverseField(
QPDFObjectHandle field, QPDFObjectHandle parent, int depth, QPDFObjGen::set& visited)
{
if (depth > 100) {
- // Arbitrarily cut off recursion at a fixed depth to avoid
- // specially crafted files that could cause stack overflow.
+ // Arbitrarily cut off recursion at a fixed depth to avoid specially crafted files that
+ // could cause stack overflow.
return;
}
if (!field.isIndirect()) {
QTC::TC("qpdf", "QPDFAcroFormDocumentHelper direct field");
- field.warnIfPossible("encountered a direct object as a field or annotation while"
- " traversing /AcroForm; ignoring field or annotation");
+ field.warnIfPossible("encountered a direct object as a field or annotation while "
+ "traversing /AcroForm; ignoring field or annotation");
return;
}
if (!field.isDictionary()) {
@@ -322,13 +313,11 @@ QPDFAcroFormDocumentHelper::traverseField(
return;
}
- // A dictionary encountered while traversing the /AcroForm field
- // may be a form field, an annotation, or the merger of the two. A
- // field that has no fields below it is a terminal. If a terminal
- // field looks like an annotation, it is an annotation because
- // annotation dictionary fields can be merged with terminal field
- // dictionaries. Otherwise, the annotation fields might be there
- // to be inherited by annotations below it.
+ // A dictionary encountered while traversing the /AcroForm field may be a form field, an
+ // annotation, or the merger of the two. A field that has no fields below it is a terminal. If a
+ // terminal field looks like an annotation, it is an annotation because annotation dictionary
+ // fields can be merged with terminal field dictionaries. Otherwise, the annotation fields might
+ // be there to be inherited by annotations below it.
bool is_annotation = false;
bool is_field = (0 == depth);
@@ -363,8 +352,7 @@ QPDFAcroFormDocumentHelper::traverseField(
std::string name = foh.getFullyQualifiedName();
auto old = m->field_to_name.find(f_og);
if (old != m->field_to_name.end()) {
- // We might be updating after a name change, so remove any
- // old information
+ // We might be updating after a name change, so remove any old information
std::string old_name = old->second;
m->name_to_fields[old_name].erase(f_og);
}
@@ -412,11 +400,9 @@ QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded()
for (auto& aoh: getWidgetAnnotationsForPage(page)) {
QPDFFormFieldObjectHelper ffh = getFieldForAnnotation(aoh);
if (ffh.getFieldType() == "/Btn") {
- // Rather than generating appearances for button
- // fields, rely on what's already there. Just make
- // sure /AS is consistent with /V, which we can do by
- // resetting the value of the field back to itself.
- // This code is referenced in a comment in
+ // Rather than generating appearances for button fields, rely on what's already
+ // there. Just make sure /AS is consistent with /V, which we can do by resetting the
+ // value of the field back to itself. This code is referenced in a comment in
// QPDFFormFieldObjectHelper::generateAppearance.
if (ffh.isRadioButton() || ffh.isCheckbox()) {
ffh.setV(ffh.getValue());
@@ -437,16 +423,13 @@ QPDFAcroFormDocumentHelper::adjustInheritedFields(
bool override_q,
int from_default_q)
{
- // Override /Q or /DA if needed. If this object has a field type,
- // directly or inherited, it is a field and not just an
- // annotation. In that case, we need to override if we are getting
- // a value from the document that is different from the value we
- // would have gotten from the old document. We must take care not
- // to override an explicit value. It's possible that /FT may be
- // inherited by lower fields that may explicitly set /DA or /Q or
- // that this is a field whose type does not require /DA or /Q and
- // we may be put a value on the field that is unused. This is
- // harmless, so it's not worth trying to work around.
+ // Override /Q or /DA if needed. If this object has a field type, directly or inherited, it is a
+ // field and not just an annotation. In that case, we need to override if we are getting a value
+ // from the document that is different from the value we would have gotten from the old
+ // document. We must take care not to override an explicit value. It's possible that /FT may be
+ // inherited by lower fields that may explicitly set /DA or /Q or that this is a field whose
+ // type does not require /DA or /Q and we may put a value on the field that is unused. This
+ // is harmless, so it's not worth trying to work around.
auto has_explicit = [](QPDFFormFieldObjectHelper& field, std::string const& key) {
if (field.getObjectHandle().hasKey(key)) {
@@ -550,45 +533,36 @@ void
QPDFAcroFormDocumentHelper::adjustDefaultAppearances(
QPDFObjectHandle obj, std::map<std::string, std::map<std::string, std::string>> const& dr_map)
{
- // This method is called on a field that has been copied from
- // another file but whose /DA still refers to resources in the
- // original file's /DR.
-
- // When appearance streams are generated for variable text fields
- // (see ISO 32000 PDF spec section 12.7.3.3), the field's /DA is
- // used to generate content of the appearance stream. /DA contains
- // references to resources that may be resolved in the document's
- // /DR dictionary, which appears in the document's /AcroForm
- // dictionary. For fields that we copied from other documents, we
- // need to ensure that resources are mapped correctly in the case
- // of conflicting names. For example, if a.pdf's /DR has /F1
- // pointing to one font and b.pdf's /DR also has /F1 but it points
- // elsewhere, we need to make sure appearance streams of fields
- // copied from b.pdf into a.pdf use whatever font /F1 meant in
- // b.pdf, not whatever it means in a.pdf. This method takes care
- // of that. It is only called on fields copied from foreign files.
+ // This method is called on a field that has been copied from another file but whose /DA still
+ // refers to resources in the original file's /DR.
+
+ // When appearance streams are generated for variable text fields (see ISO 32000 PDF spec
+ // section 12.7.3.3), the field's /DA is used to generate content of the appearance stream. /DA
+ // contains references to resources that may be resolved in the document's /DR dictionary, which
+ // appears in the document's /AcroForm dictionary. For fields that we copied from other
+ // documents, we need to ensure that resources are mapped correctly in the case of conflicting
+ // names. For example, if a.pdf's /DR has /F1 pointing to one font and b.pdf's /DR also has /F1
+ // but it points elsewhere, we need to make sure appearance streams of fields copied from b.pdf
+ // into a.pdf use whatever font /F1 meant in b.pdf, not whatever it means in a.pdf. This method
+ // takes care of that. It is only called on fields copied from foreign files.
// A few notes:
//
- // * If the from document's /DR and the current document's /DR
- // have conflicting keys, we have already resolved the conflicts
- // before calling this method. The dr_map parameter contains the
- // mapping from old keys to new keys.
+ // * If the from document's /DR and the current document's /DR have conflicting keys, we have
+ // already resolved the conflicts before calling this method. The dr_map parameter contains
+ // the mapping from old keys to new keys.
//
- // * /DA may be inherited from the document's /AcroForm
- // dictionary. By the time this method has been called, we have
- // already copied any document-level values into the fields to
- // avoid having them inherit from the new document. This was
- // done in adjustInheritedFields.
+ // * /DA may be inherited from the document's /AcroForm dictionary. By the time this method has
+ // been called, we have already copied any document-level values into the fields to avoid
+ // having them inherit from the new document. This was done in adjustInheritedFields.
auto DA = obj.getKey("/DA");
if (!DA.isString()) {
return;
}
- // Find names in /DA. /DA is a string that contains content
- // stream-like code, so we create a stream out of the string and
- // then filter it. We don't attach the stream to anything, so it
+ // Find names in /DA. /DA is a string that contains content stream-like code, so we create a
+ // stream out of the string and then filter it. We don't attach the stream to anything, so it
// will get discarded.
ResourceFinder rf;
auto da_stream = QPDFObjectHandle::newStream(&this->qpdf, DA.getUTF8Value());
@@ -599,8 +573,8 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances(
QTC::TC("qpdf", "QPDFAcroFormDocumentHelper /DA parse error");
}
} catch (std::exception& e) {
- // No way to reproduce in test suite right now since error
- // conditions are converted to warnings.
+ // No way to reproduce in test suite right now since error conditions are converted to
+ // warnings.
obj.warnIfPossible(
std::string("Unable to parse /DA: ") + e.what() +
"; this form field may not update properly");
@@ -620,15 +594,12 @@ void
QPDFAcroFormDocumentHelper::adjustAppearanceStream(
QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map)
{
- // We don't have to modify appearance streams or their resource
- // dictionaries for them to display properly, but we need to do so
- // to make them save to regenerate. Suppose an appearance stream
- // as a font /F1 that is different from /F1 in /DR, and that when
- // we copy the field, /F1 is remapped to /F1_1. When the field is
- // regenerated, /F1_1 won't appear in the stream's resource
- // dictionary, so the regenerated appearance stream will revert to
- // the /F1_1 in /DR. If we adjust existing appearance streams, we
- // are protected from this problem.
+ // We don't have to modify appearance streams or their resource dictionaries for them to display
+ // properly, but we need to do so to make them safe to regenerate. Suppose an appearance stream
+ // has a font /F1 that is different from /F1 in /DR, and that when we copy the field, /F1 is
+ // remapped to /F1_1. When the field is regenerated, /F1_1 won't appear in the stream's resource
+ // dictionary, so the regenerated appearance stream will revert to the /F1_1 in /DR. If we
+ // adjust existing appearance streams, we are protected from this problem.
auto dict = stream.getDict();
auto resources = dict.getKey("/Resources");
@@ -640,17 +611,15 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
resources = this->qpdf.makeIndirectObject(resources);
}
dict.replaceKey("/Resources", resources);
- // Create a dictionary with top-level keys so we can use
- // mergeResources to force them to be unshared. We will also use
- // this to resolve conflicts that may already be in the resource
+ // Create a dictionary with top-level keys so we can use mergeResources to force them to be
+ // unshared. We will also use this to resolve conflicts that may already be in the resource
// dictionary.
auto merge_with = QPDFObjectHandle::newDictionary();
for (auto const& top_key: dr_map) {
merge_with.replaceKey(top_key.first, QPDFObjectHandle::newDictionary());
}
resources.mergeResources(merge_with);
- // Rename any keys in the resource dictionary that we
- // remapped.
+ // Rename any keys in the resource dictionary that we remapped.
for (auto const& i1: dr_map) {
std::string const& top_key = i1.first;
auto subdict = resources.getKey(top_key);
@@ -662,12 +631,10 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
std::string const& new_key = i2.second;
auto existing_new = subdict.getKey(new_key);
if (!existing_new.isNull()) {
- // The resource dictionary already has a key in it
- // matching what we remapped an old key to, so we'll
- // have to move it out of the way. Stick it in
- // merge_with, which we will re-merge with the
- // dictionary when we're done. We know merge_with
- // already has dictionaries for all the top keys.
+ // The resource dictionary already has a key in it matching what we remapped an old
+ // key to, so we'll have to move it out of the way. Stick it in merge_with, which we
+ // will re-merge with the dictionary when we're done. We know merge_with already has
+ // dictionaries for all the top keys.
QTC::TC("qpdf", "QPDFAcroFormDocumentHelper ap conflict");
merge_with.getKey(top_key).replaceKey(new_key, existing_new);
}
@@ -679,9 +646,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
}
}
}
- // Deal with any any conflicts by re-merging with merge_with and
- // updating our local copy of dr_map, which we will use to modify
- // the stream contents.
+ // Deal with any conflicts by re-merging with merge_with and updating our local copy of
+ // dr_map, which we will use to modify the stream contents.
resources.mergeResources(merge_with, &dr_map);
// Remove empty subdictionaries
for (auto iter: resources.ditems()) {
@@ -702,8 +668,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
auto tf = std::shared_ptr<QPDFObjectHandle::TokenFilter>(rr);
stream.addTokenFilter(tf);
} catch (std::exception& e) {
- // No way to reproduce in test suite right now since error
- // conditions are converted to warnings.
+ // No way to reproduce in test suite right now since error conditions are converted to
+ // warnings.
stream.warnIfPossible(std::string("Unable to parse appearance stream: ") + e.what());
}
}
@@ -729,24 +695,22 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
}
bool foreign = (from_qpdf != &this->qpdf);
- // It's possible that we will transform annotations that don't
- // include any form fields. This code takes care not to muck
- // around with /AcroForm unless we have to.
+ // It's possible that we will transform annotations that don't include any form fields. This
+ // code takes care not to muck around with /AcroForm unless we have to.
QPDFObjectHandle acroform = this->qpdf.getRoot().getKey("/AcroForm");
QPDFObjectHandle from_acroform = from_qpdf->getRoot().getKey("/AcroForm");
- // /DA and /Q may be inherited from the document-level /AcroForm
- // dictionary. If we are copying a foreign stream and the stream
- // is getting one of these values from its document's /AcroForm,
- // we will need to copy the value explicitly so that it doesn't
- // start getting its default from the destination document.
+ // /DA and /Q may be inherited from the document-level /AcroForm dictionary. If we are copying a
+ // foreign stream and the stream is getting one of these values from its document's /AcroForm,
+ // we will need to copy the value explicitly so that it doesn't start getting its default from
+ // the destination document.
bool override_da = false;
bool override_q = false;
std::string from_default_da;
int from_default_q = 0;
- // If we copy any form fields, we will need to merge the source
- // document's /DR into this document's /DR.
+ // If we copy any form fields, we will need to merge the source document's /DR into this
+ // document's /DR.
QPDFObjectHandle from_dr = QPDFObjectHandle::newNull();
if (foreign) {
std::string default_da;
@@ -782,9 +746,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
}
}
- // If we have to merge /DR, we will need a mapping of conflicting
- // keys for rewriting /DA. Set this up for lazy initialization in
- // case we encounter any form fields.
+ // If we have to merge /DR, we will need a mapping of conflicting keys for rewriting /DA. Set
+ // this up for lazy initialization in case we encounter any form fields.
std::map<std::string, std::map<std::string, std::string>> dr_map;
bool initialized_dr_map = false;
QPDFObjectHandle dr = QPDFObjectHandle::newNull();
@@ -804,11 +767,9 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
if (!dr.isIndirect()) {
dr = acroform.replaceKeyAndGetNew("/DR", this->qpdf.makeIndirectObject(dr));
}
- // Merge the other document's /DR, creating a conflict
- // map. mergeResources checks to make sure both objects
- // are dictionaries. By this point, if this is foreign,
- // from_dr has been copied, so we use the target qpdf as
- // the owning qpdf.
+ // Merge the other document's /DR, creating a conflict map. mergeResources checks to
+ // make sure both objects are dictionaries. By this point, if this is foreign, from_dr
+ // has been copied, so we use the target qpdf as the owning qpdf.
from_dr.makeResourcesIndirect(this->qpdf);
dr.mergeResources(from_dr, &dr_map);
@@ -818,8 +779,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
}
};
- // This helper prevents us from copying the same object
- // multiple times.
+ // This helper prevents us from copying the same object multiple times.
std::map<QPDFObjGen, QPDFObjectHandle> orig_to_copy;
auto maybe_copy_object = [&](QPDFObjectHandle& to_copy) {
auto og = to_copy.getObjGen();
@@ -842,39 +802,28 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
continue;
}
- // Make copies of annotations and fields down to the
- // appearance streams, preserving all internal referential
- // integrity. When the incoming annotations are from a
- // different file, we first copy them locally. Then, whether
- // local or foreign, we copy them again so that if we bring
- // the same annotation in multiple times (e.g. overlaying a
- // foreign page onto multiple local pages or a local page onto
- // multiple other local pages), we don't create annotations
- // that are referenced in more than one place. If we did that,
- // the effect of applying transformations would be cumulative,
- // which is definitely not what we want. Besides, annotations
- // and fields are not intended to be referenced in multiple
- // places.
-
- // Determine if this annotation is attached to a form field.
- // If so, the annotation may be the same object as the form
- // field, or the form field may have the annotation as a kid.
- // In either case, we have to walk up the field structure to
- // find the top-level field. Within one iteration through a
- // set of annotations, we don't want to copy the same item
- // more than once. For example, suppose we have field A with
- // kids B, C, and D, each of which has annotations BA, CA, and
- // DA. When we get to BA, we will find that BA is a kid of B
- // which is under A. When we do a copyForeignObject of A, it
- // will also copy everything else because of the indirect
- // references. When we clone BA, we will want to clone A and
- // then update A's clone's kid to point B's clone and B's
- // clone's parent to point to A's clone. The same thing holds
- // for annotations. Next, when we get to CA, we will again
- // discover that A is the top, but we don't want to re-copy A.
- // We want CA's clone to be linked to the same clone as BA's.
- // Failure to do this will break up things like radio button
- // groups, which all have to kids of the same parent.
+ // Make copies of annotations and fields down to the appearance streams, preserving all
+ // internal referential integrity. When the incoming annotations are from a different file,
+ // we first copy them locally. Then, whether local or foreign, we copy them again so that if
+ // we bring the same annotation in multiple times (e.g. overlaying a foreign page onto
+ // multiple local pages or a local page onto multiple other local pages), we don't create
+ // annotations that are referenced in more than one place. If we did that, the effect of
+ // applying transformations would be cumulative, which is definitely not what we want.
+ // Besides, annotations and fields are not intended to be referenced in multiple places.
+
+ // Determine if this annotation is attached to a form field. If so, the annotation may be
+ // the same object as the form field, or the form field may have the annotation as a kid. In
+ // either case, we have to walk up the field structure to find the top-level field. Within
+ // one iteration through a set of annotations, we don't want to copy the same item more than
+ // once. For example, suppose we have field A with kids B, C, and D, each of which has
+ // annotations BA, CA, and DA. When we get to BA, we will find that BA is a kid of B which
+ // is under A. When we do a copyForeignObject of A, it will also copy everything else
+ // because of the indirect references. When we clone BA, we will want to clone A and then
+ // update A's clone's kid to point B's clone and B's clone's parent to point to A's clone.
+ // The same thing holds for annotations. Next, when we get to CA, we will again discover
+ // that A is the top, but we don't want to re-copy A. We want CA's clone to be linked to the
+ // same clone as BA's. Failure to do this will break up things like radio button groups,
+ // which all have to be kids of the same parent.
auto ffield = from_afdh->getFieldForAnnotation(annot);
auto ffield_oh = ffield.getObjectHandle();
@@ -886,36 +835,29 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
} else if ((!ffield_oh.isNull()) && (!ffield_oh.isIndirect())) {
ffield_oh.warnIfPossible("ignoring form field not indirect");
} else if (!ffield_oh.isNull()) {
- // A field and its associated annotation can be the same
- // object. This matters because we don't want to clone the
- // annotation and field separately in this case.
+ // A field and its associated annotation can be the same object. This matters because we
+ // don't want to clone the annotation and field separately in this case.
have_field = true;
// Find the top-level field. It may be the field itself.
top_field = ffield.getTopLevelField(&have_parent).getObjectHandle();
if (foreign) {
- // copyForeignObject returns the same value if called
- // multiple times with the same field. Create/retrieve
- // the local copy of the original field. This pulls
- // over everything the field references including
- // annotations and appearance streams, but it's
- // harmless to call copyForeignObject on them too.
- // They will already be copied, so we'll get the right
- // object back.
+ // copyForeignObject returns the same value if called multiple times with the same
+ // field. Create/retrieve the local copy of the original field. This pulls over
+ // everything the field references including annotations and appearance streams, but
+ // it's harmless to call copyForeignObject on them too. They will already be copied,
+ // so we'll get the right object back.
// top_field and ffield_oh are known to be indirect.
top_field = this->qpdf.copyForeignObject(top_field);
ffield_oh = this->qpdf.copyForeignObject(ffield_oh);
} else {
- // We don't need to add top_field to old_fields if
- // it's foreign because the new copy of the foreign
- // field won't be referenced anywhere. It's just the
- // starting point for us to make an additional local
- // copy of.
+ // We don't need to add top_field to old_fields if it's foreign because the new copy
+ // of the foreign field won't be referenced anywhere. It's just the starting point
+ // for us to make an additional local copy of.
old_fields.insert(top_field.getObjGen());
}
- // Traverse the field, copying kids, and preserving
- // integrity.
+ // Traverse the field, copying kids, and preserving integrity.
std::list<QPDFObjectHandle> queue;
QPDFObjGen::set seen;
if (maybe_copy_object(top_field)) {
@@ -933,8 +875,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
parent.warnIfPossible(
"while traversing field " + obj.getObjGen().unparse(',') +
", found parent (" + parent_og.unparse(',') +
- ") that had not been seen, indicating likely"
- " invalid field structure");
+ ") that had not been seen, indicating likely invalid field "
+ "structure");
}
}
auto kids = obj.getKey("/Kids");
@@ -955,17 +897,13 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
if (foreign) {
// Lazily initialize our /DR and the conflict map.
init_dr_map();
- // The spec doesn't say anything about /DR on the
- // field, but lots of writers put one there, and
- // it is frequently the same as the document-level
- // /DR. To avoid having the field's /DR point to
- // information that we are not maintaining, just
- // reset it to that if it exists. Empirical
- // evidence suggests that many readers, including
- // Acrobat, Adobe Acrobat Reader, chrome, firefox,
- // the mac Preview application, and several of the
- // free readers on Linux all ignore /DR at the
- // field level.
+ // The spec doesn't say anything about /DR on the field, but lots of writers
+ // put one there, and it is frequently the same as the document-level /DR.
+ // To avoid having the field's /DR point to information that we are not
+ // maintaining, just reset it to that if it exists. Empirical evidence
+ // suggests that many readers, including Acrobat, Adobe Acrobat Reader,
+ // chrome, firefox, the mac Preview application, and several of the free
+ // readers on Linux all ignore /DR at the field level.
if (obj.hasKey("/DR")) {
obj.replaceKey("/DR", dr);
}
@@ -1029,8 +967,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
}
}
- // Now we can safely mutate the annotation and its appearance
- // streams.
+ // Now we can safely mutate the annotation and its appearance streams.
for (auto& stream: streams) {
auto dict = stream.getDict();
auto omatrix = dict.getKey("/Matrix");
diff --git a/libqpdf/QPDFFormFieldObjectHelper.cc b/libqpdf/QPDFFormFieldObjectHelper.cc
index ef5570f3..67975451 100644
--- a/libqpdf/QPDFFormFieldObjectHelper.cc
+++ b/libqpdf/QPDFFormFieldObjectHelper.cc
@@ -310,15 +310,15 @@ QPDFFormFieldObjectHelper::setV(QPDFObjectHandle value, bool need_appearances)
}
}
if (!okay) {
- this->oh.warnIfPossible("ignoring attempt to set a checkbox field to a"
- " value of other than /Yes or /Off");
+ this->oh.warnIfPossible("ignoring attempt to set a checkbox field to a value of "
+ "other than /Yes or /Off");
}
} else if (isRadioButton()) {
if (value.isName()) {
setRadioButtonValue(value);
} else {
- this->oh.warnIfPossible("ignoring attempt to set a radio button field to"
- " an object that is not a name");
+ this->oh.warnIfPossible(
+ "ignoring attempt to set a radio button field to an object that is not a name");
}
} else if (isPushbutton()) {
this->oh.warnIfPossible("ignoring attempt set the value of a pushbutton field");
@@ -347,24 +347,19 @@ QPDFFormFieldObjectHelper::setV(std::string const& utf8_value, bool need_appeara
void
QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name)
{
- // Set the value of a radio button field. This has the following
- // specific behavior:
- // * If this is a radio button field that has a parent that is
- // also a radio button field and has no explicit /V, call itself
- // on the parent
- // * If this is a radio button field with children, set /V to the
- // given value. Then, for each child, if the child has the
- // specified value as one of its keys in the /N subdictionary of
- // its /AP (i.e. its normal appearance stream dictionary), set
- // /AS to name; otherwise, if /Off is a member, set /AS to /Off.
- // Note that we never turn on /NeedAppearances when setting a
- // radio button field.
+ // Set the value of a radio button field. This has the following specific behavior:
+ // * If this is a radio button field that has a parent that is also a radio button field and has
+ // no explicit /V, call itself on the parent
+ // * If this is a radio button field with children, set /V to the given value. Then, for each
+ // child, if the child has the specified value as one of its keys in the /N subdictionary of
+ // its /AP (i.e. its normal appearance stream dictionary), set /AS to name; otherwise, if /Off
+ // is a member, set /AS to /Off.
+ // Note that we never turn on /NeedAppearances when setting a radio button field.
QPDFObjectHandle parent = this->oh.getKey("/Parent");
if (parent.isDictionary() && parent.getKey("/Parent").isNull()) {
QPDFFormFieldObjectHelper ph(parent);
if (ph.isRadioButton()) {
- // This is most likely one of the individual buttons. Try
- // calling on the parent.
+ // This is most likely one of the individual buttons. Try calling on the parent.
QTC::TC("qpdf", "QPDFFormFieldObjectHelper set parent radio button");
ph.setRadioButtonValue(name);
return;
@@ -384,8 +379,7 @@ QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name)
QPDFObjectHandle AP = kid.getKey("/AP");
QPDFObjectHandle annot;
if (AP.isNull()) {
- // The widget may be below. If there is more than one,
- // just find the first one.
+ // The widget may be below. If there is more than one, just find the first one.
QPDFObjectHandle grandkids = kid.getKey("/Kids");
if (grandkids.isArray()) {
int ngrandkids = grandkids.getArrayNItems();
@@ -458,9 +452,8 @@ void
QPDFFormFieldObjectHelper::generateAppearance(QPDFAnnotationObjectHelper& aoh)
{
std::string ft = getFieldType();
- // Ignore field types we don't know how to generate appearances
- // for. Button fields don't really need them -- see code in
- // QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded.
+ // Ignore field types we don't know how to generate appearances for. Button fields don't really
+ // need them -- see code in QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded.
if ((ft == "/Tx") || (ft == "/Ch")) {
generateTextAppearance(aoh);
}
@@ -562,15 +555,13 @@ ValueSetter::writeAppearance()
{
this->replaced = true;
- // This code does not take quadding into consideration because
- // doing so requires font metric information, which we don't
- // have in many cases.
+ // This code does not take quadding into consideration because doing so requires font metric
+ // information, which we don't have in many cases.
double tfh = 1.2 * tf;
int dx = 1;
- // Write one or more lines, centered vertically, possibly with
- // one row highlighted.
+ // Write one or more lines, centered vertically, possibly with one row highlighted.
auto max_rows = static_cast<size_t>((bbox.ury - bbox.lly) / tfh);
bool highlight = false;
@@ -591,8 +582,7 @@ ValueSetter::writeAppearance()
}
}
if (found) {
- // Try to make the found item the second one, but
- // adjust for under/overflow.
+ // Try to make the found item the second one, but adjust for under/overflow.
int wanted_first = QIntC::to_int(found_idx) - 1;
int wanted_last = QIntC::to_int(found_idx + max_rows) - 2;
QTC::TC("qpdf", "QPDFFormFieldObjectHelper list found");
@@ -639,9 +629,8 @@ ValueSetter::writeAppearance()
dy -= tf;
write("q\nBT\n" + DA + "\n");
for (size_t i = 0; i < nlines; ++i) {
- // We could adjust Tm to translate to the beginning the first
- // line, set TL to tfh, and use T* for each subsequent line,
- // but doing this would require extracting any Tm from DA,
+ // We could adjust Tm to translate to the beginning of the first line, set TL to tfh, and
+ // use T* for each subsequent line, but doing this would require extracting any Tm from DA,
// which doesn't seem really worth the effort.
if (i == 0) {
write(
@@ -708,8 +697,8 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token)
case QPDFTokenizer::tt_word:
if (token.isWord("Tf")) {
if ((last_num > 1.0) && (last_num < 1000.0)) {
- // These ranges are arbitrary but keep us from doing
- // insane things or suffering from over/underflow
+ // These ranges are arbitrary but keep us from doing insane things or suffering from
+ // over/underflow
tf = last_num;
}
tf_idx = last_num_idx;
@@ -738,8 +727,7 @@ TfFinder::getDA()
if (QIntC::to_int(i) == tf_idx) {
double delta = strtod(cur.c_str(), nullptr) - this->tf;
if ((delta > 0.001) || (delta < -0.001)) {
- // tf doesn't match the font size passed to Tf, so
- // substitute.
+ // tf doesn't match the font size passed to Tf, so substitute.
QTC::TC("qpdf", "QPDFFormFieldObjectHelper fallback Tf");
cur = QUtil::double_to_string(tf);
}
@@ -852,6 +840,5 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao
}
AS.addTokenFilter(
- // line-break
std::shared_ptr<QPDFObjectHandle::TokenFilter>(new ValueSetter(DA, V, opt, tf, bbox)));
}
diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc
index 50ea5ea7..4c7fdd04 100644
--- a/libqpdf/QPDFJob.cc
+++ b/libqpdf/QPDFJob.cc
@@ -130,8 +130,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
if (!(w_obj.isNumber() && h_obj.isNumber())) {
if (!description.empty()) {
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
- v << prefix << ": " << description << ": not optimizing because image dictionary"
- << " is missing required keys\n";
+ v << prefix << ": " << description
+ << ": not optimizing because image dictionary is missing required keys\n";
});
}
return result;
@@ -142,14 +142,13 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
if (!description.empty()) {
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": " << description
- << ": not optimizing because image has other than"
- << " 8 bits per component\n";
+ << ": not optimizing because image has other than 8 bits per component\n";
});
}
return result;
}
- // Files have been seen in the wild whose width and height are
- // floating point, which is goofy, but we can deal with it.
+ // Files have been seen in the wild whose width and height are floating point, which is goofy,
+ // but we can deal with it.
JDIMENSION w = 0;
if (w_obj.isInteger()) {
w = w_obj.getUIntValueAsUInt();
@@ -178,8 +177,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
QTC::TC("qpdf", "QPDFJob image optimize colorspace");
if (!description.empty()) {
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
- v << prefix << ": " << description << ": not optimizing because qpdf can't optimize"
- << " images with this colorspace\n";
+ v << prefix << ": " << description
+ << ": not optimizing because qpdf can't optimize images with this colorspace\n";
});
}
return result;
@@ -190,8 +189,9 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
QTC::TC("qpdf", "QPDFJob image optimize too small");
if (!description.empty()) {
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
- v << prefix << ": " << description << ": not optimizing because image"
- << " is smaller than requested minimum dimensions\n";
+ v << prefix << ": " << description
+ << ": not optimizing because image is smaller than requested minimum "
+ "dimensions\n";
});
}
return result;
@@ -207,8 +207,8 @@ ImageOptimizer::evaluate(std::string const& description)
if (!image.pipeStreamData(nullptr, 0, qpdf_dl_specialized, true)) {
QTC::TC("qpdf", "QPDFJob image optimize no pipeline");
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
- v << prefix << ": " << description << ": not optimizing because unable to decode data"
- << " or data already uses DCT\n";
+ v << prefix << ": " << description
+ << ": not optimizing because unable to decode data or data already uses DCT\n";
});
return false;
}
@@ -227,8 +227,7 @@ ImageOptimizer::evaluate(std::string const& description)
QTC::TC("qpdf", "QPDFJob image optimize no shrink");
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": " << description
- << ": not optimizing because DCT compression does not"
- << " reduce image size\n";
+ << ": not optimizing because DCT compression does not reduce image size\n";
});
return false;
}
@@ -245,8 +244,8 @@ ImageOptimizer::provideStreamData(QPDFObjGen const&, Pipeline* pipeline)
std::shared_ptr<Pipeline> p = makePipeline("", pipeline);
if (p == nullptr) {
// Should not be possible
- image.warnIfPossible("unable to create pipeline after previous"
- " success; image data will be lost");
+ image.warnIfPossible(
+ "unable to create pipeline after previous success; image data will be lost");
pipeline->finish();
return;
}
@@ -441,8 +440,7 @@ QPDFJob::createQPDF()
processFile(pdf_sp, m->infilename.get(), m->password.get(), true, true);
} catch (QPDFExc& e) {
if (e.getErrorCode() == qpdf_e_password) {
- // Allow certain operations to work when an incorrect
- // password is supplied.
+ // Allow certain operations to work when an incorrect password is supplied.
if (m->check_is_encrypted || m->check_requires_password) {
m->encryption_status = qpdf_es_encrypted | qpdf_es_password_incorrect;
return nullptr;
@@ -464,8 +462,8 @@ QPDFJob::createQPDF()
return nullptr;
}
- // If we are updating from JSON, this has to be done first before
- // other options may cause transformations to the input.
+ // If we are updating from JSON, this has to be done first before other options may cause
+ // transformations to the input.
if (!m->update_from_json.empty()) {
pdf.updateFromJSON(m->update_from_json);
}
@@ -497,16 +495,16 @@ QPDFJob::writeQPDF(QPDF& pdf)
}
if (m->warnings && (!m->suppress_warnings)) {
if (createsOutput()) {
- *m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings;"
- << " resulting file may have some problems\n";
+ *m->log->getWarn()
+ << m->message_prefix
+ << ": operation succeeded with warnings; resulting file may have some problems\n";
} else {
*m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings\n";
}
}
if (m->report_mem_usage) {
- // Call get_max_memory_usage before generating output. When
- // debugging, it's easier if print statements from
- // get_max_memory_usage are not interleaved with the output.
+ // Call get_max_memory_usage before generating output. When debugging, it's easier if print
+ // statements from get_max_memory_usage are not interleaved with the output.
auto mem_usage = QUtil::get_max_memory_usage();
*m->log->getWarn() << "qpdf-max-memory-usage " << mem_usage << "\n";
}
@@ -568,16 +566,13 @@ QPDFJob::getExitCode() const
void
QPDFJob::checkConfiguration()
{
- // Do final checks for command-line consistency. (I always think
- // this is called doFinalChecks, so I'm putting that in a
- // comment.)
+ // Do final checks for command-line consistency. (I always think this is called doFinalChecks,
+ // so I'm putting that in a comment.)
if (m->replace_input) {
- // Check for --empty appears later after we have checked
- // m->infilename.
+ // Check for --empty appears later after we have checked m->infilename.
if (m->outfilename) {
- usage("--replace-input may not be used when"
- " an output file is specified");
+ usage("--replace-input may not be used when an output file is specified");
} else if (m->split_pages) {
usage("--split-pages may not be used with --replace-input");
} else if (m->json_version) {
@@ -585,8 +580,8 @@ QPDFJob::checkConfiguration()
}
}
if (m->json_version && (m->outfilename == nullptr)) {
- // The output file is optional with --json for backward
- // compatibility and defaults to standard output.
+ // The output file is optional with --json for backward compatibility and defaults to
+ // standard output.
m->outfilename = QUtil::make_shared_cstr("-");
}
if (m->infilename == nullptr) {
@@ -605,24 +600,21 @@ QPDFJob::checkConfiguration()
if (m->encrypt && (!m->allow_insecure) &&
(m->owner_password.empty() && (!m->user_password.empty()) && (m->keylen == 256))) {
- // Note that empty owner passwords for R < 5 are copied from
- // the user password, so this lack of security is not an issue
- // for those files. Also we are consider only the ability to
- // open the file without a password to be insecure. We are not
- // concerned about whether the viewer enforces security
- // settings when the user and owner password match.
- usage("A PDF with a non-empty user password and an empty owner"
- " password encrypted with a 256-bit key is insecure as it"
- " can be opened without a password. If you really want to"
- " do this, you must also give the --allow-insecure option"
- " before the -- that follows --encrypt.");
+ // Note that empty owner passwords for R < 5 are copied from the user password, so this lack
+ // of security is not an issue for those files. Also we consider only the ability to
+ // open the file without a password to be insecure. We are not concerned about whether the
+ // viewer enforces security settings when the user and owner password match.
+ usage(
+ "A PDF with a non-empty user password and an empty owner password encrypted with a "
+ "256-bit key is insecure as it can be opened without a password. If you really want to"
+ " do this, you must also give the --allow-insecure option before the -- that follows "
+ "--encrypt.");
}
bool save_to_stdout = false;
if (m->require_outfile && m->outfilename && (strcmp(m->outfilename.get(), "-") == 0)) {
if (m->split_pages) {
- usage("--split-pages may not be used when"
- " writing to standard output");
+ usage("--split-pages may not be used when writing to standard output");
}
save_to_stdout = true;
}
@@ -634,9 +626,8 @@ QPDFJob::checkConfiguration()
}
if ((!m->split_pages) && QUtil::same_file(m->infilename.get(), m->outfilename.get())) {
QTC::TC("qpdf", "QPDFJob same file error");
- usage("input file and output file are the same;"
- " use --replace-input to intentionally"
- " overwrite the input file");
+ usage("input file and output file are the same; use --replace-input to intentionally "
+ "overwrite the input file");
}
if (m->json_version == 1) {
@@ -645,8 +636,7 @@ QPDFJob::checkConfiguration()
}
} else {
if (m->json_keys.count("objectinfo") || m->json_keys.count("objects")) {
- usage("json keys \"objects\" and \"objectinfo\" are only valid for"
- " json version 1");
+ usage("json keys \"objects\" and \"objectinfo\" are only valid for json version 1");
}
}
}
@@ -754,10 +744,8 @@ QPDFJob::showEncryption(QPDF& pdf)
void
QPDFJob::doCheck(QPDF& pdf)
{
- // Code below may set okay to false but not to true.
- // We assume okay until we prove otherwise but may
- // continue to perform additional checks after finding
- // errors.
+ // Code below may set okay to false but not to true. We assume okay until we prove otherwise but
+ // may continue to perform additional checks after finding errors.
bool okay = true;
auto& cout = *m->log->getInfo();
cout << "checking " << m->infilename.get() << "\n";
@@ -777,8 +765,7 @@ QPDFJob::doCheck(QPDF& pdf)
cout << "File is not linearized\n";
}
- // Write the file to nowhere, uncompressing
- // streams. This causes full file traversal and
+ // Write the file to nowhere, uncompressing streams. This causes full file traversal and
// decoding of all streams we can decode.
QPDFWriter w(pdf);
Pl_Discard discard;
@@ -809,9 +796,9 @@ QPDFJob::doCheck(QPDF& pdf)
if (!pdf.getWarnings().empty()) {
m->warnings = true;
} else {
- *m->log->getInfo() << "No syntax or stream encoding errors"
- << " found; the file may still contain\n"
- << "errors that qpdf cannot detect\n";
+ *m->log->getInfo()
+ << "No syntax or stream encoding errors found; the file may still contain\n"
+ << "errors that qpdf cannot detect\n";
}
}
@@ -833,8 +820,7 @@ QPDFJob::doShowObj(QPDF& pdf)
obj.warnIfPossible("unable to filter stream data");
error = true;
} else {
- // If anything has been written to standard output,
- // this will fail.
+ // If anything has been written to standard output, this will fail.
m->log->saveToStandardOutput(true);
obj.pipeStreamData(
m->log->getSave().get(),
@@ -933,8 +919,8 @@ QPDFJob::doShowAttachment(QPDF& pdf)
throw std::runtime_error("attachment " + m->attachment_to_show + " not found");
}
auto efs = fs->getEmbeddedFileStream();
- // saveToStandardOutput has already been called, but it's harmless
- // to call it again, so do as defensive coding.
+ // saveToStandardOutput has already been called, but it's harmless to call it again, so do so
+ // as defensive coding.
m->log->saveToStandardOutput(true);
efs.pipeStreamData(m->log->getSave().get(), 0, qpdf_dl_all);
}
@@ -1132,9 +1118,8 @@ QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf)
pldh.getLabelsForPageRange(0, npages - 1, 0, labels);
for (auto iter = labels.begin(); iter != labels.end(); ++iter) {
if ((iter + 1) == labels.end()) {
- // This can't happen, so ignore it. This could only
- // happen if getLabelsForPageRange somehow returned an
- // odd number of items.
+ // This can't happen, so ignore it. This could only happen if getLabelsForPageRange
+ // somehow returned an odd number of items.
break;
}
JSON j_label = j_labels.addArrayElement(JSON::makeDictionary());
@@ -1362,22 +1347,17 @@ QPDFJob::doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf)
JSON
QPDFJob::json_schema(int json_version, std::set<std::string>* keys)
{
- // Style: use all lower-case keys with no dashes or underscores.
- // Choose array or dictionary based on indexing. For example, we
- // use a dictionary for objects because we want to index by object
- // ID and an array for pages because we want to index by position.
- // The pages in the pages array contain references back to the
- // original object, which can be resolved in the objects
- // dictionary. When a PDF construct that maps back to an original
- // object is represented separately, use "object" as the key that
- // references the original object.
-
- // This JSON object doubles as a schema and as documentation for
- // our JSON output. Any schema mismatch is a bug in qpdf. This
- // helps to enforce our policy of consistently providing a known
- // structure where every documented key will always be present,
- // which makes it easier to consume our JSON. This is discussed in
- // more depth in the manual.
+ // Style: use all lower-case keys with no dashes or underscores. Choose array or dictionary
+ // based on indexing. For example, we use a dictionary for objects because we want to index by
+ // object ID and an array for pages because we want to index by position. The pages in the pages
+ // array contain references back to the original object, which can be resolved in the objects
+ // dictionary. When a PDF construct that maps back to an original object is represented
+ // separately, use "object" as the key that references the original object.
+
+ // This JSON object doubles as a schema and as documentation for our JSON output. Any schema
+ // mismatch is a bug in qpdf. This helps to enforce our policy of consistently providing a known
+ // structure where every documented key will always be present, which makes it easier to consume
+ // our JSON. This is discussed in more depth in the manual.
JSON schema = JSON::makeDictionary();
schema.addDictionaryMember(
"version",
@@ -1388,9 +1368,8 @@ QPDFJob::json_schema(int json_version, std::set<std::string>* keys)
bool all_keys = ((keys == nullptr) || keys->empty());
- // The list of selectable top-level keys id duplicated in the
- // following places: job.yml, QPDFJob::json_schema, and
- // QPDFJob::doJSON.
+ // The list of selectable top-level keys is duplicated in the following places: job.yml,
+ // QPDFJob::json_schema, and QPDFJob::doJSON.
if (json_version == 1) {
if (all_keys || keys->count("objects")) {
schema.addDictionaryMember("objects", JSON::parse(R"({
@@ -1581,8 +1560,8 @@ QPDFJob::json_out_schema_v1()
void
QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
{
- // qpdf guarantees that no new top-level keys whose names start
- // with "x-" will be added. These are reserved for users.
+ // qpdf guarantees that no new top-level keys whose names start with "x-" will be added. These
+ // are reserved for users.
std::string captured_json;
std::shared_ptr<Pl_String> pl_str;
@@ -1595,14 +1574,12 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
JSON::writeDictionaryOpen(p, first, 0);
if (m->json_output) {
- // Exclude version and parameters to keep the output file
- // minimal. The JSON version is inside the "qpdf" key for
- // version 2.
+ // Exclude version and parameters to keep the output file minimal. The JSON version is
+ // inside the "qpdf" key for version 2.
} else {
- // This version is updated every time a non-backward-compatible
- // change is made to the JSON format. Clients of the JSON are to
- // ignore unrecognized keys, so we only update the version of a
- // key disappears or if its value changes meaning.
+ // This version is updated every time a non-backward-compatible change is made to the JSON
+ // format. Clients of the JSON are to ignore unrecognized keys, so we only update the
+ // version if a key disappears or if its value changes meaning.
JSON::writeDictionaryItem(p, first, "version", JSON::makeInt(m->json_version), 1);
JSON j_params = JSON::makeDictionary();
std::string decode_level_str;
@@ -1624,13 +1601,11 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
JSON::writeDictionaryItem(p, first, "parameters", j_params, 1);
}
bool all_keys = m->json_keys.empty();
- // The list of selectable top-level keys id duplicated in the
- // following places: job.yml, QPDFJob::json_schema, and
- // QPDFJob::doJSON.
+ // The list of selectable top-level keys is duplicated in the following places: job.yml,
+ // QPDFJob::json_schema, and QPDFJob::doJSON.
- // We do pages and pagelabels first since they have the side
- // effect of repairing the pages tree, which could potentially
- // impact object references in remaining items.
+ // We do pages and pagelabels first since they have the side effect of repairing the pages tree,
+ // which could potentially impact object references in remaining items.
if (all_keys || m->json_keys.count("pages")) {
doJSONPages(p, first, pdf);
}
@@ -1638,8 +1613,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
doJSONPageLabels(p, first, pdf);
}
- // The non-special keys are output in alphabetical order, but the
- // order doesn't actually matter.
+ // The non-special keys are output in alphabetical order, but the order doesn't actually matter.
if (all_keys || m->json_keys.count("acroform")) {
doJSONAcroform(p, first, pdf);
}
@@ -1653,16 +1627,15 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
doJSONOutlines(p, first, pdf);
}
- // We do objects last so their information is consistent with
- // repairing the page tree. To see the original file with any page
- // tree problems and the page tree not flattened, select
+ // We do objects last so their information is consistent with repairing the page tree. To see
+ // the original file with any page tree problems and the page tree not flattened, select
// qpdf/objects/objectinfo without other keys.
if (all_keys || m->json_keys.count("objects") || m->json_keys.count("qpdf")) {
doJSONObjects(p, first, pdf);
}
if (m->json_version == 1) {
- // "objectinfo" is not needed for version >1 since you can
- // tell streams from other objects in "objects".
+ // "objectinfo" is not needed for version >1 since you can tell streams from other objects
+ // in "objects".
if (all_keys || m->json_keys.count("objectinfo")) {
doJSONObjectinfo(p, first, pdf);
}
@@ -1677,8 +1650,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
std::list<std::string> errors;
JSON captured = JSON::parse(captured_json);
if (!captured.checkSchema(schema, errors)) {
- m->log->error("QPDFJob didn't create JSON that complies with "
- "its own rules.\n");
+ m->log->error("QPDFJob didn't create JSON that complies with its own rules.\n");
for (auto const& error: errors) {
*m->log->getError() << error << "\n";
}
@@ -1768,53 +1740,46 @@ QPDFJob::doProcess(
bool used_for_input,
bool main_input)
{
- // If a password has been specified but doesn't work, try other
- // passwords that are equivalent in different character encodings.
- // This makes it possible to open PDF files that were encrypted
- // using incorrect string encodings. For example, if someone used
- // a password encoded in PDF Doc encoding or Windows code page
- // 1252 for an AES-encrypted file or a UTF-8-encoded password on
- // an RC4-encrypted file, or if the password was properly encoded
- // but the password given here was incorrectly encoded, there's a
- // good chance we'd succeed here.
+ // If a password has been specified but doesn't work, try other passwords that are equivalent in
+ // different character encodings. This makes it possible to open PDF files that were encrypted
+ // using incorrect string encodings. For example, if someone used a password encoded in PDF Doc
+ // encoding or Windows code page 1252 for an AES-encrypted file or a UTF-8-encoded password on
+ // an RC4-encrypted file, or if the password was properly encoded but the password given here
+ // was incorrectly encoded, there's a good chance we'd succeed here.
std::string ptemp;
if (password && (!m->password_is_hex_key)) {
if (m->password_mode == QPDFJob::pm_hex_bytes) {
- // Special case: handle --password-mode=hex-bytes for input
- // password as well as output password
+ // Special case: handle --password-mode=hex-bytes for input password as well as output
+ // password
QTC::TC("qpdf", "QPDFJob input password hex-bytes");
ptemp = QUtil::hex_decode(password);
password = ptemp.c_str();
}
}
if ((password == nullptr) || empty || m->password_is_hex_key || m->suppress_password_recovery) {
- // There is no password, or we're not doing recovery, so just
- // do the normal processing with the supplied password.
+ // There is no password, or we're not doing recovery, so just do the normal processing with
+ // the supplied password.
doProcessOnce(pdf, fn, password, empty, used_for_input, main_input);
return;
}
- // Get a list of otherwise encoded strings. Keep in scope for this
- // method.
+ // Get a list of otherwise encoded strings. Keep in scope for this method.
std::vector<std::string> passwords_str = QUtil::possible_repaired_encodings(password);
// Represent to char const*, as required by the QPDF class.
std::vector<char const*> passwords;
for (auto const& iter: passwords_str) {
passwords.push_back(iter.c_str());
}
- // We always try the supplied password first because it is the
- // first string returned by possible_repaired_encodings. If there
- // is more than one option, go ahead and put the supplied password
- // at the end so that it's that decoding attempt whose exception
- // is thrown.
+ // We always try the supplied password first because it is the first string returned by
+ // possible_repaired_encodings. If there is more than one option, go ahead and put the supplied
+ // password at the end so that it's that decoding attempt whose exception is thrown.
if (passwords.size() > 1) {
passwords.push_back(password);
}
- // Try each password. If one works, return the resulting object.
- // If they all fail, throw the exception thrown by the final
- // attempt, which, like the first attempt, will be with the
+ // Try each password. If one works, return the resulting object. If they all fail, throw the
+ // exception thrown by the final attempt, which, like the first attempt, will be with the
// supplied password.
bool warned = false;
for (auto iter = passwords.begin(); iter != passwords.end(); ++iter) {
@@ -1831,9 +1796,9 @@ QPDFJob::doProcess(
if (!warned) {
warned = true;
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
- v << prefix << ": supplied password didn't work;"
- << " trying other passwords based on interpreting"
- << " password with different string encodings\n";
+ v << prefix
+ << ": supplied password didn't work; trying other passwords based on "
+ "interpreting password with different string encodings\n";
});
}
}
@@ -1943,10 +1908,8 @@ QPDFJob::doUnderOverlayForPage(
fo[from_pageno] = pdf.copyForeignObject(from_page.getFormXObjectForPage());
}
- // If the same page is overlaid or underlaid multiple times,
- // we'll generate multiple names for it, but that's harmless
- // and also a pretty goofy case that's not worth coding
- // around.
+ // If the same page is overlaid or underlaid multiple times, we'll generate multiple names
+ // for it, but that's harmless and also a pretty goofy case that's not worth coding around.
std::string name = resources.getUniqueResourceName("/Fx", min_suffix);
QPDFMatrix cm;
std::string new_content = dest_page.placeFormXObject(
@@ -2017,18 +1980,15 @@ QPDFJob::handleUnderOverlay(QPDF& pdf)
if (!(underlay_pagenos.count(pageno) || overlay_pagenos.count(pageno))) {
continue;
}
- // This code converts the original page, any underlays, and
- // any overlays to form XObjects. Then it concatenates display
- // of all underlays, the original page, and all overlays.
- // Prior to 11.3.0, the original page contents were wrapped in
- // q/Q, but this didn't work if the original page had
- // unbalanced q/Q operators. See github issue #904.
+ // This code converts the original page, any underlays, and any overlays to form XObjects.
+ // Then it concatenates display of all underlays, the original page, and all overlays. Prior
+ // to 11.3.0, the original page contents were wrapped in q/Q, but this didn't work if the
+ // original page had unbalanced q/Q operators. See github issue #904.
auto& dest_page = main_pages.at(i);
auto dest_page_oh = dest_page.getObjectHandle();
auto this_page_fo = dest_page.getFormXObjectForPage();
- // The resulting form xobject lazily reads the content from
- // the original page, which we are going to replace. Therefore
- // we have to explicitly copy it.
+ // The resulting form xobject lazily reads the content from the original page, which we are
+ // going to replace. Therefore we have to explicitly copy it.
auto content_data = this_page_fo.getRawStreamData();
this_page_fo.replaceStreamData(content_data, QPDFObjectHandle(), QPDFObjectHandle());
auto resources =
@@ -2097,8 +2057,7 @@ QPDFJob::addAttachments(QPDF& pdf)
}
message = pdf.getFilename() +
" already has attachments with the following keys: " + message +
- "; use --replace to replace or --key to specify a different "
- "key";
+ "; use --replace to replace or --key to specify a different key";
throw std::runtime_error(message);
}
}
@@ -2144,11 +2103,9 @@ QPDFJob::copyAttachments(QPDF& pdf)
message += i;
}
message = pdf.getFilename() +
- " already has attachments with keys that conflict with"
- " attachments from other files: " +
+ " already has attachments with keys that conflict with attachments from other files: " +
message +
- ". Use --prefix with --copy-attachments-from"
- " or manually copy individual attachments.";
+ ". Use --prefix with --copy-attachments-from or manually copy individual attachments.";
throw std::runtime_error(message);
}
}
@@ -2243,13 +2200,11 @@ QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf)
return true;
}
- // Unreferenced resources are common in files where resources
- // dictionaries are shared across pages. As a heuristic, we look
- // in the file for shared resources dictionaries or shared XObject
- // subkeys of resources dictionaries either on pages or on form
- // XObjects in pages. If we find any, then there is a higher
- // likelihood that the expensive process of finding unreferenced
- // resources is worth it.
+ // Unreferenced resources are common in files where resources dictionaries are shared across
+ // pages. As a heuristic, we look in the file for shared resources dictionaries or shared
+ // XObject subkeys of resources dictionaries either on pages or on form XObjects in pages. If we
+ // find any, then there is a higher likelihood that the expensive process of finding
+ // unreferenced resources is worth it.
// Return true as soon as we find any shared resources.
@@ -2332,8 +2287,8 @@ added_page(QPDF& pdf, QPDFObjectHandle page)
{
QPDFObjectHandle result = page;
if (&page.getQPDF() != &pdf) {
- // Calling copyForeignObject on an object we already copied
- // will give us the already existing copy.
+ // Calling copyForeignObject on an object we already copied will give us the already
+ // existing copy.
result = pdf.copyForeignObject(page);
}
return result;
@@ -2348,8 +2303,7 @@ added_page(QPDF& pdf, QPDFPageObjectHelper page)
void
QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_heap)
{
- // Parse all page specifications and translate them into lists of
- // actual pages.
+ // Parse all page specifications and translate them into lists of actual pages.
// Handle "." as a shortcut for the input file
for (auto& page_spec: m->page_specs) {
@@ -2359,9 +2313,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
}
if (!m->keep_files_open_set) {
- // Count the number of distinct files to determine whether we
- // should keep files open or not. Rather than trying to code
- // some portable heuristic based on OS limits, just hard-code
+ // Count the number of distinct files to determine whether we should keep files open or not.
+ // Rather than trying to code some portable heuristic based on OS limits, just hard-code
// this at a given number and allow users to override.
std::set<std::string> filenames;
for (auto& page_spec: m->page_specs) {
@@ -2383,16 +2336,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
for (auto& page_spec: m->page_specs) {
if (page_spec_qpdfs.count(page_spec.filename) == 0) {
- // Open the PDF file and store the QPDF object. Throw a
- // std::shared_ptr to the qpdf into a heap so that it
- // survives through copying to the output but gets cleaned up
- // automatically at the end. Do not canonicalize the file
- // name. Using two different paths to refer to the same
- // file is a documented workaround for duplicating a page.
- // If you are using this an example of how to do this with
- // the API, you can just create two different QPDF objects
- // to the same underlying file with the same path to
- // achieve the same affect.
+ // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into
+ // a heap so that it survives through copying to the output but gets cleaned up
+ // automatically at the end. Do not canonicalize the file name. Using two different
+ // paths to refer to the same file is a documented workaround for duplicating a page. If
+ // you are using this as an example of how to do this with the API, you can just create
+ // two different QPDF objects to the same underlying file with the same path to achieve
+ // the same effect.
char const* password = page_spec.password.get();
if ((!m->encryption_file.empty()) && (password == nullptr) &&
(page_spec.filename == m->encryption_file)) {
@@ -2424,8 +2374,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
}
}
- // Read original pages from the PDF, and parse the page range
- // associated with this occurrence of the file.
+ // Read original pages from the PDF, and parse the page range associated with this
+ // occurrence of the file.
parsed_specs.push_back(
// line-break
QPDFPageData(page_spec.filename, page_spec_qpdfs[page_spec.filename], page_spec.range));
@@ -2451,11 +2401,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
}
}
- // Clear all pages out of the primary QPDF's pages tree but leave
- // the objects in place in the file so they can be re-added
- // without changing their object numbers. This enables other
- // things in the original file, such as outlines, to continue to
- // work.
+ // Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the
+ // file so they can be re-added without changing their object numbers. This enables other things
+ // in the original file, such as outlines, to continue to work.
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": removing unreferenced pages from primary input\n";
});
@@ -2466,9 +2414,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
}
if (m->collate && (parsed_specs.size() > 1)) {
- // Collate the pages by selecting one page from each spec in
- // order. When a spec runs out of pages, stop selecting from
- // it.
+ // Collate the pages by selecting one page from each spec in order. When a spec runs out of
+ // pages, stop selecting from it.
std::vector<QPDFPageData> new_parsed_specs;
size_t nspecs = parsed_specs.size();
size_t cur_page = 0;
@@ -2491,9 +2438,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
parsed_specs = new_parsed_specs;
}
- // Add all the pages from all the files in the order specified.
- // Keep track of any pages from the original file that we are
- // selecting.
+ // Add all the pages from all the files in the order specified. Keep track of any pages from the
+ // original file that we are selecting.
std::set<int> selected_from_orig;
std::vector<QPDFObjectHandle> new_labels;
bool any_page_labels = false;
@@ -2516,8 +2462,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
v << prefix << ": adding pages from " << page_data.filename << "\n";
});
for (auto pageno_iter: page_data.selected_pages) {
- // Pages are specified from 1 but numbered from 0 in the
- // vector
+ // Pages are specified from 1 but numbered from 0 in the vector
int pageno = pageno_iter - 1;
pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels);
QPDFPageObjectHelper to_copy = page_data.orig_pages.at(QIntC::to_size(pageno));
@@ -2539,22 +2484,18 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
bool first_copy_from_orig = false;
bool this_file = (page_data.qpdf == &pdf);
if (this_file) {
- // This is a page from the original file. Keep track
- // of the fact that we are using it.
+ // This is a page from the original file. Keep track of the fact that we are using
+ // it.
first_copy_from_orig = (selected_from_orig.count(pageno) == 0);
selected_from_orig.insert(pageno);
}
auto new_page = added_page(pdf, to_copy);
- // Try to avoid gratuitously renaming fields. In the case
- // of where we're just extracting a bunch of pages from
- // the original file and not copying any page more than
- // once, there's no reason to do anything with the fields.
- // Since we don't remove fields from the original file
- // until all copy operations are completed, any foreign
- // pages that conflict with original pages will be
- // adjusted. If we copy any page from the original file
- // more than once, that page would be in conflict with the
- // previous copy of itself.
+ // Try to avoid gratuitously renaming fields. In the case where we're just extracting
+ // a bunch of pages from the original file and not copying any page more than once,
+ // there's no reason to do anything with the fields. Since we don't remove fields from
+ // the original file until all copy operations are completed, any foreign pages that
+ // conflict with original pages will be adjusted. If we copy any page from the original
+ // file more than once, that page would be in conflict with the previous copy of itself.
if (other_afdh->hasAcroForm() && ((!this_file) || (!first_copy_from_orig))) {
if (!this_file) {
QTC::TC("qpdf", "QPDFJob copy fields not this file");
@@ -2569,8 +2510,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
qpdf_e_damaged_pdf,
"",
0,
- ("Exception caught while fixing copied"
- " annotations. This may be a qpdf bug. " +
+ ("Exception caught while fixing copied annotations. This may be a qpdf "
+ "bug. " +
std::string("Exception: ") + e.what()));
}
}
@@ -2585,10 +2526,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
pdf.getRoot().replaceKey("/PageLabels", page_labels);
}
- // Delete page objects for unused page in primary. This prevents
- // those objects from being preserved by being referred to from
- // other places, such as the outlines dictionary. Also make sure
- // we keep form fields from pages we preserved.
+ // Delete page objects for unused pages in primary. This prevents those objects from being
+ // preserved by being referred to from other places, such as the outlines dictionary. Also make
+ // sure we keep form fields from pages we preserved.
for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) {
auto page = orig_pages.at(pageno);
if (selected_from_orig.count(QIntC::to_int(pageno))) {
@@ -2676,8 +2616,8 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password)
std::string encoded;
if (!QUtil::utf8_to_pdf_doc(password, encoded)) {
QTC::TC("qpdf", "QPDFJob password not encodable");
- throw std::runtime_error("supplied password cannot be encoded for"
- " 40-bit or 128-bit encryption formats");
+ throw std::runtime_error("supplied password cannot be encoded for 40-bit "
+ "or 128-bit encryption formats");
}
password = encoded;
}
@@ -2687,31 +2627,27 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password)
if (QUtil::utf8_to_pdf_doc(password, encoded)) {
QTC::TC("qpdf", "QPDFJob auto-encode password");
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
- v << prefix << ": automatically converting Unicode"
- << " password to single-byte encoding as"
- << " required for 40-bit or 128-bit"
- << " encryption\n";
+ v << prefix
+ << ": automatically converting Unicode password to single-byte "
+ "encoding as required for 40-bit or 128-bit encryption\n";
});
password = encoded;
} else {
QTC::TC("qpdf", "QPDFJob bytes fallback warning");
- *m->log->getError() << m->message_prefix << ": WARNING: "
- << "supplied password looks like a Unicode"
- << " password with characters not allowed in"
- << " passwords for 40-bit and 128-bit "
- "encryption;"
- << " most readers will not be able to open this"
- << " file with the supplied password."
- << " (Use --password-mode=bytes to suppress "
- "this"
- << " warning and use the password anyway.)\n";
+ *m->log->getError()
+ << m->message_prefix
+ << ": WARNING: supplied password looks like a Unicode password with "
+ "characters not allowed in passwords for 40-bit and 128-bit "
+ "encryption; most readers will not be able to open this file with "
+ "the supplied password. (Use --password-mode=bytes to suppress this "
+ "warning and use the password anyway.)\n";
}
} else if ((R >= 5) && (!is_valid_utf8)) {
QTC::TC("qpdf", "QPDFJob invalid utf-8 in auto");
- throw std::runtime_error("supplied password is not a valid Unicode password,"
- " which is required for 256-bit encryption; to"
- " really use this password, rerun with the"
- " --password-mode=bytes option");
+ throw std::runtime_error(
+ "supplied password is not a valid Unicode password, which is required for "
+ "256-bit encryption; to really use this password, rerun with the "
+ "--password-mode=bytes option");
}
}
}
@@ -2749,16 +2685,12 @@ QPDFJob::setEncryptionOptions(QPDF& pdf, QPDFWriter& w)
if ((R < 4) || ((R == 4) && (!m->use_aes))) {
if (!m->allow_weak_crypto) {
QTC::TC("qpdf", "QPDFJob weak crypto error");
- *m->log->getError() << m->message_prefix
- << ": refusing to write a file with RC4, a weak "
- "cryptographic "
- "algorithm\n"
- << "Please use 256-bit keys for better security.\n"
- << "Pass --allow-weak-crypto to enable writing insecure "
- "files.\n"
- << "See also "
- "https://qpdf.readthedocs.io/en/stable/"
- "weak-crypto.html\n";
+ *m->log->getError()
+ << m->message_prefix
+ << ": refusing to write a file with RC4, a weak cryptographic algorithm\n"
+ "Please use 256-bit keys for better security.\n"
+ "Pass --allow-weak-crypto to enable writing insecure files.\n"
+ "See also https://qpdf.readthedocs.io/en/stable/weak-crypto.html\n";
throw std::runtime_error("refusing to write a file with weak crypto");
}
}
@@ -2996,8 +2928,8 @@ QPDFJob::doSplitPages(QPDF& pdf)
qpdf_e_damaged_pdf,
"",
0,
- ("Exception caught while fixing copied"
- " annotations. This may be a qpdf bug." +
+ ("Exception caught while fixing copied annotations. This may be a qpdf "
+ "bug." +
std::string("Exception: ") + e.what()));
}
}
@@ -3032,12 +2964,10 @@ QPDFJob::writeOutfile(QPDF& pdf)
{
std::shared_ptr<char> temp_out;
if (m->replace_input) {
- // Append but don't prepend to the path to generate a
- // temporary name. This saves us from having to split the path
- // by directory and non-directory.
+ // Append but don't prepend to the path to generate a temporary name. This saves us from
+ // having to split the path by directory and non-directory.
temp_out = QUtil::make_shared_cstr(std::string(m->infilename.get()) + ".~qpdf-temp#");
- // m->outfilename will be restored to 0 before temp_out
- // goes out of scope.
+ // m->outfilename will be restored to 0 before temp_out goes out of scope.
m->outfilename = temp_out;
} else if (strcmp(m->outfilename.get(), "-") == 0) {
m->outfilename = nullptr;
@@ -3045,14 +2975,14 @@ QPDFJob::writeOutfile(QPDF& pdf)
if (m->json_version) {
writeJSON(pdf);
} else {
- // QPDFWriter must have block scope so the output file will be
- // closed after write() finishes.
+ // QPDFWriter must have block scope so the output file will be closed after write()
+ // finishes.
QPDFWriter w(pdf);
if (m->outfilename) {
w.setOutputFilename(m->outfilename.get());
} else {
- // saveToStandardOutput has already been called, but
- // calling it again is defensive and harmless.
+ // saveToStandardOutput has already been called, but calling it again is defensive and
+ // harmless.
m->log->saveToStandardOutput(true);
w.setOutputPipeline(m->log->getSave().get());
}
@@ -3096,8 +3026,7 @@ QPDFJob::writeOutfile(QPDF& pdf)
void
QPDFJob::writeJSON(QPDF& pdf)
{
- // File pipeline must have block scope so it will be closed
- // after write.
+ // File pipeline must have block scope so it will be closed after write.
std::shared_ptr<QUtil::FileCloser> fc;
std::shared_ptr<Pipeline> fp;
if (m->outfilename.get()) {
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 7270b76d..fa5e52e8 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -51,8 +51,7 @@ QPDFObjectHandle::StreamDataProvider::StreamDataProvider(bool supports_retry) :
QPDFObjectHandle::StreamDataProvider::~StreamDataProvider()
{
- // Must be explicit and not inline -- see QPDF_DLL_CLASS in
- // README-maintainer
+ // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
}
void
@@ -155,16 +154,14 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
void
QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle)
{
- throw std::logic_error("You must override one of the"
- " handleObject methods in ParserCallbacks");
+ throw std::logic_error("You must override one of the handleObject methods in ParserCallbacks");
}
void
QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle oh, size_t, size_t)
{
- // This version of handleObject was added in qpdf 9. If the
- // developer did not override it, fall back to the older
- // interface.
+ // This version of handleObject was added in qpdf 9. If the developer did not override it, fall
+ // back to the older interface.
handleObject(oh);
}
@@ -592,8 +589,7 @@ QPDFObjectHandle::getUIntValueAsUInt()
result = 0;
} else if (v > UINT_MAX) {
QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX");
- warnIfPossible("requested value of unsigned integer is too big;"
- " returning UINT_MAX");
+ warnIfPossible("requested value of unsigned integer is too big; returning UINT_MAX");
result = UINT_MAX;
} else {
result = static_cast<unsigned int>(v);
@@ -1092,11 +1088,9 @@ QPDFObjectHandle::mergeResources(
QPDFObjectHandle this_val = getKey(rtype);
if (this_val.isDictionary() && other_val.isDictionary()) {
if (this_val.isIndirect()) {
- // Do this even if there are no keys. Various
- // places in the code call mergeResources with
- // resource dictionaries that contain empty
- // subdictionaries just to get this shallow copy
- // functionality.
+ // Do this even if there are no keys. Various places in the code call
+ // mergeResources with resource dictionaries that contain empty subdictionaries
+ // just to get this shallow copy functionality.
QTC::TC("qpdf", "QPDFObjectHandle replace with copy");
this_val = replaceKeyAndGetNew(rtype, this_val.shallowCopy());
}
@@ -1476,8 +1470,7 @@ QPDFObjectHandle::arrayOrStreamToStreamArray(
"",
description,
0,
- " object is supposed to be a stream or an"
- " array of streams but is neither"));
+ " object is supposed to be a stream or an array of streams but is neither"));
}
bool first = true;
@@ -1526,8 +1519,8 @@ void
QPDFObjectHandle::rotatePage(int angle, bool relative)
{
if ((angle % 90) != 0) {
- throw std::runtime_error("QPDF::rotatePage called with an"
- " angle that is not a multiple of 90");
+ throw std::runtime_error(
+ "QPDF::rotatePage called with an angle that is not a multiple of 90");
}
int new_angle = angle;
if (relative) {
@@ -1551,8 +1544,7 @@ QPDFObjectHandle::rotatePage(int angle, bool relative)
new_angle += old_angle;
}
new_angle = (new_angle + 360) % 360;
- // Make this explicit even with new_angle == 0 since /Rotate can
- // be inherited.
+ // Make this explicit even with new_angle == 0 since /Rotate can be inherited.
replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle));
}
@@ -1564,15 +1556,14 @@ QPDFObjectHandle::coalesceContentStreams()
QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream");
return;
} else if (!contents.isArray()) {
- // /Contents is optional for pages, and some very damaged
- // files may have pages that are invalid in other ways.
+ // /Contents is optional for pages, and some very damaged files may have pages that are
+ // invalid in other ways.
return;
}
- // Should not be possible for a page object to not have an
- // owning PDF unless it was manually constructed in some
- // incorrect way. However, it can happen in a PDF file whose
- // page structure is direct, which is against spec but still
- // possible to hand construct, as in fuzz issue 27393.
+ // Should not be possible for a page object to not have an owning PDF unless it was manually
+ // constructed in some incorrect way. However, it can happen in a PDF file whose page structure
+ // is direct, which is against spec but still possible to hand construct, as in fuzz issue
+ // 27393.
QPDF& qpdf = getQPDF("coalesceContentStreams called on object with no associated PDF file");
QPDFObjectHandle new_contents = newStream(&qpdf);
@@ -1808,8 +1799,8 @@ QPDFObjectHandle::parseContentStream_data(
callbacks->handleObject(obj, QIntC::to_size(offset), length);
if (obj.isOperator() && (obj.getOperatorValue() == "ID")) {
- // Discard next character; it is the space after ID that
- // terminated the token. Read until end of inline image.
+ // Discard next character; it is the space after ID that terminated the token. Read
+ // until end of inline image.
char ch;
input->read(&ch, 1);
tokenizer.expectInlineImage(input);
@@ -2052,8 +2043,8 @@ QPDFObjectHandle::newReserved(QPDF* qpdf)
void
QPDFObjectHandle::setObjectDescription(QPDF* owning_qpdf, std::string const& object_description)
{
- // This is called during parsing on newly created direct objects,
- // so we can't call dereference() here.
+ // This is called during parsing on newly created direct objects, so we can't call dereference()
+ // here.
if (isInitialized() && obj.get()) {
auto descr = std::make_shared<QPDFValue::Description>(object_description);
obj->setDescription(owning_qpdf, descr);
@@ -2070,8 +2061,7 @@ QPDFObjectHandle
QPDFObjectHandle::shallowCopy()
{
if (!dereference()) {
- throw std::logic_error("operation attempted on uninitialized "
- "QPDFObjectHandle");
+ throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle");
}
return QPDFObjectHandle(obj->copy());
}
@@ -2080,8 +2070,7 @@ QPDFObjectHandle
QPDFObjectHandle::unsafeShallowCopy()
{
if (!dereference()) {
- throw std::logic_error("operation attempted on uninitialized "
- "QPDFObjectHandle");
+ throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle");
}
return QPDFObjectHandle(obj->copy(true));
}
@@ -2094,8 +2083,7 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams)
auto cur_og = getObjGen();
if (!visited.add(cur_og)) {
QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop");
- throw std::runtime_error("loop detected while converting object from "
- "indirect to direct");
+ throw std::runtime_error("loop detected while converting object from indirect to direct");
}
if (isBool() || isInteger() || isName() || isNull() || isReal() || isString()) {
@@ -2123,11 +2111,10 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams)
throw std::runtime_error("attempt to make a stream into a direct object");
}
} else if (isReserved()) {
- throw std::logic_error("QPDFObjectHandle: attempting to make a"
- " reserved object handle direct");
+ throw std::logic_error(
+ "QPDFObjectHandle: attempting to make a reserved object handle direct");
} else {
- throw std::logic_error("QPDFObjectHandle::makeDirectInternal: "
- "unknown object type");
+ throw std::logic_error("QPDFObjectHandle::makeDirectInternal: unknown object type");
}
visited.erase(cur_og);
@@ -2162,8 +2149,7 @@ void
QPDFObjectHandle::assertInitialized() const
{
if (!isInitialized()) {
- throw std::logic_error("operation attempted on uninitialized "
- "QPDFObjectHandle");
+ throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle");
}
}
@@ -2172,8 +2158,8 @@ QPDFObjectHandle::typeWarning(char const* expected_type, std::string const& warn
{
QPDF* context = nullptr;
std::string description;
- // Type checks above guarantee that the object has been dereferenced.
- // Nevertheless, dereference throws exceptions in the test suite
+ // Type checks above guarantee that the object has been dereferenced. Nevertheless, dereference
+ // throws exceptions in the test suite
if (!dereference()) {
throw std::logic_error("attempted to dereference an uninitialized QPDFObjectHandle");
}
@@ -2376,8 +2362,8 @@ QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const
auto item_qpdf = item.getOwningQPDF();
if ((qpdf != nullptr) && (item_qpdf != nullptr) && (qpdf != item_qpdf)) {
QTC::TC("qpdf", "QPDFObjectHandle check ownership");
- throw std::logic_error("Attempting to add an object from a different QPDF."
- " Use QPDF::copyForeignObject to add objects from another file.");
+ throw std::logic_error("Attempting to add an object from a different QPDF. Use "
+ "QPDF::copyForeignObject to add objects from another file.");
}
}
@@ -2402,9 +2388,8 @@ QPDFObjectHandle::dereference()
void
QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e)
{
- // If parsing on behalf of a QPDF object and want to give a
- // warning, we can warn through the object. If parsing for some
- // other reason, such as an explicit creation of an object from a
+ // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
+ // object. If parsing for some other reason, such as an explicit creation of an object from a
// string, then just throw the exception.
if (qpdf) {
qpdf->warn(e);
@@ -2596,7 +2581,8 @@ QPDFObjectHandle::getQPDF(std::string const& error_msg) const
{
auto result = isInitialized() ? this->obj->getQPDF() : nullptr;
if (result == nullptr) {
- throw std::runtime_error(error_msg == "" ? "attempt to use a null qpdf object" : error_msg);
+ throw std::runtime_error(
+ error_msg.empty() ? "attempt to use a null qpdf object" : error_msg);
}
return *result;
}
diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc
index a34ed28f..608254e4 100644
--- a/libqpdf/QPDFPageObjectHelper.cc
+++ b/libqpdf/QPDFPageObjectHelper.cc
@@ -110,10 +110,8 @@ InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
} else if (name == "/I") {
name = "/Indexed";
} else {
- // This is a key in the page's /Resources ->
- // /ColorSpace dictionary. We need to look it up
- // and use its value as the color space for the
- // image.
+ // This is a key in the page's /Resources -> /ColorSpace dictionary. We need to
+ // look it up and use its value as the color space for the image.
QPDFObjectHandle colorspace = resources.getKey("/ColorSpace");
if (colorspace.isDictionary() && colorspace.hasKey(name)) {
QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup");
@@ -407,8 +405,8 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow)
{
if (shallow) {
QPDFObjectHandle resources = getAttribute("/Resources", true);
- // Calling mergeResources also ensures that /XObject becomes
- // direct and is not shared with other pages.
+ // Calling mergeResources also ensures that /XObject becomes direct and is not shared with
+ // other pages.
resources.mergeResources("<< /XObject << >> >>"_qpdf);
InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
Pl_Buffer b("new page content");
@@ -573,11 +571,10 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
return false;
}
- // We will walk through /Font and /XObject dictionaries, removing
- // any resources that are not referenced. We must make copies of
- // resource dictionaries down into the dictionaries are mutating
- // to prevent mutating one dictionary from having the side effect
- // of mutating the one it was copied from.
+ // We will walk through /Font and /XObject dictionaries, removing any resources that are not
+ // referenced. We must make copies of resource dictionaries down into the dictionaries we are
+ // mutating to prevent mutating one dictionary from having the side effect of mutating the one
+ // it was copied from.
QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
std::vector<QPDFObjectHandle> rdicts;
std::set<std::string> known_names;
@@ -605,33 +602,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
}
}
}
- // Older versions of the PDF spec allowed form XObjects to omit
- // their resources dictionaries, in which case names were resolved
- // from the containing page. This behavior seems to be widely
- // supported by viewers. If a form XObjects has a resources
- // dictionary and has some unresolved names, some viewers fail to
- // resolve them, and others allow them to be inherited from the
- // page or from another form XObjects that contains them. Since
- // this behavior is inconsistent across viewers, we consider an
- // unresolved name when a resources dictionary is present to be
- // reason not to remove unreferenced resources. An unresolved name
- // in the absence of a resource dictionary is not considered a
- // problem. For form XObjects, we just accumulate a list of
- // unresolved names, and for page objects, we avoid removing any
- // such names found in nested form XObjects.
+ // Older versions of the PDF spec allowed form XObjects to omit their resources dictionaries, in
+ // which case names were resolved from the containing page. This behavior seems to be widely
+ // supported by viewers. If a form XObject has a resources dictionary and has some unresolved
+ // names, some viewers fail to resolve them, and others allow them to be inherited from the page
+ // or from another form XObject that contains them. Since this behavior is inconsistent across
+ // viewers, we consider an unresolved name when a resources dictionary is present to be reason
+ // not to remove unreferenced resources. An unresolved name in the absence of a resource
+ // dictionary is not considered a problem. For form XObjects, we just accumulate a list of
+ // unresolved names, and for page objects, we avoid removing any such names found in nested form
+ // XObjects.
if ((!local_unresolved.empty()) && resources.isDictionary()) {
- // It's not worth issuing a warning for this case. From qpdf
- // 10.3, we are hopefully only looking at names that are
- // referencing fonts and XObjects, but until we're certain
- // that we know the meaning of every name in a content stream,
- // we don't want to give warnings that might be false
- // positives. Also, this can happen in legitimate cases with
- // older PDFs, and there's nothing to be done about it, so
- // there's no good reason to issue a warning. The only sad
- // thing is that it was a false positive that alerted me to a
- // logic error in the code, and any future such errors would
- // now be hidden.
+ // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only
+ // looking at names that are referencing fonts and XObjects, but until we're certain that we
+ // know the meaning of every name in a content stream, we don't want to give warnings that
+ // might be false positives. Also, this can happen in legitimate cases with older PDFs, and
+ // there's nothing to be done about it, so there's no good reason to issue a warning. The
+ // only sad thing is that it was a false positive that alerted me to a logic error in the
+ // code, and any future such errors would now be hidden.
QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names");
return false;
}
@@ -639,8 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
for (auto& dict: rdicts) {
for (auto const& key: dict.getKeys()) {
if (is_page && unresolved.count(key)) {
- // This name is referenced by some nested form
- // xobject, so don't remove it.
+ // This name is referenced by some nested form xobject, so don't remove it.
QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved");
} else if (!rf.getNames().count(key)) {
dict.removeKey(key);
@@ -653,8 +641,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
void
QPDFPageObjectHelper::removeUnreferencedResources()
{
- // Accumulate a list of unresolved names across all nested form
- // XObjects.
+ // Accumulate a list of unresolved names across all nested form XObjects.
std::set<std::string> unresolved;
bool any_failures = false;
forEachFormXObject(
@@ -724,10 +711,9 @@ QPDFPageObjectHelper::getMatrixForTransformations(bool invert)
QPDFObjectHandle
QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations)
{
- auto result = this->oh
- .getQPDF("QPDFPageObjectHelper::getFormXObjectForPage "
- "called with a direct object")
- .newStream();
+ auto result =
+ this->oh.getQPDF("QPDFPageObjectHelper::getFormXObjectForPage called with a direct object")
+ .newStream();
QPDFObjectHandle newdict = result.getDict();
newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
@@ -759,18 +745,15 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
bool allow_shrink,
bool allow_expand)
{
- // Calculate the transformation matrix that will place the given
- // form XObject fully inside the given rectangle, center and
- // shrinking or expanding as needed if requested.
-
- // When rendering a form XObject, the transformation in the
- // graphics state (cm) is applied first (of course -- when it is
- // applied, the PDF interpreter doesn't even know we're going to
- // be drawing a form XObject yet), and then the object's matrix
- // (M) is applied. The resulting matrix, when applied to the form
- // XObject's bounding box, will generate a new rectangle. We want
- // to create a transformation matrix that make the form XObject's
- // bounding box land in exactly the right spot.
+ // Calculate the transformation matrix that will place the given form XObject fully inside the
+ // given rectangle, centering and shrinking or expanding as needed if requested.
+
+ // When rendering a form XObject, the transformation in the graphics state (cm) is applied first
+ // (of course -- when it is applied, the PDF interpreter doesn't even know we're going to be
+ // drawing a form XObject yet), and then the object's matrix (M) is applied. The resulting
+ // matrix, when applied to the form XObject's bounding box, will generate a new rectangle. We
+ // want to create a transformation matrix that makes the form XObject's bounding box land in
+ // exactly the right spot.
QPDFObjectHandle fdict = fo.getDict();
QPDFObjectHandle bbox_obj = fdict.getKey("/BBox");
@@ -782,37 +765,32 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
QPDFMatrix tmatrix; // "to" matrix
QPDFMatrix fmatrix; // "from" matrix
if (invert_transformations) {
- // tmatrix inverts scaling and rotation of the destination
- // page. Applying this matrix allows the overlaid form
- // XObject's to be absolute rather than relative to properties
- // of the destination page. tmatrix is part of the computed
- // transformation matrix.
+ // tmatrix inverts scaling and rotation of the destination page. Applying this matrix allows
+ // the overlaid form XObject's to be absolute rather than relative to properties of the
+ // destination page. tmatrix is part of the computed transformation matrix.
tmatrix = QPDFMatrix(getMatrixForTransformations(true));
wmatrix.concat(tmatrix);
}
if (fdict.getKey("/Matrix").isMatrix()) {
- // fmatrix is the transformation matrix that is applied to the
- // form XObject itself. We need this for calculations, but we
- // don't explicitly use it in the final result because the PDF
+ // fmatrix is the transformation matrix that is applied to the form XObject itself. We need
+ // this for calculations, but we don't explicitly use it in the final result because the PDF
// rendering system automatically applies this last before
// drawing the form XObject.
fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix());
wmatrix.concat(fmatrix);
}
- // The current wmatrix handles transformation from the form
- // xobject and, if requested, the destination page. Next, we have
- // to adjust this for scale and position.
+ // The current wmatrix handles transformation from the form xobject and, if requested, the
+ // destination page. Next, we have to adjust this for scale and position.
- // Step 1: figure out what scale factor we need to make the form
- // XObject's bounding box fit within the destination rectangle.
+ // Step 1: figure out what scale factor we need to make the form XObject's bounding box fit
+ // within the destination rectangle.
// Transform bounding box
QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle();
QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox);
- // Calculate a scale factor, if needed. Shrink or expand if needed
- // and allowed.
+ // Calculate a scale factor, if needed. Shrink or expand if needed and allowed.
if ((T.urx == T.llx) || (T.ury == T.lly)) {
// avoid division by zero
return QPDFMatrix();
@@ -834,8 +812,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
}
}
- // Step 2: figure out what translation is required to get the
- // rectangle to the right spot: centered within the destination.
+ // Step 2: figure out what translation is required to get the rectangle to the right spot:
+ // centered within the destination.
wmatrix = QPDFMatrix();
wmatrix.scale(scale, scale);
wmatrix.concat(tmatrix);
@@ -849,9 +827,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
double tx = r_cx - t_cx;
double ty = r_cy - t_cy;
- // Now we can calculate the final matrix. The final matrix does
- // not include fmatrix because that is applied automatically by
- // the PDF interpreter.
+ // Now we can calculate the final matrix. The final matrix does not include fmatrix because that
+ // is applied automatically by the PDF interpreter.
QPDFMatrix cm;
cm.translate(tx, ty);
cm.scale(scale, scale);
@@ -921,18 +898,15 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
auto rect = box.getArrayAsRectangle();
decltype(rect) new_rect;
- // How far are the edges of our rectangle from the edges
- // of the media box?
+ // How far are the edges of our rectangle from the edges of the media box?
auto left_x = rect.llx - media_rect.llx;
auto right_x = media_rect.urx - rect.urx;
auto bottom_y = rect.lly - media_rect.lly;
auto top_y = media_rect.ury - rect.ury;
- // Rotating the page 180 degrees does not change
- // /MediaBox. Rotating 90 or 270 degrees reverses llx and
- // lly and also reverse urx and ury. For all the other
- // boxes, we want the corners to be the correct distance
- // away from the corners of the mediabox.
+ // Rotating the page 180 degrees does not change /MediaBox. Rotating 90 or 270 degrees
+ // reverses llx and lly and also reverses urx and ury. For all the other boxes, we want the
+ // corners to be the correct distance away from the corners of the mediabox.
switch (rotate) {
case 90:
new_rect.llx = media_rect.lly + bottom_y;
@@ -963,9 +937,8 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
this->oh.replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect));
}
- // When we rotate the page, pivot about the point 0, 0 and then
- // translate so the page is visible with the origin point being
- // the same offset from the lower left corner of the media box.
+ // When we rotate the page, pivot about the point 0, 0 and then translate so the page is visible
+ // with the origin point being the same offset from the lower left corner of the media box.
// These calculations have been verified empirically with various
// PDF readers.
QPDFMatrix cm(0, 0, 0, 0, 0, 0);
diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc
index 5d695897..48227e55 100644
--- a/libqpdf/QPDFParser.cc
+++ b/libqpdf/QPDFParser.cc
@@ -41,12 +41,10 @@ namespace
QPDFObjectHandle
QPDFParser::parse(bool& empty, bool content_stream)
{
- // This method must take care not to resolve any objects. Don't
- // check the type of any object without first ensuring that it is
- // a direct object. Otherwise, doing so may have the side effect
- // of reading the object and changing the file pointer. If you do
- // this, it will cause a logic error to be thrown from
- // QPDF::inParse().
+ // This method must take care not to resolve any objects. Don't check the type of any object
+ // without first ensuring that it is a direct object. Otherwise, doing so may have the side
+ // effect of reading the object and changing the file pointer. If you do this, it will cause a
+ // logic error to be thrown from QPDF::inParse().
const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create();
QPDF::ParseGuard pg(context);
@@ -193,18 +191,16 @@ QPDFParser::parse(bool& empty, bool content_stream)
!olist.at(size - 2)->getObjGen().isIndirect()) {
if (context == nullptr) {
QTC::TC("qpdf", "QPDFParser indirect without context");
- throw std::logic_error("QPDFObjectHandle::parse called without context"
- " on an object with indirect references");
+ throw std::logic_error("QPDFObjectHandle::parse called without context on "
+ "an object with indirect references");
}
auto ref_og = QPDFObjGen(
QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(),
QPDFObjectHandle(olist.back()).getIntValueAsInt());
if (ref_og.isIndirect()) {
- // This action has the desirable side effect
- // of causing dangling references (references
- // to indirect objects that don't appear in
- // the PDF) in any parsed object to appear in
- // the object cache.
+ // This action has the desirable side effect of causing dangling references
+ // (references to indirect objects that don't appear in the PDF) in any
+ // parsed object to appear in the object cache.
object = context->getObject(ref_og).obj;
indirect_ref = true;
} else {
@@ -214,16 +210,14 @@ QPDFParser::parse(bool& empty, bool content_stream)
olist.pop_back();
olist.pop_back();
} else if ((value == "endobj") && (state == st_top)) {
- // We just saw endobj without having read
- // anything. Treat this as a null and do not move
- // the input source's offset.
+ // We just saw endobj without having read anything. Treat this as a null and do
+ // not move the input source's offset.
is_null = true;
input->seek(input->getLastOffset(), SEEK_SET);
empty = true;
} else {
QTC::TC("qpdf", "QPDFParser treat word as string");
- warn("unknown token while reading object;"
- " treating as string");
+ warn("unknown token while reading object; treating as string");
bad = true;
object = QPDF_String::create(value);
}
@@ -250,8 +244,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
break;
default:
- warn("treating unknown token type as null while "
- "reading object");
+ warn("treating unknown token type as null while reading object");
bad = true;
is_null = true;
break;
@@ -259,8 +252,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
if (object == nullptr && !is_null &&
(!((state == st_start) || (state == st_stop) || (state == st_eof)))) {
- throw std::logic_error("QPDFObjectHandle::parseInternal: "
- "unexpected uninitialized object");
+ throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
is_null = true;
}
@@ -274,8 +266,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
}
}
if (bad_count > 5) {
- // We had too many consecutive errors without enough
- // intervening successful objects. Give up.
+ // We had too many consecutive errors without enough intervening successful objects.
+ // Give up.
warn("too many errors; giving up on reading object");
state = st_top;
is_null = true;
@@ -287,8 +279,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
warn("parse error while reading object");
}
done = true;
- // In content stream mode, leave object uninitialized to
- // indicate EOF
+ // In content stream mode, leave object uninitialized to indicate EOF
if (!content_stream) {
is_null = true;
}
@@ -298,8 +289,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
case st_array:
if (is_null) {
object = null_oh;
- // No need to set description for direct nulls - they probably
- // will become implicit.
+ // No need to set description for direct nulls - they probably will become implicit.
} else if (!indirect_ref) {
setDescription(object, input->getLastOffset());
}
@@ -316,23 +306,22 @@ QPDFParser::parse(bool& empty, bool content_stream)
case st_stop:
if ((state_stack.size() < 2) || (stack.size() < 2)) {
- throw std::logic_error("QPDFObjectHandle::parseInternal: st_stop encountered"
- " with insufficient elements in stack");
+ throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
+ "insufficient elements in stack");
}
parser_state_e old_state = state_stack.back();
state_stack.pop_back();
if (old_state == st_array) {
object = QPDF_Array::create(std::move(olist), frame.null_count > 100);
setDescription(object, offset - 1);
- // The `offset` points to the next of "[". Set the rewind
- // offset to point to the beginning of "[". This has been
- // explicitly tested with whitespace surrounding the array start
- // delimiter. getLastOffset points to the array end token and
- // therefore can't be used here.
+ // The `offset` points to the position just after "[". Set the rewind offset to point to the
+ // beginning of "[". This has been explicitly tested with whitespace surrounding the
+ // array start delimiter. getLastOffset points to the array end token and therefore
+ // can't be used here.
set_offset = true;
} else if (old_state == st_dictionary) {
- // Convert list to map. Alternating elements are keys. Attempt
- // to recover more or less gracefully from invalid dictionaries.
+ // Convert list to map. Alternating elements are keys. Attempt to recover more or
+ // less gracefully from invalid dictionaries.
std::set<std::string> names;
for (auto& obj: olist) {
if (obj) {
@@ -358,8 +347,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
}
warn(
offset,
- "expected dictionary key but found"
- " non-name object; inserting key " +
+ "expected dictionary key but found non-name object; inserting key " +
key);
}
if (dict.count(key) > 0) {
@@ -367,8 +355,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
warn(
offset,
"dictionary has duplicated key " + key +
- "; last occurrence overrides earlier "
- "ones");
+ "; last occurrence overrides earlier ones");
}
// Calculate value.
@@ -380,8 +367,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
QTC::TC("qpdf", "QPDFParser no val for last key");
warn(
offset,
- "dictionary ended prematurely; "
- "using null as value for last key");
+ "dictionary ended prematurely; using null as value for last key");
val = QPDF_Null::create();
}
@@ -395,11 +381,10 @@ QPDFParser::parse(bool& empty, bool content_stream)
}
object = QPDF_Dictionary::create(std::move(dict));
setDescription(object, offset - 2);
- // The `offset` points to the next of "<<". Set the rewind
- // offset to point to the beginning of "<<". This has been
- // explicitly tested with whitespace surrounding the dictionary
- // start delimiter. getLastOffset points to the dictionary end
- // token and therefore can't be used here.
+ // The `offset` points to the position just after "<<". Set the rewind offset to point to the
+ // beginning of "<<". This has been explicitly tested with whitespace surrounding
+ // the dictionary start delimiter. getLastOffset points to the dictionary end token
+ // and therefore can't be used here.
set_offset = true;
}
stack.pop_back();
@@ -431,9 +416,8 @@ QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parse
void
QPDFParser::warn(QPDFExc const& e) const
{
- // If parsing on behalf of a QPDF object and want to give a
- // warning, we can warn through the object. If parsing for some
- // other reason, such as an explicit creation of an object from a
+ // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
+ // object. If parsing for some other reason, such as an explicit creation of an object from a
// string, then just throw the exception.
if (context) {
context->warn(e);
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index da02a0fe..d98af8a9 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -1,8 +1,7 @@
#include <qpdf/QPDFTokenizer.hh>
-// DO NOT USE ctype -- it is locale dependent for some things, and
-// it's not worth the risk of including it in case it may accidentally
-// be used.
+// DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of
+// including it in case it may accidentally be used.
#include <qpdf/QIntC.hh>
#include <qpdf/QPDFExc.hh>
@@ -45,8 +44,8 @@ namespace
bool
QPDFWordTokenFinder::check()
{
- // Find a word token matching the given string, preceded by a
- // delimiter, and followed by a delimiter or EOF.
+ // Find a word token matching the given string, preceded by a delimiter, and followed by a
+ // delimiter or EOF.
QPDFTokenizer tokenizer;
QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
qpdf_offset_t pos = is->tell();
@@ -68,8 +67,7 @@ QPDFWordTokenFinder::check()
return false;
}
if (token_start == 0) {
- // Can't actually happen...we never start the search at the
- // beginning of the input.
+ // Can't actually happen...we never start the search at the beginning of the input.
return false;
}
return true;
@@ -147,9 +145,9 @@ QPDFTokenizer::presentCharacter(char ch)
void
QPDFTokenizer::handleCharacter(char ch)
{
- // State machine is implemented such that the final character may not be
- // handled. This happens whenever you have to use a character from the
- // next token to detect the end of the current token.
+ // State machine is implemented such that the final character may not be handled. This happens
+ // whenever you have to use a character from the next token to detect the end of the current
+ // token.
switch (this->state) {
case st_top:
@@ -248,15 +246,14 @@ QPDFTokenizer::handleCharacter(char ch)
void
QPDFTokenizer::inTokenReady(char ch)
{
- throw std::logic_error("INTERNAL ERROR: QPDF tokenizer presented character "
- "while token is waiting");
+ throw std::logic_error(
+ "INTERNAL ERROR: QPDF tokenizer presented character while token is waiting");
}
void
QPDFTokenizer::inBeforeToken(char ch)
{
- // Note: we specifically do not use ctype here. It is
- // locale-dependent.
+ // Note: we specifically do not use ctype here. It is locale-dependent.
if (isSpace(ch)) {
this->before_token = !this->include_ignorable;
this->in_token = this->include_ignorable;
@@ -421,11 +418,9 @@ void
QPDFTokenizer::inName(char ch)
{
if (isDelimiter(ch)) {
- // A C-locale whitespace character or delimiter terminates
- // token. It is important to unread the whitespace
- // character even though it is ignored since it may be the
- // newline after a stream keyword. Removing it here could
- // make the stream-reading code break on some files,
+ // A C-locale whitespace character or delimiter terminates token. It is important to unread
+ // the whitespace character even though it is ignored since it may be the newline after a
+ // stream keyword. Removing it here could make the stream-reading code break on some files,
// though not on any files in the test suite as of this
// writing.
@@ -452,8 +447,7 @@ QPDFTokenizer::inNameHex1(char ch)
} else {
QTC::TC("qpdf", "QPDFTokenizer bad name 1");
this->error_message = "name with stray # will not work with PDF >= 1.2";
- // Use null to encode a bad # -- this is reversed
- // in QPDF_Name::normalizeName.
+ // Use null to encode a bad # -- this is reversed in QPDF_Name::normalizeName.
this->val += '\0';
this->state = st_name;
inName(ch);
@@ -468,8 +462,7 @@ QPDFTokenizer::inNameHex2(char ch)
} else {
QTC::TC("qpdf", "QPDFTokenizer bad name 2");
this->error_message = "name with stray # will not work with PDF >= 1.2";
- // Use null to encode a bad # -- this is reversed
- // in QPDF_Name::normalizeName.
+ // Use null to encode a bad # -- this is reversed in QPDF_Name::normalizeName.
this->val += '\0';
this->val += this->hex_char;
this->state = st_name;
@@ -636,13 +629,10 @@ void
QPDFTokenizer::inLiteral(char ch)
{
if (isDelimiter(ch)) {
- // A C-locale whitespace character or delimiter terminates
- // token. It is important to unread the whitespace
- // character even though it is ignored since it may be the
- // newline after a stream keyword. Removing it here could
- // make the stream-reading code break on some files,
- // though not on any files in the test suite as of this
- // writing.
+ // A C-locale whitespace character or delimiter terminates token. It is important to unread
+ // the whitespace character even though it is ignored since it may be the newline after a
+ // stream keyword. Removing it here could make the stream-reading code break on some files,
+ // though not on any files in the test suite as of this writing.
this->in_token = false;
this->char_to_unread = ch;
@@ -707,8 +697,7 @@ QPDFTokenizer::inCharCode(char ch)
if (++(this->digit_count) < 3) {
return;
}
- // We've accumulated \ddd. PDF Spec says to ignore
- // high-order overflow.
+ // We've accumulated \ddd. PDF Spec says to ignore high-order overflow.
}
this->val += char(this->char_code % 256);
this->state = st_in_string;
@@ -739,8 +728,7 @@ QPDFTokenizer::presentEOF()
case st_decimal:
case st_literal:
QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
- // Push any delimiter to the state machine to finish off the final
- // token.
+ // Push any delimiter to the state machine to finish off the final token.
presentCharacter('\f');
this->in_token = true;
break;
@@ -794,14 +782,12 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
qpdf_offset_t last_offset = input->getLastOffset();
qpdf_offset_t pos = input->tell();
- // Use QPDFWordTokenFinder to find EI surrounded by delimiters.
- // Then read the next several tokens or up to EOF. If we find any
- // suspicious-looking or tokens, this is probably still part of
- // the image data, so keep looking for EI. Stop at the first EI
- // that passes. If we get to the end without finding one, return
- // the last EI we found. Store the number of bytes expected in the
- // inline image including the EI and use that to break out of
- // inline image, falling back to the old method if needed.
+ // Use QPDFWordTokenFinder to find EI surrounded by delimiters. Then read the next several
+ // tokens or up to EOF. If we find any suspicious-looking tokens, this is probably still part
+ // of the image data, so keep looking for EI. Stop at the first EI that passes. If we get to the
+ // end without finding one, return the last EI we found. Store the number of bytes expected in
+ // the inline image including the EI and use that to break out of inline image, falling back to
+ // the old method if needed.
bool okay = false;
bool first_try = true;
@@ -814,13 +800,11 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
QPDFTokenizer check;
bool found_bad = false;
- // Look at the next 10 tokens or up to EOF. The next inline
- // image's image data would look like bad tokens, but there
- // will always be at least 10 tokens between one inline
- // image's EI and the next valid one's ID since width, height,
- // bits per pixel, and color space are all required as well as
- // a BI and ID. If we get 10 good tokens in a row or hit EOF,
- // we can be pretty sure we've found the actual EI.
+ // Look at the next 10 tokens or up to EOF. The next inline image's image data would look
+ // like bad tokens, but there will always be at least 10 tokens between one inline image's
+ // EI and the next valid one's ID since width, height, bits per pixel, and color space are
+ // all required as well as a BI and ID. If we get 10 good tokens in a row or hit EOF, we can
+ // be pretty sure we've found the actual EI.
for (int i = 0; i < 10; ++i) {
QPDFTokenizer::Token t = check.readToken(input, "checker", true);
token_type_e type = t.getType();
@@ -829,27 +813,22 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
} else if (type == tt_bad) {
found_bad = true;
} else if (t.isWord()) {
- // The qpdf tokenizer lumps alphabetic and otherwise
- // uncategorized characters into "words". We recognize
- // strings of alphabetic characters as potential valid
- // operators for purposes of telling whether we're in
- // valid content or not. It's not perfect, but it
- // should work more reliably than what we used to do,
- // which was already good enough for the vast majority
- // of files.
+ // The qpdf tokenizer lumps alphabetic and otherwise uncategorized characters into
+ // "words". We recognize strings of alphabetic characters as potential valid
+ // operators for purposes of telling whether we're in valid content or not. It's not
+ // perfect, but it should work more reliably than what we used to do, which was
+ // already good enough for the vast majority of files.
bool found_alpha = false;
bool found_non_printable = false;
bool found_other = false;
for (char ch: t.getValue()) {
if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) ||
(ch == '*')) {
- // Treat '*' as alpha since there are valid
- // PDF operators that contain * along with
- // alphabetic characters.
+ // Treat '*' as alpha since there are valid PDF operators that contain *
+ // along with alphabetic characters.
found_alpha = true;
} else if ((static_cast<signed char>(ch) < 32) && (!isSpace(ch))) {
- // Compare ch as a signed char so characters
- // outside of 7-bit will be < 0.
+ // Compare ch as a signed char so characters outside of 7-bit will be < 0.
found_non_printable = true;
break;
} else {
@@ -903,9 +882,9 @@ QPDFTokenizer::betweenTokens()
QPDFTokenizer::Token
QPDFTokenizer::readToken(
- std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len)
+ InputSource& input, std::string const& context, bool allow_bad, size_t max_len)
{
- nextToken(*input, context, max_len);
+ nextToken(input, context, max_len);
Token token;
bool unread_char;
@@ -918,15 +897,22 @@ QPDFTokenizer::readToken(
} else {
throw QPDFExc(
qpdf_e_damaged_pdf,
- input->getName(),
+ input.getName(),
context,
- input->getLastOffset(),
+ input.getLastOffset(),
token.getErrorMessage());
}
}
return token;
}
+QPDFTokenizer::Token
+QPDFTokenizer::readToken(
+ std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len)
+{
+ return readToken(*input, context, allow_bad, max_len);
+}
+
bool
QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len)
{
@@ -941,9 +927,8 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t
presentEOF();
if ((this->type == tt_eof) && (!this->allow_eof)) {
- // Nothing in the qpdf library calls readToken
- // without allowEOF anymore, so this case is not
- // exercised.
+ // Nothing in the qpdf library calls readToken without allowEOF anymore, so this
+ // case is not exercised.
this->type = tt_bad;
this->error_message = "unexpected EOF";
offset = input.getLastOffset();
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index 165b216f..45d6fb70 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -69,10 +69,9 @@ namespace
} // namespace
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
- // The PDF specification provides these filter abbreviations for
- // use in inline images, but according to table H.1 in the pre-ISO
- // versions of the PDF specification, Adobe Reader also accepts
- // them for stream filters.
+ // The PDF specification provides these filter abbreviations for use in inline images, but
+ // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also
+ // accepts them for stream filters.
{"/AHx", "/ASCIIHexDecode"},
{"/A85", "/ASCII85Decode"},
{"/LZW", "/LZWDecode"},
@@ -118,8 +117,8 @@ QPDF_Stream::QPDF_Stream(
length(length)
{
if (!stream_dict.isDictionary()) {
- throw std::logic_error("stream object instantiated with non-dictionary "
- "object for dictionary");
+ throw std::logic_error(
+ "stream object instantiated with non-dictionary object for dictionary");
}
auto descr = std::make_shared<QPDFValue::Description>(
qpdf->getFilename() + ", stream object " + og.unparse(' '));
@@ -198,18 +197,18 @@ QPDF_Stream::getStreamJSON(
case qpdf_sj_none:
case qpdf_sj_inline:
if (p != nullptr) {
- throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should "
- "only be supplied when json_data is file");
+ throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should only be supplied "
+ "when json_data is file");
}
break;
case qpdf_sj_file:
if (p == nullptr) {
- throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline must "
- "be supplied when json_data is file");
+ throw std::logic_error(
+ "QPDF_Stream::getStreamJSON: pipeline must be supplied when json_data is file");
}
if (data_filename.empty()) {
- throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename "
- "must be supplied when json_data is file");
+ throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename must be supplied "
+ "when json_data is file");
}
break;
}
@@ -244,8 +243,7 @@ QPDF_Stream::getStreamJSON(
break;
}
}
- // We can use unsafeShallowCopy because we are only
- // touching top-level keys.
+ // We can use unsafeShallowCopy because we are only touching top-level keys.
dict = this->stream_dict.unsafeShallowCopy();
dict.removeKey("/Length");
if (filter && filtered) {
@@ -408,8 +406,7 @@ QPDF_Stream::filterable(
return false;
}
- // filters now contains a list of filters to be applied in order.
- // See which ones we can support.
+ // filters now contains a list of filters to be applied in order. See which ones we can support.
// See if we can support any decode parameters that are specified.
@@ -428,9 +425,8 @@ QPDF_Stream::filterable(
}
}
- // Ignore /DecodeParms entirely if /Filters is empty. At least
- // one case of a file whose /DecodeParms was [ << >> ] when
- // /Filters was empty has been seen in the wild.
+ // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose
+ // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild.
if ((filters.size() != 0) && (decode_parms.size() != filters.size())) {
warn("stream /DecodeParms length is inconsistent with filters");
filterable = false;
@@ -502,9 +498,8 @@ QPDF_Stream::pipeStreamData(
return filter;
}
- // Construct the pipeline in reverse order. Force pipelines we
- // create to be deleted when this function finishes. Pipelines
- // created by QPDFStreamFilter objects will be deleted by those
+ // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this
+ // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those
// objects.
std::vector<std::shared_ptr<Pipeline>> to_delete;
@@ -568,8 +563,8 @@ QPDF_Stream::pipeStreamData(
QTC::TC("qpdf", "QPDF_Stream pipe use stream provider");
} else {
QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
- // This would be caused by programmer error on the
- // part of a library user, not by invalid input data.
+ // This would be caused by programmer error on the part of a library user, not by
+ // invalid input data.
throw std::runtime_error(
"stream data provider for " + og.unparse(' ') + " provided " +
std::to_string(actual_length) + " bytes instead of expected " +
@@ -602,14 +597,13 @@ QPDF_Stream::pipeStreamData(
warn("content normalization encountered bad tokens");
if (normalizer->lastTokenWasBad()) {
QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
- warn("normalized content ended with a bad token; you may be able "
- "to resolve this by coalescing content streams in combination "
- "with normalizing content. From the command line, specify "
- "--coalesce-contents");
+ warn("normalized content ended with a bad token; you may be able to resolve this by "
+ "coalescing content streams in combination with normalizing content. From the "
+ "command line, specify --coalesce-contents");
}
- warn("Resulting stream data may be corrupted but is may still useful "
- "for manual inspection. For more information on this warning, "
- "search for content normalization in the manual.");
+ warn("Resulting stream data may be corrupted but is may still useful for manual "
+ "inspection. For more information on this warning, search for content normalization "
+ "in the manual.");
}
return success;
diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc
index 74136060..3fda99c4 100644
--- a/libqpdf/QPDF_encryption.cc
+++ b/libqpdf/QPDF_encryption.cc
@@ -137,9 +137,8 @@ pad_or_truncate_password_V4(std::string const& password, char k1[key_bytes])
void
QPDF::trim_user_password(std::string& user_password)
{
- // Although unnecessary, this routine trims the padding string
- // from the end of a user password. Its only purpose is for
- // recovery of user passwords which is done in the test suite.
+ // Although unnecessary, this routine trims the padding string from the end of a user password.
+ // Its only purpose is for recovery of user passwords which is done in the test suite.
char const* cstr = user_password.c_str();
size_t len = user_password.length();
if (len < key_bytes) {
@@ -262,22 +261,17 @@ hash_V5(
int round_number = 0;
bool done = false;
while (!done) {
- // The hash algorithm has us setting K initially to the R5
- // value and then repeating a series of steps 64 times
- // before starting with the termination case testing. The
- // wording of the specification is very unclear as to the
- // exact number of times it should be run since the
- // wording about whether the initial setup counts as round
- // 0 or not is ambiguous. This code counts the initial
- // setup (R5) value as round 0, which appears to be
- // correct. This was determined to be correct by
- // increasing or decreasing the number of rounds by 1 or 2
- // from this value and generating 20 test files. In this
- // interpretation, all the test files worked with Adobe
- // Reader X. In the other configurations, many of the
- // files did not work, and we were accurately able to
- // predict which files didn't work by looking at the
- // conditions under which we terminated repetition.
+ // The hash algorithm has us setting K initially to the R5 value and then repeating a
+ // series of steps 64 times before starting with the termination case testing. The
+ // wording of the specification is very unclear as to the exact number of times it
+ // should be run since the wording about whether the initial setup counts as round 0 or
+ // not is ambiguous. This code counts the initial setup (R5) value as round 0, which
+ // appears to be correct. This was determined to be correct by increasing or decreasing
+ // the number of rounds by 1 or 2 from this value and generating 20 test files. In this
+ // interpretation, all the test files worked with Adobe Reader X. In the other
+ // configurations, many of the files did not work, and we were accurately able to
+ // predict which files didn't work by looking at the conditions under which we
+ // terminated repetition.
++round_number;
std::string K1 = password + K + udata;
@@ -291,11 +285,10 @@ hash_V5(
QUtil::unsigned_char_pointer(K.substr(16, 16)),
16);
- // E_mod_3 is supposed to be mod 3 of the first 16 bytes
- // of E taken as as a (128-bit) big-endian number. Since
- // (xy mod n) is equal to ((x mod n) + (y mod n)) mod n
- // and since 256 mod n is 1, we can just take the sums of
- // the the mod 3s of each byte to get the same result.
+ // E_mod_3 is supposed to be mod 3 of the first 16 bytes of E taken as a (128-bit)
+ // big-endian number. Since (xy mod n) is equal to ((x mod n) + (y mod n)) mod n and
+ // since 256 mod n is 1, we can just take the sums of the mod 3s of each byte to get
+ // the same result.
int E_mod_3 = 0;
for (unsigned int i = 0; i < 16; ++i) {
E_mod_3 += static_cast<unsigned char>(E.at(i));
@@ -344,8 +337,7 @@ QPDF::compute_data_key(
std::string result = encryption_key;
if (encryption_V >= 5) {
- // Algorithm 3.1a (PDF 1.7 extension level 3): just use
- // encryption key straight.
+ // Algorithm 3.1a (PDF 1.7 extension level 3): just use encryption key straight.
return result;
}
@@ -370,9 +362,8 @@ std::string
QPDF::compute_encryption_key(std::string const& password, EncryptionData const& data)
{
if (data.getV() >= 5) {
- // For V >= 5, the encryption key is generated and stored in
- // the file, encrypted separately with both user and owner
- // passwords.
+ // For V >= 5, the encryption key is generated and stored in the file, encrypted separately
+ // with both user and owner passwords.
return recover_encryption_key_with_password(password, data);
} else {
// For V < 5, the encryption key is derived from the user
@@ -386,12 +377,10 @@ QPDF::compute_encryption_key_from_password(std::string const& password, Encrypti
{
// Algorithm 3.2 from the PDF 1.7 Reference Manual
- // This code does not properly handle Unicode passwords.
- // Passwords are supposed to be converted from OS codepage
- // characters to PDFDocEncoding. Unicode passwords are supposed
- // to be converted to OS codepage before converting to
- // PDFDocEncoding. We instead require the password to be
- // presented in its final form.
+ // This code does not properly handle Unicode passwords. Passwords are supposed to be converted
+ // from OS codepage characters to PDFDocEncoding. Unicode passwords are supposed to be
+ // converted to OS codepage before converting to PDFDocEncoding. We instead require the
+ // password to be presented in its final form.
MD5 md5;
md5.encodeDataIncrementally(pad_or_truncate_password_V4(password).c_str(), key_bytes);
@@ -681,11 +670,9 @@ QPDF::recover_encryption_key_with_password(
{
// Algorithm 3.2a from the PDF 1.7 extension level 3
- // This code does not handle Unicode passwords correctly.
- // Empirical evidence suggests that most viewers don't. We are
- // supposed to process the input string with the SASLprep (RFC
- // 4013) profile of stringprep (RFC 3454) and then convert the
- // result to UTF-8.
+ // This code does not handle Unicode passwords correctly. Empirical evidence suggests that most
+ // viewers don't. We are supposed to process the input string with the SASLprep (RFC 4013)
+ // profile of stringprep (RFC 3454) and then convert the result to UTF-8.
perms_valid = false;
std::string key_password = truncate_password_V5(password);
@@ -738,18 +725,16 @@ QPDF::initializeEncryption()
}
m->encp->encryption_initialized = true;
- // After we initialize encryption parameters, we must used stored
- // key information and never look at /Encrypt again. Otherwise,
- // things could go wrong if someone mutates the encryption
+ // After we initialize encryption parameters, we must use stored key information and never look
+ // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
// dictionary.
if (!m->trailer.hasKey("/Encrypt")) {
return;
}
- // Go ahead and set m->encrypted here. That way, isEncrypted
- // will return true even if there were errors reading the
- // encryption dictionary.
+ // Go ahead and set m->encrypted here. That way, isEncrypted will return true even if there
+ // were errors reading the encryption dictionary.
m->encp->encrypted = true;
std::string id1;
@@ -757,9 +742,8 @@ QPDF::initializeEncryption()
if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
id1 = id_obj.getArrayItem(0).getStringValue();
} else {
- // Treating a missing ID as the empty string enables qpdf to
- // decrypt some invalid encrypted files with no /ID that
- // poppler can read but Adobe Reader can't.
+ // Treating a missing ID as the empty string enables qpdf to decrypt some invalid encrypted
+ // files with no /ID that poppler can read but Adobe Reader can't.
warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
}
@@ -800,8 +784,8 @@ QPDF::initializeEncryption()
std::string U = encryption_dict.getKey("/U").getStringValue();
int P = static_cast<int>(encryption_dict.getKey("/P").getIntValue());
- // If supporting new encryption R/V values, remember to update
- // error message inside this if statement.
+ // If supporting new encryption R/V values, remember to update error message inside this if
+ // statement.
if (!(((R >= 2) && (R <= 6)) && ((V == 1) || (V == 2) || (V == 4) || (V == 5)))) {
throw QPDFExc(
qpdf_e_unsupported,
@@ -893,8 +877,7 @@ QPDF::initializeEncryption()
QTC::TC("qpdf", "QPDF_encryption CFM AESV3");
method = e_aesv3;
} else {
- // Don't complain now -- maybe we won't need
- // to reference this type.
+ // Don't complain now -- maybe we won't need to reference this type.
method = e_unknown;
}
}
@@ -908,20 +891,15 @@ QPDF::initializeEncryption()
m->encp->cf_stream = interpretCF(m->encp, StmF);
m->encp->cf_string = interpretCF(m->encp, StrF);
if (EFF.isName()) {
- // qpdf does not use this for anything other than
- // informational purposes. This is intended to instruct
- // conforming writers on which crypt filter should be used
- // when new file attachments are added to a PDF file, but
- // qpdf never generates encrypted files with non-default
- // crypt filters. Prior to 10.2, I was under the mistaken
- // impression that this was supposed to be used for
- // decrypting attachments, but the code was wrong in a way
- // that turns out not to have mattered because no writers
- // were generating files the way I was imagining. Still,
- // providing this information could be useful when looking
- // at a file generated by something else, such as Acrobat
- // when specifying that only attachments should be
- // encrypted.
+ // qpdf does not use this for anything other than informational purposes. This is
+ // intended to instruct conforming writers on which crypt filter should be used when new
+ // file attachments are added to a PDF file, but qpdf never generates encrypted files
+ // with non-default crypt filters. Prior to 10.2, I was under the mistaken impression
+ // that this was supposed to be used for decrypting attachments, but the code was wrong
+ // in a way that turns out not to have mattered because no writers were generating files
+ // the way I was imagining. Still, providing this information could be useful when
+ // looking at a file generated by something else, such as Acrobat when specifying that
+ // only attachments should be encrypted.
m->encp->cf_file = interpretCF(m->encp, EFF);
} else {
m->encp->cf_file = m->encp->cf_stream;
@@ -935,8 +913,7 @@ QPDF::initializeEncryption()
m->encp->owner_password_matched =
check_owner_password(m->encp->user_password, m->encp->provided_password, data);
if (m->encp->owner_password_matched && (V < 5)) {
- // password supplied was owner password; user_password has
- // been initialized for V < 5
+ // password supplied was owner password; user_password has been initialized for V < 5
if (getTrimmedUserPassword() == m->encp->provided_password) {
m->encp->user_password_matched = true;
QTC::TC("qpdf", "QPDF_encryption user matches owner V < 5");
@@ -958,14 +935,12 @@ QPDF::initializeEncryption()
if (m->provided_password_is_hex_key) {
m->encp->encryption_key = QUtil::hex_decode(m->encp->provided_password);
} else if (V < 5) {
- // For V < 5, the user password is encrypted with the owner
- // password, and the user password is always used for
- // computing the encryption key.
+ // For V < 5, the user password is encrypted with the owner password, and the user password
+ // is always used for computing the encryption key.
m->encp->encryption_key = compute_encryption_key(m->encp->user_password, data);
} else {
- // For V >= 5, either password can be used independently to
- // compute the encryption key, and neither password can be
- // used to recover the other.
+ // For V >= 5, either password can be used independently to compute the encryption key, and
+ // neither password can be used to recover the other.
bool perms_valid;
m->encp->encryption_key =
recover_encryption_key_with_password(m->encp->provided_password, data, perms_valid);
@@ -1026,8 +1001,7 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og)
default:
warn(damagedPDF("unknown encryption filter for strings (check /StrF in "
"/Encrypt dictionary); strings may be decrypted improperly"));
- // To avoid repeated warnings, reset cf_string. Assume
- // we'd want to use AES if V == 4.
+ // To avoid repeated warnings, reset cf_string. Assume we'd want to use AES if V == 4.
m->encp->cf_string = e_aes;
use_aes = true;
break;
@@ -1052,8 +1026,8 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og)
} else {
QTC::TC("qpdf", "QPDF_encryption rc4 decode string");
size_t vlen = str.length();
- // Using std::shared_ptr guarantees that tmp will
- // be freed even if rc4.process throws an exception.
+ // Using std::shared_ptr guarantees that tmp will be freed even if rc4.process throws an
+ // exception.
auto tmp = QUtil::make_unique_cstr(str);
RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length()));
auto data = QUtil::unsigned_char_pointer(tmp.get());
@@ -1154,8 +1128,7 @@ QPDF::decryptStream(
file->getLastOffset(),
"unknown encryption filter for streams (check " + method_source +
"); streams may be decrypted improperly"));
- // To avoid repeated warnings, reset cf_stream. Assume
- // we'd want to use AES if V == 4.
+ // To avoid repeated warnings, reset cf_stream. Assume we'd want to use AES if V == 4.
encp->cf_stream = e_aes;
use_aes = true;
break;
diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc
index 66d4b314..c74cf4f7 100644
--- a/libqpdf/QPDF_json.cc
+++ b/libqpdf/QPDF_json.cc
@@ -12,8 +12,7 @@
#include <algorithm>
#include <cstring>
-// This chart shows an example of the state transitions that would
-// occur in parsing a minimal file.
+// This chart shows an example of the state transitions that would occur in parsing a minimal file.
// | st_initial
// { | -> st_top
@@ -414,9 +413,9 @@ QPDF::JSONReactor::containerEnd(JSON const& value)
object_stack.pop_back();
}
} else if ((state == st_top) && (from_state == st_qpdf)) {
- // Handle dangling indirect object references which the PDF spec says to
- // treat as nulls. It's tempting to make this an error, but that would
- // be wrong since valid input files may have these.
+ // Handle dangling indirect object references which the PDF spec says to treat as nulls.
+ // It's tempting to make this an error, but that would be wrong since valid input files may
+ // have these.
for (auto& oc: pdf.m->obj_cache) {
if (oc.second.object->getTypeCode() == ::ot_reserved && reserved.count(oc.first) == 0) {
QTC::TC("qpdf", "QPDF_json non-trivial null reserved");
@@ -446,8 +445,7 @@ QPDF::JSONReactor::topLevelScalar()
void
QPDF::JSONReactor::nestedState(std::string const& key, JSON const& value, state_e next)
{
- // Use this method when the next state is for processing a nested
- // dictionary.
+ // Use this method when the next state is for processing a nested dictionary.
if (value.isDictionary()) {
this->next_state = next;
} else {
@@ -531,8 +529,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
error(value.getStart(), "calledgetallpages must be a boolean");
}
} else {
- // ignore unknown keys for forward compatibility and to
- // skip keys we don't care about like "maxobjectid".
+ // ignore unknown keys for forward compatibility and to skip keys we don't care about
+ // like "maxobjectid".
QTC::TC("qpdf", "QPDF_json ignore second-level key");
next_state = st_ignore;
}
@@ -594,8 +592,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
this->pdf.m->trailer = makeObject(value);
setObjectDescription(this->pdf.m->trailer, value);
} else if (key == "stream") {
- // Don't need to set saw_stream here since there's already
- // an error.
+ // Don't need to set saw_stream here since there's already an error.
QTC::TC("qpdf", "QPDF_json trailer stream");
error(value.getStart(), "the trailer may not be a stream");
next_state = st_ignore;
@@ -616,8 +613,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
auto uninitialized = QPDFObjectHandle();
if (key == "dict") {
this->saw_dict = true;
- // Since a stream dictionary must be a dictionary, we can
- // use nestedState to transition to st_value.
+ // Since a stream dictionary must be a dictionary, we can use nestedState to transition
+ // to st_value.
nestedState("stream.dict", value, st_object);
auto dict = makeObject(value);
if (dict.isDictionary()) {
diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc
index 65357b9a..faebf5b6 100644
--- a/libqpdf/QPDF_linearization.cc
+++ b/libqpdf/QPDF_linearization.cc
@@ -22,8 +22,8 @@ load_vector_int(
BitStream& bit_stream, int nitems, std::vector<T>& vec, int bits_wanted, int_type T::*field)
{
bool append = vec.empty();
- // nitems times, read bits_wanted from the given bit stream,
- // storing results in the ith vector entry.
+ // nitems times, read bits_wanted from the given bit stream, storing results in the ith vector
+ // entry.
for (size_t i = 0; i < QIntC::to_size(nitems); ++i) {
if (append) {
@@ -34,8 +34,8 @@ load_vector_int(
if (QIntC::to_int(vec.size()) != nitems) {
throw std::logic_error("vector has wrong size in load_vector_int");
}
- // The PDF spec says that each hint table starts at a byte
- // boundary. Each "row" actually must start on a byte boundary.
+ // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must
+ // start on a byte boundary.
bit_stream.skipToNextByte();
}
@@ -49,8 +49,8 @@ load_vector_vector(
int bits_wanted,
std::vector<int> T::*vec2)
{
- // nitems1 times, read nitems2 (from the ith element of vec1) items
- // into the vec2 vector field of the ith item of vec1.
+ // nitems1 times, read nitems2 (from the ith element of vec1) items into the vec2 vector field
+ // of the ith item of vec1.
for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) {
for (int i2 = 0; i2 < vec1.at(i1).*nitems2; ++i2) {
(vec1.at(i1).*vec2).push_back(bit_stream.getBitsInt(QIntC::to_size(bits_wanted)));
@@ -83,18 +83,15 @@ QPDF::checkLinearization()
bool
QPDF::isLinearized()
{
- // If the first object in the file is a dictionary with a suitable
- // /Linearized key and has an /L key that accurately indicates the
- // file size, initialize m->lindict and return true.
-
- // A linearized PDF spec's first object will be contained within
- // the first 1024 bytes of the file and will be a dictionary with
- // a valid /Linearized key. This routine looks for that and does
- // no additional validation.
-
- // The PDF spec says the linearization dictionary must be
- // completely contained within the first 1024 bytes of the file.
- // Add a byte for a null terminator.
+ // If the first object in the file is a dictionary with a suitable /Linearized key and has an /L
+ // key that accurately indicates the file size, initialize m->lindict and return true.
+
+ // A linearized PDF file's first object will be contained within the first 1024 bytes of the
+ // file and will be a dictionary with a valid /Linearized key. This routine looks for that and
+ // does no additional validation.
+
+ // The PDF spec says the linearization dictionary must be completely contained within the first
+ // 1024 bytes of the file. Add a byte for a null terminator.
static int const tbuf_size = 1025;
auto b = std::make_unique<char[]>(tbuf_size);
@@ -161,8 +158,8 @@ QPDF::isLinearized()
void
QPDF::readLinearizationData()
{
- // This function throws an exception (which is trapped by
- // checkLinearization()) for any errors that prevent loading.
+ // This function throws an exception (which is trapped by checkLinearization()) for any errors
+ // that prevent loading.
if (!isLinearized()) {
throw std::logic_error("called readLinearizationData for file"
@@ -206,8 +203,8 @@ QPDF::readLinearizationData()
int H1_offset = 0;
int H1_length = 0;
if (H_items.size() == 4) {
- // Acrobat doesn't read or write these (as PDF 1.4), so we
- // don't have a way to generate a test case.
+ // Acrobat doesn't read or write these (as PDF 1.4), so we don't have a way to generate a
+ // test case.
// QTC::TC("qpdf", "QPDF overflow hint table");
H1_offset = H_items.at(2);
H1_length = H_items.at(3);
@@ -224,9 +221,8 @@ QPDF::readLinearizationData()
// Store linearization parameter data
- // Various places in the code use linp.npages, which is
- // initialized from N, to pre-allocate memory, so make sure it's
- // accurate and bail right now if it's not.
+ // Various places in the code use linp.npages, which is initialized from N, to pre-allocate
+ // memory, so make sure it's accurate and bail right now if it's not.
if (N.getIntValue() != static_cast<long long>(getAllPages().size())) {
throw damagedPDF("linearization hint table", "/N does not match number of pages");
}
@@ -299,11 +295,10 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
QPDFObjectHandle Hdict = H.getDict();
- // Some versions of Acrobat make /Length indirect and place it
- // immediately after the stream, increasing length to cover it,
- // even though the specification says all objects in the
- // linearization parameter dictionary must be direct. We have to
- // get the file position of the end of length in this case.
+ // Some versions of Acrobat make /Length indirect and place it immediately after the stream,
+ // increasing length to cover it, even though the specification says all objects in the
+ // linearization parameter dictionary must be direct. We have to get the file position of the
+ // end of length in this case.
QPDFObjectHandle length_obj = Hdict.getKey("/Length");
if (length_obj.isIndirect()) {
QTC::TC("qpdf", "QPDF hint table length indirect");
@@ -329,8 +324,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
void
QPDF::readHPageOffset(BitStream h)
{
- // All comments referring to the PDF spec refer to the spec for
- // version 1.4.
+ // All comments referring to the PDF spec refer to the spec for version 1.4.
HPageOffset& t = m->page_offset_hints;
@@ -402,9 +396,8 @@ QPDF::readHSharedObject(BitStream h)
load_vector_int(h, nitems, entries, 1, &HSharedObjectEntry::signature_present);
for (size_t i = 0; i < toS(nitems); ++i) {
if (entries.at(i).signature_present) {
- // Skip 128-bit MD5 hash. These are not supported by
- // acrobat, so they should probably never be there. We
- // have no test case for this.
+ // Skip 128-bit MD5 hash. These are not supported by acrobat, so they should probably
+ // never be there. We have no test case for this.
for (int j = 0; j < 4; ++j) {
(void)h.getBits(32);
}
@@ -425,8 +418,7 @@ QPDF::readHGeneric(BitStream h, HGeneric& t)
bool
QPDF::checkLinearizationInternal()
{
- // All comments referring to the PDF spec refer to the spec for
- // version 1.4.
+ // All comments referring to the PDF spec refer to the spec for version 1.4.
// Check all values in linearization parameter dictionary
@@ -476,24 +468,21 @@ QPDF::checkLinearizationInternal()
"; file = " + std::to_string(m->file->tell()));
}
- // P: first page number -- Implementation note 124 says Acrobat
- // ignores this value, so we will too.
+ // P: first page number -- Implementation note 124 says Acrobat ignores this value, so we will
+ // too.
- // Check numbering of compressed objects in each xref section.
- // For linearized files, all compressed objects are supposed to be
- // at the end of the containing xref section if any object streams
- // are in use.
+ // Check numbering of compressed objects in each xref section. For linearized files, all
+ // compressed objects are supposed to be at the end of the containing xref section if any object
+ // streams are in use.
if (m->uncompressed_after_compressed) {
- linearizationWarning("linearized file contains an uncompressed object"
- " after a compressed one in a cross-reference stream");
+ linearizationWarning("linearized file contains an uncompressed object after a compressed "
+ "one in a cross-reference stream");
}
- // Further checking requires optimization and order calculation.
- // Don't allow optimization to make changes. If it has to, then
- // the file is not properly linearized. We use the xref table to
- // figure out which objects are compressed and which are
- // uncompressed.
+ // Further checking requires optimization and order calculation. Don't allow optimization to
+ // make changes. If it has to, then the file is not properly linearized. We use the xref table
+ // to figure out which objects are compressed and which are uncompressed.
{ // local scope
std::map<int, int> object_stream_data;
for (auto const& iter: m->xref_table) {
@@ -507,16 +496,13 @@ QPDF::checkLinearizationInternal()
calculateLinearizationData(object_stream_data);
}
- // E: offset of end of first page -- Implementation note 123 says
- // Acrobat includes on extra object here by mistake. pdlin fails
- // to place thumbnail images in section 9, so when thumbnails are
- // present, it also gets the wrong value for /E. It also doesn't
- // count outlines here when it should even though it places them
- // in part 6. This code fails to put thread information
- // dictionaries in part 9, so it actually gets the wrong value for
- // E when threads are present. In that case, it would probably
- // agree with pdlin. As of this writing, the test suite doesn't
- // contain any files with threads.
+ // E: offset of end of first page -- Implementation note 123 says Acrobat includes one extra
+ // object here by mistake. pdlin fails to place thumbnail images in section 9, so when
+ // thumbnails are present, it also gets the wrong value for /E. It also doesn't count outlines
+ // here when it should even though it places them in part 6. This code fails to put thread
+ // information dictionaries in part 9, so it actually gets the wrong value for E when threads
+ // are present. In that case, it would probably agree with pdlin. As of this writing, the test
+ // suite doesn't contain any files with threads.
if (m->part6.empty()) {
stopOnError("linearization part 6 unexpectedly empty");
@@ -577,8 +563,7 @@ QPDF::getLinearizationOffset(QPDFObjGen const& og)
break;
case 2:
- // For compressed objects, return the offset of the object
- // stream that contains them.
+ // For compressed objects, return the offset of the object stream that contains them.
result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0));
break;
@@ -611,8 +596,7 @@ QPDF::lengthNextN(int first_object, int n)
"no xref table entry for " + std::to_string(first_object + i) + " 0");
} else {
if (m->obj_cache.count(og) == 0) {
- stopOnError("found unknown object while"
- " calculating length for linearization data");
+ stopOnError("found unknown object while calculating length for linearization data");
}
length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og));
}
@@ -624,22 +608,17 @@ void
QPDF::checkHPageOffset(
std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& shared_idx_to_obj)
{
- // Implementation note 126 says Acrobat always sets
- // delta_content_offset and delta_content_length in the page
- // offset header dictionary to 0. It also states that
- // min_content_offset in the per-page information is always 0,
- // which is an incorrect value.
-
- // Implementation note 127 explains that Acrobat always sets item
- // 8 (min_content_length) to zero, item 9
- // (nbits_delta_content_length) to the value of item 5
- // (nbits_delta_page_length), and item 7 of each per-page hint
- // table (delta_content_length) to item 2 (delta_page_length) of
- // that entry. Acrobat ignores these values when reading files.
-
- // Empirically, it also seems that Acrobat sometimes puts items
- // under a page's /Resources dictionary in with shared objects
- // even when they are private.
+ // Implementation note 126 says Acrobat always sets delta_content_offset and
+ // delta_content_length in the page offset header dictionary to 0. It also states that
+ // min_content_offset in the per-page information is always 0, which is an incorrect value.
+
+ // Implementation note 127 explains that Acrobat always sets item 8 (min_content_length) to
+ // zero, item 9 (nbits_delta_content_length) to the value of item 5 (nbits_delta_page_length),
+ // and item 7 of each per-page hint table (delta_content_length) to item 2 (delta_page_length)
+ // of that entry. Acrobat ignores these values when reading files.
+
+ // Empirically, it also seems that Acrobat sometimes puts items under a page's /Resources
+ // dictionary in with shared objects even when they are private.
int npages = toI(pages.size());
qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
@@ -670,13 +649,12 @@ QPDF::checkHPageOffset(
std::to_string(h_nobjects) + "; computed = " + std::to_string(ce.nobjects));
}
- // Use value for number of objects in hint table rather than
- // computed value if there is a discrepancy.
+ // Use value for number of objects in hint table rather than computed value if there is a
+ // discrepancy.
int length = lengthNextN(first_object, h_nobjects);
int h_length = toI(he.delta_page_length + m->page_offset_hints.min_page_length);
if (length != h_length) {
- // This condition almost certainly indicates a bad hint
- // table or a bug in this code.
+ // This condition almost certainly indicates a bad hint table or a bug in this code.
linearizationWarning(
"page length mismatch for page " + std::to_string(pageno) + ": hint table = " +
std::to_string(h_length) + "; computed length = " + std::to_string(length) +
@@ -690,8 +668,8 @@ QPDF::checkHPageOffset(
std::set<int> computed_shared;
if ((pageno == 0) && (he.nshared_objects > 0)) {
- // pdlin and Acrobat both do this even though the spec
- // states clearly and unambiguously that they should not.
+ // pdlin and Acrobat both do this even though the spec states clearly and unambiguously
+ // that they should not.
linearizationWarning("page 0 has shared identifier entries");
}
@@ -724,9 +702,8 @@ QPDF::checkHPageOffset(
for (int iter: computed_shared) {
if (!hint_shared.count(iter)) {
- // Acrobat does not put some things including at least
- // built-in fonts and procsets here, at least in some
- // cases.
+ // Acrobat does not put some things including at least built-in fonts and procsets
+ // here, at least in some cases.
linearizationWarning(
("page " + std::to_string(pageno) + ": shared object " + std::to_string(iter) +
": in computed list but not hint table"));
@@ -738,31 +715,26 @@ QPDF::checkHPageOffset(
void
QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj)
{
- // Implementation note 125 says shared object groups always
- // contain only one object. Implementation note 128 says that
- // Acrobat always nbits_nobjects to zero. Implementation note 130
- // says that Acrobat does not support more than one shared object
- // per group. These are all consistent.
+ // Implementation note 125 says shared object groups always contain only one object.
+ // Implementation note 128 says that Acrobat always sets nbits_nobjects to zero. Implementation
+ // note 130 says that Acrobat does not support more than one shared object per group. These are
+ // all consistent.
- // Implementation note 129 states that MD5 signatures are not
- // implemented in Acrobat, so signature_present must always be
- // zero.
+ // Implementation note 129 states that MD5 signatures are not implemented in Acrobat, so
+ // signature_present must always be zero.
- // Implementation note 131 states that first_shared_obj and
- // first_shared_offset have meaningless values for single-page
- // files.
+ // Implementation note 131 states that first_shared_obj and first_shared_offset have meaningless
+ // values for single-page files.
- // Empirically, Acrobat and pdlin generate incorrect values for
- // these whenever there are no shared objects not referenced by
- // the first page (i.e., nshared_total == nshared_first_page).
+ // Empirically, Acrobat and pdlin generate incorrect values for these whenever there are no
+ // shared objects not referenced by the first page (i.e., nshared_total == nshared_first_page).
HSharedObject& so = m->shared_object_hints;
if (so.nshared_total < so.nshared_first_page) {
linearizationWarning("shared object hint table: ntotal < nfirst_page");
} else {
- // The first nshared_first_page objects are consecutive
- // objects starting with the first page object. The rest are
- // consecutive starting from the first_shared_obj object.
+ // The first nshared_first_page objects are consecutive objects starting with the first page
+ // object. The rest are consecutive starting from the first_shared_obj object.
int cur_object = pages.at(0).getObjectID();
for (int i = 0; i < so.nshared_total; ++i) {
if (i == so.nshared_first_page) {
@@ -814,12 +786,10 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
void
QPDF::checkHOutlines()
{
- // Empirically, Acrobat generates the correct value for the object
- // number but incorrectly stores the next object number's offset
- // as the offset, at least when outlines appear in part 6. It
- // also generates an incorrect value for length (specifically, the
- // length that would cover the correct number of objects from the
- // wrong starting place). pdlin appears to generate correct
+ // Empirically, Acrobat generates the correct value for the object number but incorrectly stores
+ // the next object number's offset as the offset, at least when outlines appear in part 6. It
+ // also generates an incorrect value for length (specifically, the length that would cover the
+ // correct number of objects from the wrong starting place). pdlin appears to generate correct
// values in those cases.
if (m->c_outline_data.nobjects == m->outline_hints.nobjects) {
@@ -831,9 +801,8 @@ QPDF::checkHOutlines()
// Check length and offset. Acrobat gets these wrong.
QPDFObjectHandle outlines = getRoot().getKey("/Outlines");
if (!outlines.isIndirect()) {
- // This case is not exercised in test suite since not
- // permitted by the spec, but if this does occur, the
- // code below would fail.
+ // This case is not exercised in test suite since not permitted by the spec, but if
+ // this does occur, the code below would fail.
linearizationWarning("/Outlines key of root dictionary is not indirect");
return;
}
@@ -906,9 +875,8 @@ QPDF::dumpLinearizationDataInternal()
qpdf_offset_t
QPDF::adjusted_offset(qpdf_offset_t offset)
{
- // All offsets >= H_offset have to be increased by H_length
- // since all hint table location values disregard the hint table
- // itself.
+ // All offsets >= H_offset have to be increased by H_length since all hint table location values
+ // disregard the hint table itself.
if (offset >= m->linp.H_offset) {
return offset + m->linp.H_length;
}
@@ -971,8 +939,8 @@ QPDF::dumpHSharedObject()
*m->log->getInfo() << "Shared Object " << i << ":\n"
<< " group length: " << se.delta_group_length + t.min_group_length
<< "\n";
- // PDF spec says signature present nobjects_minus_one are
- // always 0, so print them only if they have a non-zero value.
+ // PDF spec says signature_present and nobjects_minus_one are always 0, so print them only
+ // if they have a non-zero value.
if (se.signature_present) {
*m->log->getInfo() << " signature present\n";
}
@@ -994,44 +962,38 @@ QPDF::dumpHGeneric(HGeneric& t)
void
QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
{
- // This function calculates the ordering of objects, divides them
- // into the appropriate parts, and computes some values for the
- // linearization parameter dictionary and hint tables. The file
- // must be optimized (via calling optimize()) prior to calling
- // this function. Note that actual offsets and lengths are not
- // computed here, but anything related to object ordering is.
+ // This function calculates the ordering of objects, divides them into the appropriate parts,
+ // and computes some values for the linearization parameter dictionary and hint tables. The
+ // file must be optimized (via calling optimize()) prior to calling this function. Note that
+ // actual offsets and lengths are not computed here, but anything related to object ordering is.
if (m->object_to_obj_users.empty()) {
- // Note that we can't call optimize here because we don't know
- // whether it should be called with or without allow changes.
- throw std::logic_error("INTERNAL ERROR: QPDF::calculateLinearizationData "
- "called before optimize()");
+ // Note that we can't call optimize here because we don't know whether it should be called
+ // with or without allow changes.
+ throw std::logic_error(
+ "INTERNAL ERROR: QPDF::calculateLinearizationData called before optimize()");
}
- // Separate objects into the categories sufficient for us to
- // determine which part of the linearized file should contain the
- // object. This categorization is useful for other purposes as
- // well. Part numbers refer to version 1.4 of the PDF spec.
+ // Separate objects into the categories sufficient for us to determine which part of the
+ // linearized file should contain the object. This categorization is useful for other purposes
+ // as well. Part numbers refer to version 1.4 of the PDF spec.
- // Parts 1, 3, 5, 10, and 11 don't contain any objects from the
- // original file (except the trailer dictionary in part 11).
+ // Parts 1, 3, 5, 10, and 11 don't contain any objects from the original file (except the
+ // trailer dictionary in part 11).
- // Part 4 is the document catalog (root) and the following root
- // keys: /ViewerPreferences, /PageMode, /Threads, /OpenAction,
- // /AcroForm, /Encrypt. Note that Thread information dictionaries
- // are supposed to appear in part 9, but we are disregarding that
- // recommendation for now.
+ // Part 4 is the document catalog (root) and the following root keys: /ViewerPreferences,
+ // /PageMode, /Threads, /OpenAction, /AcroForm, /Encrypt. Note that Thread information
+ // dictionaries are supposed to appear in part 9, but we are disregarding that recommendation
+ // for now.
- // Part 6 is the first page section. It includes all remaining
- // objects referenced by the first page including shared objects
- // but not including thumbnails. Additionally, if /PageMode is
+ // Part 6 is the first page section. It includes all remaining objects referenced by the first
+ // page including shared objects but not including thumbnails. Additionally, if /PageMode is
// /Outlines, then information from /Outlines also appears here.
- // Part 7 contains remaining objects private to pages other than
- // the first page.
+ // Part 7 contains remaining objects private to pages other than the first page.
- // Part 8 contains all remaining shared objects except those that
- // are shared only within thumbnails.
+ // Part 8 contains all remaining shared objects except those that are shared only within
+ // thumbnails.
// Part 9 contains all remaining objects.
@@ -1176,42 +1138,35 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
}
}
- // Generate ordering for objects in the output file. Sometimes we
- // just dump right from a set into a vector. Rather than
- // optimizing this by going straight into the vector, we'll leave
- // these phases separate for now. That way, this section can be
- // concerned only with ordering, and the above section can be
- // considered only with categorization. Note that sets of
- // QPDFObjGens are sorted by QPDFObjGen. In a linearized file,
- // objects appear in sequence with the possible exception of hints
- // tables which we won't see here anyway. That means that running
- // calculateLinearizationData() on a linearized file should give
- // results identical to the original file ordering.
-
- // We seem to traverse the page tree a lot in this code, but we
- // can address this for a future code optimization if necessary.
- // Premature optimization is the root of all evil.
+ // Generate ordering for objects in the output file. Sometimes we just dump right from a set
+ // into a vector. Rather than optimizing this by going straight into the vector, we'll leave
+ // these phases separate for now. That way, this section can be concerned only with ordering,
+ // and the above section can be concerned only with categorization. Note that sets of
+ // QPDFObjGens are sorted by QPDFObjGen. In a linearized file, objects appear in sequence with
+ // the possible exception of hints tables which we won't see here anyway. That means that
+ // running calculateLinearizationData() on a linearized file should give results identical to
+ // the original file ordering.
+
+ // We seem to traverse the page tree a lot in this code, but we can address this for a future
+ // code optimization if necessary. Premature optimization is the root of all evil.
std::vector<QPDFObjectHandle> pages;
{ // local scope
- // Map all page objects to the containing object stream. This
- // should be a no-op in a properly linearized file.
+ // Map all page objects to the containing object stream. This should be a no-op in a
+ // properly linearized file.
for (auto oh: getAllPages()) {
pages.push_back(getUncompressedObject(oh, object_stream_data));
}
}
int npages = toI(pages.size());
- // We will be initializing some values of the computed hint
- // tables. Specifically, we can initialize any items that deal
- // with object numbers or counts but not any items that deal with
- // lengths or offsets. The code that writes linearized files will
- // have to fill in these values during the first pass. The
- // validation code can compute them relatively easily given the
- // rest of the information.
-
- // npages is the size of the existing pages vector, which has been
- // created by traversing the pages tree, and as such is a
- // reasonable size.
+ // We will be initializing some values of the computed hint tables. Specifically, we can
+ // initialize any items that deal with object numbers or counts but not any items that deal with
+ // lengths or offsets. The code that writes linearized files will have to fill in these values
+ // during the first pass. The validation code can compute them relatively easily given the rest
+ // of the information.
+
+ // npages is the size of the existing pages vector, which has been created by traversing the
+ // pages tree, and as such is a reasonable size.
m->c_linp.npages = npages;
m->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(toS(npages));
@@ -1226,11 +1181,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
m->part4.push_back(getObject(og));
}
- // Part 6: first page objects. Note: implementation note 124
- // states that Acrobat always treats page 0 as the first page for
- // linearization regardless of /OpenAction. pdlin doesn't provide
- // any option to set this and also disregards /OpenAction. We
- // will do the same.
+ // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats
+ // page 0 as the first page for linearization regardless of /OpenAction. pdlin doesn't provide
+ // any option to set this and also disregards /OpenAction. We will do the same.
// First, place the actual first page object itself.
if (pages.empty()) {
@@ -1245,10 +1198,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
m->c_linp.first_page_object = pages.at(0).getObjectID();
m->part6.push_back(pages.at(0));
- // The PDF spec "recommends" an order for the rest of the objects,
- // but we are going to disregard it except to the extent that it
- // groups private and shared objects contiguously for the sake of
- // hint tables.
+ // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard
+ // it except to the extent that it groups private and shared objects contiguously for the sake
+ // of hint tables.
for (auto const& og: lc_first_page_private) {
m->part6.push_back(getObject(og));
@@ -1263,11 +1215,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
pushOutlinesToPart(m->part6, lc_outlines, object_stream_data);
}
- // Fill in page offset hint table information for the first page.
- // The PDF spec says that nshared_objects should be zero for the
- // first page. pdlin does not appear to obey this, but it fills
- // in garbage values for all the shared object identifiers on the
- // first page.
+ // Fill in page offset hint table information for the first page. The PDF spec says that
+ // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but
+ // it fills in garbage values for all the shared object identifiers on the first page.
m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size());
@@ -1287,8 +1237,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
lc_other_page_private.erase(page_og);
m->part7.push_back(pages.at(i));
- // Place all non-shared objects referenced by this page,
- // updating the page object count for the hint table.
+ // Place all non-shared objects referenced by this page, updating the page object count for
+ // the hint table.
m->c_page_offset_data.entries.at(i).nobjects = 1;
@@ -1321,12 +1271,10 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
// Part 9: other objects
- // The PDF specification makes recommendations on ordering here.
- // We follow them only to a limited extent. Specifically, we put
- // the pages tree first, then private thumbnail objects in page
- // order, then shared thumbnail objects, and then outlines (unless
- // in part 6). After that, we throw all remaining objects in
- // arbitrary order.
+ // The PDF specification makes recommendations on ordering here. We follow them only to a
+ // limited extent. Specifically, we put the pages tree first, then private thumbnail objects in
+ // page order, then shared thumbnail objects, and then outlines (unless in part 6). After that,
+ // we throw all remaining objects in arbitrary order.
// Place the pages tree.
std::set<QPDFObjGen> pages_ogs =
@@ -1342,9 +1290,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
}
}
- // Place private thumbnail images in page order. Slightly more
- // information would be required if we were going to bother with
- // thumbnail hint tables.
+ // Place private thumbnail images in page order. Slightly more information would be required if
+ // we were going to bother with thumbnail hint tables.
for (size_t i = 0; i < toS(npages); ++i) {
QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb");
thumb = getUncompressedObject(thumb, object_stream_data);
@@ -1355,11 +1302,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
lc_thumbnail_private.erase(thumb_og);
m->part9.push_back(thumb);
} else {
- // No internal error this time...there's nothing to
- // stop this object from having been referred to
- // somewhere else outside of a page's /Thumb, and if
- // it had been, there's nothing to prevent it from
- // having been in some set other than
+ // No internal error this time...there's nothing to stop this object from having
+ // been referred to somewhere else outside of a page's /Thumb, and if it had been,
+ // there's nothing to prevent it from having been in some set other than
// lc_thumbnail_private.
}
std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, toI(i))];
@@ -1372,9 +1317,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
}
}
if (!lc_thumbnail_private.empty()) {
- stopOnError("INTERNAL ERROR: "
- "QPDF::calculateLinearizationData: lc_thumbnail_private "
- "not empty after placing thumbnails");
+ stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: lc_thumbnail_private not "
+ "empty after placing thumbnails");
}
// Place shared thumbnail objects
@@ -1404,17 +1348,15 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
std::to_string(num_placed) + "; number of objects: " + std::to_string(num_wanted));
}
- // Calculate shared object hint table information including
- // references to shared objects from page offset hint data.
+ // Calculate shared object hint table information including references to shared objects from
+ // page offset hint data.
- // The shared object hint table consists of all part 6 (whether
- // shared or not) in order followed by all part 8 objects in
- // order. Add the objects to shared object data keeping a map of
- // object number to index. Then populate the shared object
- // information for the pages.
+ // The shared object hint table consists of all part 6 (whether shared or not) in order followed
+ // by all part 8 objects in order. Add the objects to shared object data keeping a map of
+ // object number to index. Then populate the shared object information for the pages.
- // Note that two objects never have the same object number, so we
- // can map from object number only without regards to generation.
+ // Note that two objects never have the same object number, so we can map from object number
+ // only without regard to generation.
std::map<int, int> obj_to_index;
m->c_shared_object_data.nshared_first_page = toI(m->part6.size());
@@ -1441,8 +1383,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
stopOnError("shared object hint table has wrong number of entries");
}
- // Now compute the list of shared objects for each page after the
- // first page.
+ // Now compute the list of shared objects for each page after the first page.
for (size_t i = 1; i < toS(npages); ++i) {
CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i);
@@ -1520,20 +1461,17 @@ QPDF::outputLengthNextN(
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{
- // Figure out the length of a series of n consecutive objects in
- // the output file starting with whatever object in_object from
- // the input file mapped to.
+ // Figure out the length of a series of n consecutive objects in the output file starting with
+ // whatever object in_object from the input file mapped to.
if (obj_renumber.count(in_object) == 0) {
- stopOnError("found object that is not renumbered while"
- " writing linearization data");
+ stopOnError("found object that is not renumbered while writing linearization data");
}
int first = (*(obj_renumber.find(in_object))).second;
int length = 0;
for (int i = 0; i < n; ++i) {
if (lengths.count(first + i) == 0) {
- stopOnError("found item with unknown length"
- " while writing linearization data");
+ stopOnError("found item with unknown length while writing linearization data");
}
length += toI((*(lengths.find(first + toI(i)))).second);
}
@@ -1548,16 +1486,14 @@ QPDF::calculateHPageOffset(
{
// Page Offset Hint Table
- // We are purposely leaving some values set to their initial zero
- // values.
+ // We are purposely leaving some values set to their initial zero values.
std::vector<QPDFObjectHandle> const& pages = getAllPages();
size_t npages = pages.size();
CHPageOffset& cph = m->c_page_offset_data;
std::vector<CHPageOffsetEntry>& cphe = cph.entries;
- // Calculate minimum and maximum values for number of objects per
- // page and page length.
+ // Calculate minimum and maximum values for number of objects per page and page length.
int min_nobjects = cphe.at(0).nobjects;
int max_nobjects = min_nobjects;
@@ -1572,11 +1508,11 @@ QPDF::calculateHPageOffset(
phe = std::vector<HPageOffsetEntry>(npages);
for (unsigned int i = 0; i < npages; ++i) {
- // Calculate values for each page, assigning full values to
- // the delta items. They will be adjusted later.
+ // Calculate values for each page, assigning full values to the delta items. They will be
+ // adjusted later.
- // Repeat calculations for page 0 so we can assign to phe[i]
- // without duplicating those assignments.
+ // Repeat calculations for page 0 so we can assign to phe[i] without duplicating those
+ // assignments.
int nobjects = cphe.at(i).nobjects;
int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber);
@@ -1604,11 +1540,10 @@ QPDF::calculateHPageOffset(
ph.nbits_shared_identifier = nbits(m->c_shared_object_data.nshared_total);
ph.shared_denominator = 4; // doesn't matter
- // It isn't clear how to compute content offset and content
- // length. Since we are not interleaving page objects with the
- // content stream, we'll use the same values for content length as
- // page length. We will use 0 as content offset because this is
- // what Adobe does (implementation note 127) and pdlin as well.
+ // It isn't clear how to compute content offset and content length. Since we are not
+ // interleaving page objects with the content stream, we'll use the same values for content
+ // length as page length. We will use 0 as content offset because this is what Adobe does
+ // (implementation note 127) and pdlin as well.
ph.nbits_delta_content_length = ph.nbits_delta_page_length;
ph.min_content_length = ph.min_page_length;
@@ -1616,8 +1551,8 @@ QPDF::calculateHPageOffset(
// Adjust delta entries
if ((phe.at(i).delta_nobjects < min_nobjects) ||
(phe.at(i).delta_page_length < min_length)) {
- stopOnError("found too small delta nobjects or delta page length"
- " while writing linearization data");
+ stopOnError("found too small delta nobjects or delta page length while writing "
+ "linearization data");
}
phe.at(i).delta_nobjects -= min_nobjects;
phe.at(i).delta_page_length -= min_length;
@@ -1669,8 +1604,7 @@ QPDF::calculateHSharedObject(
for (size_t i = 0; i < toS(cso.nshared_total); ++i) {
// Adjust deltas
if (soe.at(i).delta_group_length < min_length) {
- stopOnError("found too small group length while"
- " writing linearization data");
+ stopOnError("found too small group length while writing linearization data");
}
soe.at(i).delta_group_length -= min_length;
}
@@ -1700,14 +1634,13 @@ template <class T, class int_type>
static void
write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec, int bits, int_type T::*field)
{
- // nitems times, write bits bits from the given field of the ith
- // vector to the given bit writer.
+ // nitems times, write bits bits from the given field of vec's ith element to the bit writer.
for (size_t i = 0; i < QIntC::to_size(nitems); ++i) {
w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits));
}
- // The PDF spec says that each hint table starts at a byte
- // boundary. Each "row" actually must start on a byte boundary.
+ // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must
+ // start on a byte boundary.
w.flush();
}
@@ -1721,8 +1654,8 @@ write_vector_vector(
int bits,
std::vector<int> T::*vec2)
{
- // nitems1 times, write nitems2 (from the ith element of vec1) items
- // from the vec2 vector field of the ith item of vec1.
+ // nitems1 times, write nitems2 (from the ith element of vec1) items from the vec2 vector field
+ // of the ith item of vec1.
for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) {
for (size_t i2 = 0; i2 < QIntC::to_size(vec1.at(i1).*nitems2); ++i2) {
w.writeBits(QIntC::to_ulonglong((vec1.at(i1).*vec2).at(i2)), QIntC::to_size(bits));
@@ -1835,8 +1768,8 @@ QPDF::generateHintStream(
calculateHSharedObject(xref, lengths, obj_renumber);
calculateHOutline(xref, lengths, obj_renumber);
- // Write the hint stream itself into a compressed memory buffer.
- // Write through a counter so we can get offsets.
+ // Write the hint stream itself into a compressed memory buffer. Write through a counter so we
+ // can get offsets.
Pl_Buffer hint_stream("hint stream");
Pl_Flate f("compress hint stream", &hint_stream, Pl_Flate::a_deflate);
Pl_Count c("count", &f);
diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc
index ab06f158..91da7564 100644
--- a/libqpdf/QPDF_optimization.cc
+++ b/libqpdf/QPDF_optimization.cc
@@ -64,9 +64,8 @@ QPDF::optimize(
return;
}
- // The PDF specification indicates that /Outlines is supposed to
- // be an indirect reference. Force it to be so if it exists and
- // is direct. (This has been seen in the wild.)
+ // The PDF specification indicates that /Outlines is supposed to be an indirect reference. Force
+ // it to be so if it exists and is direct. (This has been seen in the wild.)
QPDFObjectHandle root = getRoot();
if (root.getKey("/Outlines").isDictionary()) {
QPDFObjectHandle outlines = root.getKey("/Outlines");
@@ -76,8 +75,8 @@ QPDF::optimize(
}
}
- // Traverse pages tree pushing all inherited resources down to the
- // page level. This also initializes m->all_pages.
+ // Traverse pages tree pushing all inherited resources down to the page level. This also
+ // initializes m->all_pages.
pushInheritedAttributesToPage(allow_changes, false);
// Traverse pages
@@ -102,12 +101,10 @@ QPDF::optimize(
}
for (auto const& key: root.getKeys()) {
- // Technically, /I keys from /Thread dictionaries are supposed
- // to be handled separately, but we are going to disregard
- // that specification for now. There is loads of evidence
- // that pdlin and Acrobat both disregard things like this from
- // time to time, so this is almost certain not to cause any
- // problems.
+ // Technically, /I keys from /Thread dictionaries are supposed to be handled separately, but
+ // we are going to disregard that specification for now. There is loads of evidence that
+ // pdlin and Acrobat both disregard things like this from time to time, so this is almost
+ // certain not to cause any problems.
updateObjectMaps(
ObjUser(ObjUser::ou_root_key, key), root.getKey(key), skip_stream_parameters);
}
@@ -130,23 +127,20 @@ QPDF::pushInheritedAttributesToPage()
void
QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
{
- // Traverse pages tree pushing all inherited resources down to the
- // page level.
+ // Traverse pages tree pushing all inherited resources down to the page level.
- // The record of whether we've done this is cleared by
- // updateAllPagesCache(). If we're warning for skipped keys,
- // re-traverse unconditionally.
+ // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning
+ // for skipped keys, re-traverse unconditionally.
if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) {
return;
}
- // Calling getAllPages() resolves any duplicated page objects,
- // repairs broken nodes, and detects loops, so we don't have to do
- // those activities here.
+ // Calling getAllPages() resolves any duplicated page objects, repairs broken nodes, and detects
+ // loops, so we don't have to do those activities here.
getAllPages();
- // key_ancestors is a mapping of page attribute keys to a stack of
- // Pages nodes that contain values for them.
+ // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain
+ // values for them.
std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
pushInheritedAttributesToPageInternal(
m->trailer.getKey("/Root").getKey("/Pages"),
@@ -168,10 +162,9 @@ QPDF::pushInheritedAttributesToPageInternal(
bool allow_changes,
bool warn_skipped_keys)
{
- // Make a list of inheritable keys. Only the keys /MediaBox,
- // /CropBox, /Resources, and /Rotate are inheritable
- // attributes. Push this object onto the stack of pages nodes
- // that have values for this attribute.
+ // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate
+ // are inheritable attributes. Push this object onto the stack of pages nodes that have values
+ // for this attribute.
std::set<std::string> inheritable_keys;
for (auto const& key: cur_pages.getKeys()) {
@@ -183,9 +176,7 @@ QPDF::pushInheritedAttributesToPageInternal(
m->file->getName(),
m->last_object_description,
m->file->getLastOffset(),
- "optimize detected an "
- "inheritable attribute when called "
- "in no-change mode");
+ "optimize detected an inheritable attribute when called in no-change mode");
}
// This is an inheritable resource
@@ -194,9 +185,8 @@ QPDF::pushInheritedAttributesToPageInternal(
QTC::TC("qpdf", "QPDF opt direct pages resource", oh.isIndirect() ? 0 : 1);
if (!oh.isIndirect()) {
if (!oh.isScalar()) {
- // Replace shared direct object non-scalar
- // resources with indirect objects to avoid
- // copying large structures around.
+ // Replace shared direct object non-scalar resources with indirect objects to
+ // avoid copying large structures around.
cur_pages.replaceKey(key, makeIndirectObject(oh));
oh = cur_pages.getKey(key);
} else {
@@ -208,14 +198,12 @@ QPDF::pushInheritedAttributesToPageInternal(
if (key_ancestors[key].size() > 1) {
QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
}
- // Remove this resource from this node. It will be
- // reattached at the page level.
+ // Remove this resource from this node. It will be reattached at the page level.
cur_pages.removeKey(key);
} else if (!((key == "/Type") || (key == "/Parent") || (key == "/Kids") ||
(key == "/Count"))) {
- // Warn when flattening, but not if the key is at the top
- // level (i.e. "/Parent" not set), as we don't change these;
- // but flattening removes intermediate /Pages nodes.
+ // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not
+ // set), as we don't change these; but flattening removes intermediate /Pages nodes.
if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
QTC::TC("qpdf", "QPDF unknown key not inherited");
setLastObjectDescription("Pages object", cur_pages.getObjGen());
@@ -224,24 +212,21 @@ QPDF::pushInheritedAttributesToPageInternal(
m->last_object_description,
0,
("Unknown key " + key +
- " in /Pages object"
- " is being discarded as a result of"
- " flattening the /Pages tree"));
+ " in /Pages object is being discarded as a result of flattening the /Pages "
+ "tree"));
}
}
}
- // Process descendant nodes. This method does not perform loop
- // detection because all code paths that lead here follow a call
- // to getAllPages, which already throws an exception in the event
+ // Process descendant nodes. This method does not perform loop detection because all code paths
+ // that lead here follow a call to getAllPages, which already throws an exception in the event
// of a loop in the pages tree.
for (auto& kid: cur_pages.getKey("/Kids").aitems()) {
if (kid.isDictionaryOfType("/Pages")) {
pushInheritedAttributesToPageInternal(
kid, key_ancestors, allow_changes, warn_skipped_keys);
} else {
- // Add all available inheritable attributes not present in
- // this object to this object.
+ // Add all available inheritable attributes not present in this object to this object.
for (auto const& iter: key_ancestors) {
std::string const& key = iter.first;
if (!kid.hasKey(key)) {
@@ -254,10 +239,9 @@ QPDF::pushInheritedAttributesToPageInternal(
}
}
- // For each inheritable key, pop the stack. If the stack
- // becomes empty, remove it from the map. That way, the
- // invariant that the list of keys in key_ancestors is exactly
- // those keys for which inheritable attributes are available.
+ // For each inheritable key, pop the stack. If the stack becomes empty, remove it from the map.
+ // This maintains the invariant that the list of keys in key_ancestors is exactly those keys for
+ // which inheritable attributes are available.
if (!inheritable_keys.empty()) {
QTC::TC("qpdf", "QPDF opt inheritable keys");
@@ -291,8 +275,7 @@ QPDF::updateObjectMapsInternal(
QPDFObjGen::set& visited,
bool top)
{
- // Traverse the object tree from this point taking care to avoid
- // crossing page boundaries.
+ // Traverse the object tree from this point taking care to avoid crossing page boundaries.
bool is_page_node = false;
@@ -332,8 +315,7 @@ QPDF::updateObjectMapsInternal(
for (auto const& key: dict.getKeys()) {
if (is_page_node && (key == "/Thumb")) {
- // Traverse page thumbnail dictionaries as a special
- // case.
+ // Traverse page thumbnail dictionaries as a special case.
updateObjectMapsInternal(
ObjUser(ObjUser::ou_thumb, ou.pageno),
dict.getKey(key),
@@ -345,8 +327,7 @@ QPDF::updateObjectMapsInternal(
} else if (
((ssp >= 1) && (key == "/Length")) ||
((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) {
- // Don't traverse into stream parameters that we are
- // not going to write.
+ // Don't traverse into stream parameters that we are not going to write.
} else {
updateObjectMapsInternal(
ou, dict.getKey(key), skip_stream_parameters, visited, false);
@@ -362,9 +343,8 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data)
return;
}
- // Transform object_to_obj_users and obj_user_to_objects so that
- // they refer only to uncompressed objects. If something is a
- // user of a compressed object, then it is really a user of the
+ // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed
+ // objects. If something is a user of a compressed object, then it is really a user of the
// object stream that contains it.
std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;
diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc
index 81fd11a3..e03dabc8 100644
--- a/libqpdf/QPDF_pages.cc
+++ b/libqpdf/QPDF_pages.cc
@@ -4,55 +4,42 @@
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
-// In support of page manipulation APIs, these methods internally
-// maintain state about pages in a pair of data structures: all_pages,
-// which is a vector of page objects, and pageobj_to_pages_pos, which
-// maps a page object to its position in the all_pages array.
-// Unfortunately, the getAllPages() method returns a const reference
-// to all_pages and has been in the public API long before the
-// introduction of mutation APIs, so we're pretty much stuck with it.
-// Anyway, there are lots of calls to it in the library, so the
-// efficiency of having it cached is probably worth keeping it. At one
-// point, I had partially implemented a helper class specifically for
-// the pages tree, but once you work in all the logic that handles
-// repairing the /Type keys of page tree nodes (both /Pages and /Page)
-// and deal with duplicate pages, it's just as complex and less
-// efficient than what's here. So, in spite of the fact that a const
-// reference is returned, the current code is fine and does not need
-// to be replaced. A partial implementation of QPDFPagesTree is in
-// github in attic in case there is ever a reason to resurrect it.
-// There are additional notes in README-maintainer, which also refers
-// to this comment.
+// In support of page manipulation APIs, these methods internally maintain state about pages in a
+// pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos,
+// which maps a page object to its position in the all_pages array. Unfortunately, the getAllPages()
+// method returns a const reference to all_pages and has been in the public API long before the
+// introduction of mutation APIs, so we're pretty much stuck with it. Anyway, there are lots of
+// calls to it in the library, so the efficiency of having it cached is probably worth keeping it.
+// At one point, I had partially implemented a helper class specifically for the pages tree, but
+// once you work in all the logic that handles repairing the /Type keys of page tree nodes (both
+// /Pages and /Page) and deal with duplicate pages, it's just as complex and less efficient than
+// what's here. So, in spite of the fact that a const reference is returned, the current code is
+// fine and does not need to be replaced. A partial implementation of QPDFPagesTree is in github in
+// attic in case there is ever a reason to resurrect it. There are additional notes in
+// README-maintainer, which also refers to this comment.
-// The goal of this code is to ensure that the all_pages vector, which
-// users may have a reference to, and the pageobj_to_pages_pos map,
-// which users will not have access to, remain consistent outside of
-// any call to the library. As long as users only touch the /Pages
-// structure through page-specific API calls, they never have to worry
-// about anything, and this will also stay consistent. If a user
-// touches anything about the /Pages structure outside of these calls
-// (such as by directly looking up and manipulating the underlying
-// objects), they can call updatePagesCache() to bring things back in
-// sync.
+// The goal of this code is to ensure that the all_pages vector, which users may have a reference
+// to, and the pageobj_to_pages_pos map, which users will not have access to, remain consistent
+// outside of any call to the library. As long as users only touch the /Pages structure through
+// page-specific API calls, they never have to worry about anything, and this will also stay
+// consistent. If a user touches anything about the /Pages structure outside of these calls (such
+// as by directly looking up and manipulating the underlying objects), they can call
+// updateAllPagesCache() to bring things back in sync.
-// If the user doesn't ever use the page manipulation APIs, then qpdf
-// leaves the /Pages structure alone. If the user does use the APIs,
-// then we push all inheritable objects down and flatten the /Pages
-// tree. This makes it easier for us to keep /Pages, all_pages, and
-// pageobj_to_pages_pos internally consistent at all times.
+// If the user doesn't ever use the page manipulation APIs, then qpdf leaves the /Pages structure
+// alone. If the user does use the APIs, then we push all inheritable objects down and flatten the
+// /Pages tree. This makes it easier for us to keep /Pages, all_pages, and pageobj_to_pages_pos
+// internally consistent at all times.
-// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the
-// Pages structure consistent should remain in as few places as
-// possible. As of initial writing, only flattenPagesTree,
-// insertPage, and removePage, along with methods they call, are
-// concerned with it. Everything else goes through one of those
-// methods.
+// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the Pages structure consistent
+// should remain in as few places as possible. As of initial writing, only flattenPagesTree,
+// insertPage, and removePage, along with methods they call, are concerned with it. Everything else
+// goes through one of those methods.
std::vector<QPDFObjectHandle> const&
QPDF::getAllPages()
{
- // Note that pushInheritedAttributesToPage may also be used to
- // initialize m->all_pages.
+ // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages.
if (m->all_pages.empty()) {
m->ever_called_get_all_pages = true;
QPDFObjGen::set visited;
@@ -65,9 +52,8 @@ QPDF::getAllPages()
// loop -- will be detected again and reported later
break;
}
- // Files have been found in the wild where /Pages in the
- // catalog points to the first page. Try to work around
- // this and similar cases with this heuristic.
+ // Files have been found in the wild where /Pages in the catalog points to the first
+ // page. Try to work around this and similar cases with this heuristic.
if (!warned) {
getRoot().warnIfPossible("document page tree root (root -> /Pages) doesn't point"
" to the root of the page tree; attempting to correct");
@@ -118,8 +104,8 @@ QPDF::getAllPagesInternal(
kid = makeIndirectObject(kid);
kids.setArrayItem(i, kid);
} else if (!seen.add(kid)) {
- // Make a copy of the page. This does the same as
- // shallowCopyPage in QPDFPageObjectHelper.
+ // Make a copy of the page. This does the same as shallowCopyPage in
+ // QPDFPageObjectHelper.
QTC::TC("qpdf", "QPDF resolve duplicated page object");
cur_node.warnIfPossible(
"kid " + std::to_string(i) +
@@ -141,9 +127,8 @@ QPDF::getAllPagesInternal(
void
QPDF::updateAllPagesCache()
{
- // Force regeneration of the pages cache. We force immediate
- // recalculation of all_pages since users may have references to
- // it that they got from calls to getAllPages(). We can defer
+ // Force regeneration of the pages cache. We force immediate recalculation of all_pages since
+ // users may have references to it that they got from calls to getAllPages(). We can defer
// recalculation of pageobj_to_pages_pos until needed.
QTC::TC("qpdf", "QPDF updateAllPagesCache");
m->all_pages.clear();
@@ -155,25 +140,23 @@ QPDF::updateAllPagesCache()
void
QPDF::flattenPagesTree()
{
- // If not already done, flatten the /Pages structure and
- // initialize pageobj_to_pages_pos.
+ // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos.
if (!m->pageobj_to_pages_pos.empty()) {
return;
}
- // Push inherited objects down to the /Page level. As a side
- // effect m->all_pages will also be generated.
+ // Push inherited objects down to the /Page level. As a side effect m->all_pages will also be
+ // generated.
pushInheritedAttributesToPage(true, true);
QPDFObjectHandle pages = getRoot().getKey("/Pages");
size_t const len = m->all_pages.size();
for (size_t pos = 0; pos < len; ++pos) {
- // Populate pageobj_to_pages_pos and fix parent pointer. There
- // should be no duplicates at this point because
- // pushInheritedAttributesToPage calls getAllPages which
- // resolves duplicates.
+ // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at
+ // this point because pushInheritedAttributesToPage calls getAllPages which resolves
+ // duplicates.
insertPageobjToPage(m->all_pages.at(pos), toI(pos), true);
m->all_pages.at(pos).replaceKey("/Parent", pages);
}
@@ -191,16 +174,14 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli
QPDFObjGen og(obj.getObjGen());
if (check_duplicate) {
if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) {
- // The library never calls insertPageobjToPage in a way
- // that causes this to happen.
+ // The library never calls insertPageobjToPage in a way that causes this to happen.
setLastObjectDescription("page " + std::to_string(pos) + " (numbered from zero)", og);
throw QPDFExc(
qpdf_e_pages,
m->file->getName(),
m->last_object_description,
0,
- "duplicate page reference found;"
- " this would cause loss of data");
+ "duplicate page reference found; this would cause loss of data");
}
} else {
m->pageobj_to_pages_pos[og] = pos;
@@ -210,8 +191,7 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli
void
QPDF::insertPage(QPDFObjectHandle newpage, int pos)
{
- // pos is numbered from 0, so pos = 0 inserts at the beginning and
- // pos = npages adds to the end.
+ // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end.
flattenPagesTree();
@@ -233,10 +213,9 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos)
QTC::TC(
"qpdf",
"QPDF insert page",
- (pos == 0) ? 0 : // insert at beginning
- (pos == toI(m->all_pages.size())) ? 1
- : // at end
- 2); // insert in middle
+ (pos == 0) ? 0 : // insert at beginning
+ (pos == toI(m->all_pages.size())) ? 1 // at end
+ : 2); // insert in middle
auto og = newpage.getObjGen();
if (m->pageobj_to_pages_pos.count(og)) {
@@ -265,10 +244,9 @@ QPDF::removePage(QPDFObjectHandle page)
QTC::TC(
"qpdf",
"QPDF remove page",
- (pos == 0) ? 0 : // remove at beginning
- (pos == toI(m->all_pages.size() - 1)) ? 1
- : // end
- 2); // remove in middle
+ (pos == 0) ? 0 : // remove at beginning
+ (pos == toI(m->all_pages.size() - 1)) ? 1 // end
+ : 2); // remove in middle
QPDFObjectHandle pages = getRoot().getKey("/Pages");
QPDFObjectHandle kids = pages.getKey("/Kids");