aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore8
-rw-r--r--doc/stylesheet.css284
-rw-r--r--manual/common.xsl9
-rw-r--r--manual/html.xsl.in8
-rw-r--r--manual/print.xsl.in69
-rw-r--r--manual/qpdf-manual.xml10193
6 files changed, 1 insertions, 10570 deletions
diff --git a/.gitignore b/.gitignore
index 2eb7fe16..3a086e8a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,11 +6,7 @@ autom4te.cache/
config.log
config.status
distfiles.zip
-doc/fix-qdf.1
-doc/qpdf-manual.html
-doc/qpdf-manual.pdf
-doc/qpdf.1
-doc/zlib-flate.1
+doc
examples/build/
external-libs
fuzz/build/
@@ -21,8 +17,6 @@ libqpdf/qpdf/qpdf-config.h
libtests/build/
libtool
manual/build/
-manual/html.xsl
-manual/print.xsl
qpdf/build/
zlib-flate/build/
distribution/
diff --git a/doc/stylesheet.css b/doc/stylesheet.css
deleted file mode 100644
index abaf226f..00000000
--- a/doc/stylesheet.css
+++ /dev/null
@@ -1,284 +0,0 @@
-/**************************************************************/
-/* Custom style-sheet for the QPDF manual in HTML form. */
-/**************************************************************/
-
-/*
- * This file is the CSS for the QPDF manual. It is based heavily on
- * the CSS for the Subversion book. That file contains the following
- * copyright and attribution:
- *
- * Copyright (c) 2003-2007
- * Ben Collins-Sussman, Brian W. Fitzpatrick, C. Michael Pilato.
- *
- * This work is licensed under the Creative Commons Attribution License.
- * To view a copy of this license, visit
- * http://creativecommons.org/licenses/by/2.0/ or send a letter to
- * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305,
- * USA.
- */
-
-body
-{
- background: white;
- margin: 0.5in;
-}
-
-p, li, ul, ol, dd, dt
-{
- font-style: normal;
- font-weight: normal;
- color: black;
-}
-
-tt, pre
-{
- font-family: courier new,courier,fixed;
-}
-
-a
-{
- color: blue;
- text-decoration: underline;
-}
-
-a:hover
-{
- background: rgb(75%,75%,100%);
- color: blue;
- text-decoration: underline;
-}
-
-a:visited
-{
- color: purple;
- text-decoration: underline;
-}
-
-img
-{
- border: none;
-}
-
-h1.title
-{
- font-size: 250%;
- font-style: normal;
- font-weight: bold;
- color: black;
-}
-
-h2.subtitle
-{
- font-size: 150%;
- font-style: italic;
- color: black;
-}
-
-h2.title
-{
- font-size: 150%;
- font-style: normal;
- font-weight: bold;
- color: black;
-}
-
-h3.title
-{
- font-size: 125%;
- font-style: normal;
- font-weight: bold;
- color: black;
-}
-
-h4.title
-{
- font-size: 100%;
- font-style: normal;
- font-weight: bold;
- color: black;
-}
-
-.toc b
-{
- font-size: 125%;
- font-style: normal;
- font-weight: bold;
- color: black;
-}
-
-.screen, .programlisting, .literal
-{
- font-family: courier new,courier,fixed;
- font-style: normal;
- font-weight: normal;
-}
-
-.command, .option, .type
-{
- font-family: courier new,courier,fixed;
- font-style: normal;
- font-weight: normal;
-}
-
-.filename
-{
- font-family: arial,helvetica,sans-serif;
- font-style: italic;
-}
-
-.property
-{
- font-family: arial,helvetica,sans-serif;
- font-weight: bold;
-}
-
-.classname
-{
- font-family: arial,helvetica,sans-serif;
- font-weight: bold;
- font-style: italic;
-}
-
-.varname, .function, .envar
-{
- font-family: arial,helvetica,sans-serif;
- font-style: italic;
-}
-
-.replaceable
-{
- font-style: italic;
- font-size: 100%;
-}
-
-.figure, .example, .table
-{
- margin: 0.125in 0.25in;
-}
-
-.table table
-{
- border-width: 1px;
- border-style: solid;
- border-color: black;
- border-spacing: 0;
- background: rgb(240,240,240);
-}
-
-.table td
-{
- border: none;
- border-right: 1px black solid;
- border-bottom: 1px black solid;
- padding: 2px;
-}
-
-.table th
-{
- background: rgb(180,180,180);
- border: none;
- border-right: 1px black solid;
- border-bottom: 1px black solid;
- padding: 2px;
-}
-
-.table p.title, .figure p.title, .example p.title
-{
- text-align: left !important;
- font-size: 100% !important;
-}
-
-.author, .pubdate
-{
- margin: 0;
- font-size: 100%;
- font-style: italic;
- font-weight: normal;
- color: black;
-}
-
-.preface div.author, .preface .pubdate
-{
- font-size: 80%;
-}
-
-.sidebar
-{
- border-top: dotted 1px black;
- border-left: dotted 1px black;
- border-right: solid 2px black;
- border-bottom: solid 2px black;
- background: rgb(240,220,170);
- padding: 0 0.12in;
- margin: 0.25in;
-}
-
-.note .programlisting, .note .screen,
-.tip .programlisting, .tip .screen,
-.warning .programlisting, .warning .screen,
-.sidebar .programlisting, .sidebar .screen
-{
- border: none;
- background: none;
-}
-
-.sidebar p.title
-{
- text-align: center;
- font-size: 125%;
-}
-
-.note
-{
- border: black solid 1px;
- background: url(./images/note.png) no-repeat rgb(252,246,220);
- margin: 0.125in 0;
- padding: 0 55px;
-}
-
-.tip
-{
- border: black solid 1px;
- background: url(./images/tip.png) no-repeat rgb(224,244,255);
- margin: 0.125in 0;
- padding: 0 55px;
-}
-
-.warning
-{
- border: black solid 1px;
- background: url(./images/warning.png) no-repeat rgb(255,210,210);
- margin: 0.125in 0;
- padding: 0 55px;
-}
-
-/*
-.note .title, .tip .title, .warning .title
-{
- display: none;
-}
-*/
-
-.programlisting, .screen
-{
- font-size: 90%;
- color: black;
- margin: 1em 0.25in;
- padding: 0.5em;
- background: rgb(240,240,240);
- border-top: black dotted 1px;
- border-left: black dotted 1px;
- border-right: black solid 2px;
- border-bottom: black solid 2px;
-}
-
-.navheader, .navfooter
-{
- border: black solid 1px;
- background: rgb(180,180,200);
-}
-
-.navheader hr, .navfooter hr
-{
- display: none;
-}
diff --git a/manual/common.xsl b/manual/common.xsl
deleted file mode 100644
index e564bfd8..00000000
--- a/manual/common.xsl
+++ /dev/null
@@ -1,9 +0,0 @@
-<?xml version='1.0'?>
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- version="1.0">
- <xsl:param name="variablelist.as.blocks" select="1"/>
- <xsl:param name="body.start.indent">0pt</xsl:param>
- <xsl:param name="xref.with.number.and.title" select="'yes'"/>
- <xsl:param name="section.autolabel" select="1"/>
- <xsl:param name="section.label.includes.component.label" select="1"/>
-</xsl:stylesheet>
diff --git a/manual/html.xsl.in b/manual/html.xsl.in
deleted file mode 100644
index e96f0583..00000000
--- a/manual/html.xsl.in
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version='1.0'?>
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:fo="http://www.w3.org/1999/XSL/Format"
- version="1.0">
- <xsl:import href="@DOCBOOK_XHTML@"/>
- <xsl:import href="common.xsl"/>
- <xsl:param name="html.stylesheet">stylesheet.css</xsl:param>
-</xsl:stylesheet>
diff --git a/manual/print.xsl.in b/manual/print.xsl.in
deleted file mode 100644
index d712cb35..00000000
--- a/manual/print.xsl.in
+++ /dev/null
@@ -1,69 +0,0 @@
-<?xml version='1.0'?>
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:fo="http://www.w3.org/1999/XSL/Format"
- version="1.0">
- <xsl:import href="@DOCBOOK_FO@"/>
- <xsl:import href="common.xsl"/>
- <xsl:param name="local.l10n.xml" select="document('')"/>
- <l:i18n xmlns:l="http://docbook.sourceforge.net/xmlns/l10n/1.0">
- <l:l10n language="en">
- <l:context name="xref">
- <l:template name="page.citation" text=", page %p"/>
- </l:context>
- </l:l10n>
- </l:i18n>
- <!-- This should give us bookmarks, but it's broken for fop -->
- <!-- 0.94 and stylesheets 1.73.2. -->
-<!--
- <xsl:param name="fop.extensions" select="1"/>
--->
- <xsl:param name="paper.type" select="'USLetter'"/>
- <xsl:param name="insert.xref.page.number" select="'yes'"/>
-<!--
- <xsl:param name="admon.graphics.path">
- /tmp/z/docbook-xsl-1.73.2/images/
- </xsl:param>
- <xsl:param name="admon.graphics" select="1"/>
--->
- <xsl:param name="shade.verbatim" select="1"/>
- <xsl:attribute-set name="shade.verbatim.style">
- <xsl:attribute name="background-color">#F0F0F0</xsl:attribute>
- <xsl:attribute name="border-width">0.5pt</xsl:attribute>
- <xsl:attribute name="border-style">solid</xsl:attribute>
- <xsl:attribute name="border-color">#575757</xsl:attribute>
- <xsl:attribute name="padding">3pt</xsl:attribute>
- </xsl:attribute-set>
- <xsl:attribute-set name="xref.properties">
- <xsl:attribute name="color">#00c</xsl:attribute>
- </xsl:attribute-set>
- <fo:page-sequence language="en"/>
- <fo:block hyphenate="true"/>
-
- <xsl:template match="property">
- <xsl:call-template name="inline.boldseq"/>
- </xsl:template>
- <xsl:template match="classname">
- <fo:inline font-family="sans-serif" font-weight="bold">
- <xsl:call-template name="inline.italicseq"/>
- </fo:inline>
- </xsl:template>
- <xsl:template match="filename">
- <xsl:call-template name="inline.italicseq"/>
- </xsl:template>
- <xsl:template match="varname">
- <xsl:call-template name="inline.italicseq"/>
- </xsl:template>
- <xsl:template match="function">
- <xsl:call-template name="inline.italicseq"/>
- </xsl:template>
- <xsl:template match="envar">
- <xsl:call-template name="inline.italicseq"/>
- </xsl:template>
- <xsl:template match="type">
- <xsl:call-template name="inline.monoseq"/>
- </xsl:template>
- <xsl:template match="option">
- <xsl:call-template name="inline.boldseq"/>
- </xsl:template>
-
-</xsl:stylesheet>
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
deleted file mode 100644
index 1a543c23..00000000
--- a/manual/qpdf-manual.xml
+++ /dev/null
@@ -1,10193 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE book [
-<!ENTITY swversion "10.4.0">
-<!ENTITY lastreleased "November 16, 2021">
-]>
-<book>
- <bookinfo>
- <title>QPDF Manual</title>
- <subtitle>For QPDF Version &swversion;, &lastreleased;</subtitle>
- <author>
- <firstname>Jay</firstname><surname>Berkenbilt</surname>
- </author>
- <copyright>
- <year>2005&ndash;2020</year>
- <holder>Jay Berkenbilt</holder>
- </copyright>
- </bookinfo>
- <preface id="acknowledgments">
- <title>General Information</title>
- <para>
- QPDF is a program that does structural, content-preserving
- transformations on PDF files. QPDF's website is located at <ulink
- url="https://qpdf.sourceforge.io/">https://qpdf.sourceforge.io/</ulink>.
- QPDF's source code is hosted on github at <ulink
- url="https://github.com/qpdf/qpdf">https://github.com/qpdf/qpdf</ulink>.
- </para>
- <para>
- QPDF is licensed under <ulink
- url="http://www.apache.org/licenses/LICENSE-2.0">the Apache
- License, Version 2.0</ulink> (the "License"). Unless required by
- applicable law or agreed to in writing, software distributed under
- the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
- OR CONDITIONS OF ANY KIND, either express or implied. See the
- License for the specific language governing permissions and
- limitations under the License.
- </para>
- <para>
- Versions of qpdf prior to version 7 were released under the terms
- of <ulink url="https://opensource.org/licenses/Artistic-2.0">the
- Artistic License, version 2.0</ulink>. At your option, you may
- continue to consider qpdf to be licensed under those terms. The
- Apache License 2.0 permits everything that the Artistic License 2.0
- permits but is slightly less restrictive. Allowing the Artistic
- License to continue being used is primarily to help people who may
- have to get specific approval to use qpdf in their products.
- </para>
- <para>
- QPDF is intentionally released with a permissive license. However,
- if there is some reason that the licensing terms don't work for
- your requirements, please feel free to contact the copyright holder
- to make other arrangements.
- </para>
- <para>
- QPDF was originally created in 2001 and modified periodically
- between 2001 and 2005 during my employment at <ulink
- url="http://www.apexcovantage.com">Apex CoVantage</ulink>. Upon my
- departure from Apex, the company graciously allowed me to take
- ownership of the software and continue maintaining it as an open
- source project, a decision for which I am very grateful. I have
- made considerable enhancements to it since that time. I feel
- fortunate to have worked for people who would make such a decision.
- This work would not have been possible without their support.
- </para>
- </preface>
- <chapter id="ref.overview">
- <title>What is QPDF?</title>
- <para>
- QPDF is a program that does structural, content-preserving
- transformations on PDF files. It could have been called something
- like <emphasis>pdf-to-pdf</emphasis>. It also provides many useful
- capabilities to developers of PDF-producing software or for people
- who just want to look at the innards of a PDF file to learn more
- about how they work.
- </para>
- <para>
- With QPDF, it is possible to copy objects from one PDF file into
- another and to manipulate the list of pages in a PDF file. This
- makes it possible to merge and split PDF files. The QPDF library
- also makes it possible for you to create PDF files from scratch.
- In this mode, you are responsible for supplying all the contents of
- the file, while the QPDF library takes care of all the syntactical
- representation of the objects, creation of cross-reference tables
- and, if you use them, object streams, encryption, linearization,
- and other syntactic details. You are still responsible for
- generating PDF content on your own.
- </para>
- <para>
- QPDF has been designed with very few external dependencies, and it
- is intentionally very lightweight. QPDF is
- <emphasis>not</emphasis> a PDF content creation library, a PDF
- viewer, or a program capable of converting PDF into other formats.
- In particular, QPDF knows nothing about the semantics of PDF
- content streams. If you are looking for something that can do
- that, you should look elsewhere. However, once you have a valid
- PDF file, QPDF can be used to transform that file in ways perhaps
- your original PDF creation can't handle. For example, many
- programs generate simple PDF files but can't password-protect them,
- web-optimize them, or perform other transformations of that type.
- </para>
- </chapter>
- <chapter id="ref.installing">
- <title>Building and Installing QPDF</title>
- <para>
- This chapter describes how to build and install qpdf. Please see
- also the @1@filename@1@README.md@2@filename@2@ and
- @1@filename@1@INSTALL@2@filename@2@ files in the source distribution.
- </para>
- <sect1 id="ref.prerequisites">
- <title>System Requirements</title>
- <para>
- The qpdf package has few external dependencies. In order to build
- qpdf, the following packages are required:
- <itemizedlist>
- <listitem>
- <para>
- A C++ compiler that supports C++-14.
- </para>
- </listitem>
- <listitem>
- <para>
- zlib: <ulink url="http://www.zlib.net/">http://www.zlib.net/</ulink>
- </para>
- </listitem>
- <listitem>
- <para>
- jpeg: <ulink
- url="http://www.ijg.org/files/">http://www.ijg.org/files/</ulink>
- or <ulink
- url="https://libjpeg-turbo.org/">https://libjpeg-turbo.org/</ulink>
- </para>
- </listitem>
- <listitem>
- <para>
- <emphasis>Recommended but not required:</emphasis> gnutls:
- <ulink url="https://www.gnutls.org/">https://www.gnutls.org/</ulink>
- to be able to use the gnutls crypto provider, and/or openssl:
- <ulink url="https://openssl.org/">https://openssl.org/</ulink>
- to be able to use the openssl crypto provider.
- </para>
- </listitem>
- <listitem>
- <para>
- gnu make 3.81 or newer: <ulink url="http://www.gnu.org/software/make">http://www.gnu.org/software/make</ulink>
- </para>
- </listitem>
- <listitem>
- <para>
- perl version 5.8 or newer:
- <ulink url="http://www.perl.org/">http://www.perl.org/</ulink>;
- required for running the test suite. Starting with qpdf version
- 9.1.1, perl is no longer required at runtime.
- </para>
- </listitem>
- <listitem>
- <para>
- GNU diffutils (any version): <ulink
- url="http://www.gnu.org/software/diffutils/">http://www.gnu.org/software/diffutils/</ulink>
- is required to run the test suite. Note that this is the
- version of diff present on virtually all GNU/Linux systems.
- This is required because the test suite uses @1@command@1@diff
- -u@2@command@2@.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- Part of qpdf's test suite does comparisons of the contents of PDF
- files by converting them to images and comparing the images. The
- image comparison tests are disabled by default. Those tests are
- not required for determining correctness of a qpdf build if you
- have not modified the code since the test suite also contains
- expected output files that are compared literally. The image
- comparison tests provide an extra check to make sure that any
- content transformations don't break the rendering of pages.
- Transformations that affect the content streams themselves are off
- by default and are only provided to help developers look into the
- contents of PDF files. If you are making deep changes to the
- library that cause changes in the contents of the files that qpdf
- generates, then you should enable the image comparison tests.
- Enable them by running @1@command@1@configure@2@command@2@ with the
- @1@option@1@--enable-test-compare-images@2@option@2@ flag. If you enable
- this, the following additional packages are required by the
- test suite. Note that in no case are these items required to use
- qpdf.
- <itemizedlist>
- <listitem>
- <para>
- libtiff: <ulink url="http://www.remotesensing.org/libtiff/">http://www.remotesensing.org/libtiff/</ulink>
- </para>
- </listitem>
- <listitem>
- <para>
- GhostScript version 8.60 or newer: <ulink
- url="http://www.ghostscript.com">http://www.ghostscript.com</ulink>
- </para>
- </listitem>
- </itemizedlist>
- If you do not enable this, then you do not need to have tiff and
- ghostscript.
- </para>
- <para>
- Pre-built documentation is distributed with qpdf, so you should
- generally not need to rebuild the documentation. In order to
- build the documentation from its docbook sources, you need the
- docbook XML style sheets (<ulink
- url="http://downloads.sourceforge.net/docbook/">http://downloads.sourceforge.net/docbook/</ulink>).
- To build the PDF version of the documentation, you need Apache fop
- (<ulink
- url="http://xml.apache.org/fop/">http://xml.apache.org/fop/</ulink>)
- version 0.94 or higher.
- </para>
- </sect1>
- <sect1 id="ref.building">
- <title>Build Instructions</title>
- <para>
- Building qpdf on UNIX is generally just a matter of running
-
- <programlisting>./configure
-make
-</programlisting>
- You can also run @1@command@1@make check@2@command@2@ to run the test
- suite and @1@command@1@make install@2@command@2@ to install. Please run
- @1@command@1@./configure --help@2@command@2@ for options on what can be
- configured. You can also set the value of
- <varname>DESTDIR</varname> during installation to install to a
- temporary location, as is common with many open source packages.
- Please see also the @1@filename@1@README.md@2@filename@2@ and
- @1@filename@1@INSTALL@2@filename@2@ files in the source distribution.
- </para>
- <para>
- Building on Windows is a little bit more complicated. For
- details, please see @1@filename@1@README-windows.md@2@filename@2@ in the
- source distribution. You can also download a binary distribution
- for Windows. There is a port of qpdf to Visual C++ version 6 in
- the @1@filename@1@contrib@2@filename@2@ area generously contributed by
- Jian Ma. This is also discussed in more detail in
- @1@filename@1@README-windows.md@2@filename@2@.
- </para>
- <para>
- While <type>wchar_t</type> is part of the C++ standard, qpdf uses
- it in only one place in the public API, and it's just in a helper
- function. It is possible to build qpdf on a system that doesn't
- have <type>wchar_t</type>, and it's also possible to compile a
- program that uses qpdf on a system without <type>wchar_t</type> as
- long as you don't call that one method. This is a very unusual
- situation. For a detailed discussion, please see the top-level
- README.md file in qpdf's source distribution.
- </para>
- <para>
- There are some other things you can do with the build. Although
- qpdf uses @1@application@1@autoconf@2@application@2@, it does not use
- @1@application@1@automake@2@application@2@ but instead uses a
- hand-crafted non-recursive Makefile that requires gnu make. If
- you're really interested, please read the comments in the
- top-level @1@filename@1@Makefile@2@filename@2@.
- </para>
- </sect1>
- <sect1 id="ref.crypto">
- <title>Crypto Providers</title>
- <para>
- Starting with qpdf 9.1.0, the qpdf library can be built with
- multiple implementations of providers of cryptographic functions,
- which we refer to as "crypto providers." At the time
- of writing, a crypto implementation must provide MD5 and SHA2
- (256, 384, and 512-bit) hashes and RC4 and AES256 with and without
- CBC encryption. In the future, if digital signature support is added to
- qpdf, there may be additional requirements beyond this.
- </para>
- <para>
- Starting with qpdf version 9.1.0, the available implementations
- are <literal>native</literal> and <literal>gnutls</literal>. In
- qpdf 10.0.0, <literal>openssl</literal> was added. Additional
- implementations may be added if needed. It is also possible for a
- developer to provide their own implementation without modifying
- the qpdf library.
- </para>
- <sect2 id="ref.crypto.build">
- <title>Build Support For Crypto Providers</title>
- <para>
- When building with qpdf's build system, crypto providers can be
- enabled at build time using various
- @1@command@1@./configure@2@command@2@ options. The default behavior is
- for @1@command@1@./configure@2@command@2@ to discover which crypto
- providers can be supported based on available external libraries,
- to build all available crypto providers, and to use an external
- provider as the default over the native one. This behavior can be
- changed with the following flags to
- @1@command@1@./configure@2@command@2@:
- <itemizedlist>
- <listitem>
- <para>
- @1@option@1@--enable-crypto-@1@replaceable@1@x@2@replaceable@2@@2@option@2@
- (where @1@replaceable@1@x@2@replaceable@2@ is a supported crypto
- provider): enable the @1@replaceable@1@x@2@replaceable@2@ crypto
- provider, requiring any external dependencies it needs
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--disable-crypto-@1@replaceable@1@x@2@replaceable@2@@2@option@2@:
- disable the @1@replaceable@1@x@2@replaceable@2@ provider, and do not
- link against its dependencies even if they are available
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--with-default-crypto=@1@replaceable@1@x@2@replaceable@2@@2@option@2@:
- make @1@replaceable@1@x@2@replaceable@2@ the default provider even if
- a higher priority one is available
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--disable-implicit-crypto@2@option@2@: only build crypto
- providers that are explicitly requested with an
- @1@option@1@--enable-crypto-@1@replaceable@1@x@2@replaceable@2@@2@option@2@
- option
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- For example, if you want to guarantee that the gnutls crypto
- provider is used and that the native provider is not built, you
- could run @1@command@1@./configure --enable-crypto-gnutls
- --disable-implicit-crypto@2@command@2@.
- </para>
- <para>
- If you build qpdf using your own build system, in order for qpdf
- to work at all, you need to enable at least one crypto provider.
- The file @1@filename@1@libqpdf/qpdf/qpdf-config.h.in@2@filename@2@
- provides macros <literal>DEFAULT_CRYPTO</literal>, whose value
- must be a string naming the default crypto provider, and various
- symbols starting with <literal>USE_CRYPTO_</literal>, at least
- one of which has to be enabled. Additionally, you must compile
- the source files that implement a crypto provider. To get a list
- of those files, look at @1@filename@1@libqpdf/build.mk@2@filename@2@. If
- you want to omit a particular crypto provider, as long as its
- <literal>USE_CRYPTO_</literal> symbol is undefined, you can
- completely ignore the source files that belong to a particular
- crypto provider. Additionally, crypto providers may have their
- own external dependencies that can be omitted if the crypto
- provider is not used. For example, if you are building qpdf
- yourself and are using an environment that does not support
- gnutls or openssl, you can ensure that
- <literal>USE_CRYPTO_NATIVE</literal> is defined,
- <literal>USE_CRYPTO_GNUTLS</literal> is not defined, and
- <literal>DEFAULT_CRYPTO</literal> is defined to
- <literal>"native"</literal>. Then you must include the source
- files used in the native implementation, some of which were added
- or renamed from earlier versions, to your build, and you can
- ignore @1@filename@1@QPDFCrypto_gnutls.cc@2@filename@2@. Always consult
- @1@filename@1@libqpdf/build.mk@2@filename@2@ to get the list of source
- files you need to build.
- </para>
- </sect2>
- <sect2 id="ref.crypto.runtime">
- <title>Runtime Crypto Provider Selection</title>
- <para>
- You can use the @1@option@1@--show-crypto@2@option@2@ option to
- @1@command@1@qpdf@2@command@2@ to get a list of available crypto
- providers. The default provider is always listed first, and the
- rest are listed in lexical order. Each crypto provider is listed
- on a line by itself with no other text, enabling the output of
- this command to be used easily in scripts.
- </para>
- <para>
- You can override which crypto provider is used by setting the
- <literal>QPDF_CRYPTO_PROVIDER</literal> environment variable.
- There are few reasons to ever do this, but you might want to do
- it if you were explicitly trying to compare behavior of two
- different crypto providers while testing performance or
- reproducing a bug. It could also be useful for people who are
- implementing their own crypto providers.
- </para>
- </sect2>
- <sect2 id="ref.crypto.develop">
- <title>Crypto Provider Information for Developers</title>
- <para>
- If you are writing code that uses libqpdf and you want to force a
- certain crypto provider to be used, you can call the method
- <function>QPDFCryptoProvider::setDefaultProvider</function>. The
- argument is the name of a built-in or developer-supplied
- provider. To add your own crypto provider, you have to create a
- class derived from <classname>QPDFCryptoImpl</classname> and
- register it with <classname>QPDFCryptoProvider</classname>. For
- additional information, see comments in
- @1@filename@1@include/qpdf/QPDFCryptoImpl.hh@2@filename@2@.
- </para>
- </sect2>
- <sect2 id="ref.crypto.design">
- <title>Crypto Provider Design Notes</title>
- <para>
- This section describes a few bits of rationale for why the crypto
- provider interface was set up the way it was. You don't need to
- know any of this information, but it's provided for the record
- and in case it's interesting.
- </para>
- <para>
- As a general rule, I want to avoid as much as possible including
- large blocks of code that are conditionally compiled such that,
- in most builds, some code is never built. This is dangerous
- because it makes it very easy for invalid code to creep in
- unnoticed. As such, I want it to be possible to build qpdf with
- all available crypto providers, and this is the way I build qpdf
- for local development. At the same time, if a particular packager
- feels that it is a security liability for qpdf to use crypto
- functionality from anything other than a library that gets considerable
- scrutiny for this specific purpose (such as gnutls, openssl, or
- nettle), then I want to give that packager the ability to
- completely disable qpdf's native implementation. Or if someone
- wants to avoid adding a dependency on one of the external crypto
- providers, I don't want the availability of the provider to
- impose additional external dependencies within that environment.
- Both of these are situations that I know to be true for some
- users of qpdf.
- </para>
- <para>
- I want registration and selection of crypto providers to be
- thread-safe, and I want it to work deterministically for a
- developer to provide their own crypto provider and be able to set
- it up as the default. This was the primary motivation behind
- requiring C++-11 as doing so enabled me to exploit the guaranteed
- thread safety of local block static initialization. The
- <classname>QPDFCryptoProvider</classname> class uses a singleton
- pattern with thread-safe initialization to create the singleton
- instance of <classname>QPDFCryptoProvider</classname> and exposes
- only static methods in its public interface. In this way, if a
- developer wants to call any
- <classname>QPDFCryptoProvider</classname> methods, the library
- guarantees the <classname>QPDFCryptoProvider</classname> is fully
- initialized and all built-in crypto providers are registered.
- Making <classname>QPDFCryptoProvider</classname> actually know
- about all the built-in providers may seem a bit sad at first, but
- this choice makes it extremely clear exactly what the
- initialization behavior is. There's no question about provider
- implementations automatically registering themselves in a
- nondeterministic order. It also means that implementations do not
- need to know anything about the provider interface, which makes
- them easier to test in isolation. Another advantage of this
- approach is that a developer who wants to develop their own
- crypto provider can do so in complete isolation from the qpdf
- library and, with just two calls, can make qpdf use their
- provider in their application. If they decided to contribute
- their code, plugging it into the qpdf library would require a
- very small change to qpdf's source code.
- </para>
- <para>
- The decision to make the crypto provider selectable at runtime
- was one I struggled with a little, but I decided to do it for
- various reasons. Allowing an end user to switch crypto providers
- easily could be very useful for reproducing a potential bug. If a
- user reports a bug that some cryptographic thing is broken, I can
- easily ask that person to try with the
- <literal>QPDF_CRYPTO_PROVIDER</literal> variable set to different
- values. The same could apply in the event of a performance
- problem. This also makes it easier for qpdf's own test suite to
- exercise code with different providers without having to make
- every program that links with qpdf aware of the possibility of
- multiple providers. In qpdf's continuous integration environment,
- the entire test suite is run for each supported crypto provider.
- This is made simple by being able to select the provider using an
- environment variable.
- </para>
- <para>
- Finally, making crypto providers selectable in this way establishes
- a pattern that I may follow again in the future for stream filter
- providers. One could imagine a future enhancement where someone
- could provide their own implementations for basic filters like
- <literal>/FlateDecode</literal> or for other filters that qpdf
- doesn't support. Implementing the registration functions and
- internal storage of registered providers was also easier using
- C++-11's functional interfaces, which was another reason to
- require C++-11 at this time.
- </para>
- </sect2>
- </sect1>
- <sect1 id="ref.packaging">
- <title>Notes for Packagers</title>
- <para>
- If you are packaging qpdf for an operating system distribution,
- here are some things you may want to keep in mind:
- <itemizedlist>
- <listitem>
- <para>
- Starting in qpdf version 9.1.1, qpdf no longer has a runtime
- dependency on perl. This is because fix-qdf was rewritten in
- C++. However, qpdf still has a build-time dependency on perl.
- </para>
- </listitem>
- <listitem>
- <para>
- Make sure you are getting the intended behavior with regard to
- crypto providers. Read <xref linkend="ref.crypto.build"/> for
- details.
- </para>
- </listitem>
- <listitem>
- <para>
- Passing @1@option@1@--enable-show-failed-test-output@2@option@2@ to
- @1@command@1@./configure@2@command@2@ will cause any failed test
- output to be written to the console. This can be very useful
- for seeing test failures generated by autobuilders where you
- can't access qtest.log after the fact.
- </para>
- </listitem>
- <listitem>
- <para>
- If qpdf's build environment detects the presence of autoconf
- and related tools, it will check to ensure that automatically
- generated files are up-to-date with recorded checksums and fail
- if it detects a discrepancy. This feature is intended to
- prevent you from accidentally forgetting to regenerate
- automatic files after modifying their sources. If your
- packaging environment automatically refreshes automatic files,
- it can cause this check to fail. Suppress qpdf's checks by
- passing @1@option@1@--disable-check-autofiles@2@option@2@ to
- @1@command@1@./configure@2@command@2@. This is safe since qpdf's
- @1@command@1@autogen.sh@2@command@2@ just runs autotools in the normal
- way.
- </para>
- </listitem>
- <listitem>
- <para>
- QPDF's @1@command@1@make install@2@command@2@ does not install
- completion files by default, but as a packager, it's good if
- you install them wherever your distribution expects such files
- to go. You can find completion files to install in the
- @1@filename@1@completions@2@filename@2@ directory.
- </para>
- </listitem>
- <listitem>
- <para>
- Packagers are encouraged to install the source files from the
- @1@filename@1@examples@2@filename@2@ directory along with qpdf
- development packages.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </sect1>
- </chapter>
- <chapter id="ref.using">
- <title>Running QPDF</title>
- <para>
- This chapter describes how to run the qpdf program from the command
- line.
- </para>
- <sect1 id="ref.invocation">
- <title>Basic Invocation</title>
- <para>
- When running qpdf, the basic invocation is as follows:
-
- <programlisting>@1@command@1@qpdf@2@command@2@@1@option@1@ [ @1@replaceable@1@options@2@replaceable@2@ ] { @1@replaceable@1@infilename@2@replaceable@2@ | @1@option@1@--empty@2@option@2@ } [ @1@replaceable@1@page_selection_options@2@replaceable@2@ ] @1@replaceable@1@outfilename@2@replaceable@2@@2@option@2@
-</programlisting>
- This converts PDF file @1@option@1@infilename@2@option@2@ to PDF file
- @1@option@1@outfilename@2@option@2@. The output file is functionally
- identical to the input file but may have been structurally
- reorganized. Also, orphaned objects will be removed from the
- file. Many transformations are available as controlled by the
- options below. In place of @1@option@1@infilename@2@option@2@, the
- parameter @1@option@1@--empty@2@option@2@ may be specified. This causes
- qpdf to use a dummy input file that contains zero pages. The only
- normal use case for using @1@option@1@--empty@2@option@2@ would be if you
- were going to add pages from another source, as discussed in <xref
- linkend="ref.page-selection"/>.
- </para>
- <para>
- If @1@option@1@@filename@2@option@2@ appears as a word anywhere in the
- command-line, it will be read line by line, and each line will be
- treated as a command-line argument. Leading and trailing
- whitespace is intentionally not removed from lines, which makes it
- possible to handle arguments that start or end with spaces. The
- @1@option@1@@-@2@option@2@ option allows arguments to be read from
- standard input. This allows qpdf to be invoked with an arbitrary
- number of arbitrarily long arguments. It is also very useful for
- avoiding having to pass passwords on the command line. Note that
- the @1@option@1@@filename@2@option@2@ can't appear in the middle of an
- argument, so constructs such as @1@option@1@--arg=@option@2@option@2@
- will not work. You would have to include the argument and its
- options together in the arguments file.
- </para>
- <para>
- @1@option@1@outfilename@2@option@2@ does not have to be seekable, even
- when generating linearized files. Specifying
- "@1@option@1@-@2@option@2@" as @1@option@1@outfilename@2@option@2@
- means to write to standard output. If you want to overwrite the
- input file with the output, use the option
- @1@option@1@--replace-input@2@option@2@ and omit the output file name.
- You can't specify the same file as both the input and the output.
- If you do this, qpdf will tell you about the
- @1@option@1@--replace-input@2@option@2@ option.
- </para>
- <para>
- Most options require an output file, but some testing or
- inspection commands do not. These are specifically noted.
- </para>
- <sect2 id="ref.exit-status">
- <title>Exit Status</title>
- <para>
- The exit status of @1@command@1@qpdf@2@command@2@ may be interpreted as
- follows:
- <itemizedlist>
- <listitem>
- <para>
- <literal>0</literal>: no errors or warnings were found. The
- file may still have problems qpdf can't detect. If
- @1@option@1@--warning-exit-0@2@option@2@ was specified, exit status 0
- is used even if there are warnings.
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>2</literal>: errors were found. qpdf was not able to
- fully process the file.
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>3</literal>: qpdf encountered problems that it was
- able to recover from. In some cases, the resulting file may
- still be damaged. Note that qpdf still exits with status
- <literal>3</literal> if it finds warnings even when
- @1@option@1@--no-warn@2@option@2@ is specified. With
- @1@option@1@--warning-exit-0@2@option@2@, warnings without errors
- exit with status 0 instead of 3.
- </para>
- </listitem>
- </itemizedlist>
- Note that @1@command@1@qpdf@2@command@2@ never exits with status
- <literal>1</literal>. If you get an exit status of
- <literal>1</literal>, it was something else, like the shell not
- being able to find or execute @1@command@1@qpdf@2@command@2@.
- </para>
- </sect2>
- </sect1>
- <sect1 id="ref.shell-completion">
- <title>Shell Completion</title>
- <para>
- Starting in qpdf version 8.3.0, qpdf provides its own completion
- support for zsh and bash. You can enable bash completion with
- @1@command@1@eval $(qpdf --completion-bash)@2@command@2@ and zsh
- completion with @1@command@1@eval $(qpdf --completion-zsh)@2@command@2@.
- If @1@command@1@qpdf@2@command@2@ is not in your path, you should invoke
- it above with an absolute path. If you invoke it with a relative
- path, it will warn you, and the completion won't work if you're in
- a different directory.
- </para>
- <para>
- qpdf will use <literal>argv[0]</literal> to figure out where its
- executable is. This may produce unwanted results in some cases,
- especially if you are trying to use completion with a copy of qpdf
- that is built from source. You can specify a full path to the qpdf
- you want to use for completion in the
- <literal>QPDF_EXECUTABLE</literal> environment variable.
- </para>
- </sect1>
- <sect1 id="ref.basic-options">
- <title>Basic Options</title>
- <para>
- The following options are the most common ones and perform
- commonly needed transformations.
- <variablelist>
- <varlistentry>
- <term>@1@option@1@--help@2@option@2@</term>
- <listitem>
- <para>
- Display command-line invocation help.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--version@2@option@2@</term>
- <listitem>
- <para>
- Display the current version of qpdf.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--copyright@2@option@2@</term>
- <listitem>
- <para>
- Show detailed copyright information.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--show-crypto@2@option@2@</term>
- <listitem>
- <para>
- Show a list of available crypto providers, each on a line by
- itself. The default provider is always listed first. See <xref
- linkend="ref.crypto"/> for more information about crypto
- providers.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--completion-bash@2@option@2@</term>
- <listitem>
- <para>
- Output a completion command you can eval to enable shell
- completion from bash.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--completion-zsh@2@option@2@</term>
- <listitem>
- <para>
- Output a completion command you can eval to enable shell
- completion from zsh.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--password=@1@replaceable@1@password@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Specifies a password for accessing encrypted files. To read
- the password from a file or standard input, you can use
- @1@option@1@--password-file@2@option@2@, added in qpdf 10.2. Note
- that you can also use @1@option@1@@filename@2@option@2@ or
- @1@option@1@@-@2@option@2@ as described above to put the password in
- a file or pass it via standard input, but you would do so by
- specifying the entire
- @1@option@1@--password=@1@replaceable@1@password@2@replaceable@2@@2@option@2@
- option in the file. Syntax such as
- @1@option@1@--password=@filename@2@option@2@ won't work since
- @1@option@1@@filename@2@option@2@ is not recognized in the middle of
- an argument.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--password-file=@1@replaceable@1@filename@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Reads the first line from the specified file and uses it as
- the password for accessing encrypted files.
- @1@option@1@@1@replaceable@1@filename@2@replaceable@2@@2@option@2@ may be
- <literal>-</literal> to read the password from standard input.
- Note that, in this case, the password is echoed and there is
- no prompt, so use with caution.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--is-encrypted@2@option@2@</term>
- <listitem>
- <para>
- Silently exit with status 0 if the file is encrypted or status
- 2 if the file is not encrypted. This is useful for shell
- scripts. Other options are ignored if this is given. This
- option is mutually exclusive with
- @1@option@1@--requires-password@2@option@2@. Both this option and
- @1@option@1@--requires-password@2@option@2@ exit with status 2 for
- non-encrypted files.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--requires-password@2@option@2@</term>
- <listitem>
- <para>
- Silently exit with status 0 if a password (other than as
- supplied) is required. Exit with status 2 if the file is not
- encrypted. Exit with status 3 if the file is encrypted but
- requires no password or the correct password has been
- supplied. This is useful for shell scripts. Note that any
- supplied password is used when opening the file. When used
- with a @1@option@1@--password@2@option@2@ option, this option can be
- used to check the correctness of the password. In that case,
- an exit status of 3 means the file works with the supplied
- password. This option is mutually exclusive with
- @1@option@1@--is-encrypted@2@option@2@. Both this option and
- @1@option@1@--is-encrypted@2@option@2@ exit with status 2 for
- non-encrypted files.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--verbose@2@option@2@</term>
- <listitem>
- <para>
- Increase verbosity of output. For now, this just prints some
- indication of any file that it creates.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--progress@2@option@2@</term>
- <listitem>
- <para>
- Indicate progress while writing files.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--no-warn@2@option@2@</term>
- <listitem>
- <para>
- Suppress writing of warnings to stderr. If warnings were
- detected and suppressed, @1@command@1@qpdf@2@command@2@ will still
- exit with exit code 3. See also
- @1@option@1@--warning-exit-0@2@option@2@.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--warning-exit-0@2@option@2@</term>
- <listitem>
- <para>
- If warnings are found but no errors, exit with exit code 0
- instead of 3. When combined with @1@option@1@--no-warn@2@option@2@, the
- effect is for @1@command@1@qpdf@2@command@2@ to completely ignore
- warnings.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--linearize@2@option@2@</term>
- <listitem>
- <para>
- Causes generation of a linearized (web-optimized) output file.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--replace-input@2@option@2@</term>
- <listitem>
- <para>
- If specified, the output file name should be omitted. This
- option tells qpdf to replace the input file with the output.
- It does this by writing to
- @1@filename@1@@1@replaceable@1@infilename@2@replaceable@2@.~qpdf-temp#@2@filename@2@
- and, when done, overwriting the input file with the temporary
- file. If there were any warnings, the original input is saved
- as
- @1@filename@1@@1@replaceable@1@infilename@2@replaceable@2@.~qpdf-orig@2@filename@2@.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--copy-encryption=file@2@option@2@</term>
- <listitem>
- <para>
- Encrypt the file using the same encryption parameters,
- including user and owner password, as the specified file. Use
- @1@option@1@--encryption-file-password@2@option@2@ to specify a password
- if one is needed to open this file. Note that copying the
- encryption parameters from a file also copies the first half
- of <literal>/ID</literal> from the file since this is part of
- the encryption parameters.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--encryption-file-password=password@2@option@2@</term>
- <listitem>
- <para>
- If the file specified with @1@option@1@--copy-encryption@2@option@2@
- requires a password, specify the password using this option.
- Note that only one of the user or owner password is required.
- Both passwords will be preserved since QPDF does not
- distinguish between the two passwords. It is possible to
- preserve encryption parameters, including the owner password,
- from a file even if you don't know the file's owner password.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--allow-weak-crypto@2@option@2@</term>
- <listitem>
- <para>
- Starting with version 10.4, qpdf issues warnings when
- requested to create files using RC4 encryption. This option
- suppresses those warnings. In future versions of qpdf, qpdf
- will refuse to create files with weak cryptography when this
- flag is not given. See <xref linkend="ref.weak-crypto"/> for
- additional details.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--encrypt options --@2@option@2@</term>
- <listitem>
- <para>
- Causes generation of an encrypted output file. Please see <xref
- linkend="ref.encryption-options"/> for details on how to
- specify encryption parameters.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--decrypt@2@option@2@</term>
- <listitem>
- <para>
- Removes any encryption on the file. A password must be
- supplied if the file is password protected.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--password-is-hex-key@2@option@2@</term>
- <listitem>
- <para>
- Overrides the usual computation/retrieval of the PDF file's
- encryption key from user/owner password with an explicit
- specification of the encryption key. When this option is
- specified, the argument to the @1@option@1@--password@2@option@2@
- option is interpreted as a hexadecimal-encoded key value. This
- only applies to the password used to open the main input file.
- It does not apply to other files opened by
- @1@option@1@--pages@2@option@2@ or other options or to files being
- written.
- </para>
- <para>
- Most users will never have a need for this option, and no
- standard viewers support this mode of operation, but it can be
- useful for forensic or investigatory purposes. For example, if
- a PDF file is encrypted with an unknown password, a
- brute-force attack using the key directly is sometimes more
- efficient than one using the password. Also, if a file is
- heavily damaged, it may be possible to derive the encryption
- key and recover parts of the file using it directly. To expose
- the encryption key used by an encrypted file that you can open
- normally, use the @1@option@1@--show-encryption-key@2@option@2@
- option.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--suppress-password-recovery@2@option@2@</term>
- <listitem>
- <para>
- Ordinarily, qpdf attempts to automatically compensate for
- passwords specified in the wrong character encoding. This
- option suppresses that behavior. Under normal conditions,
- there are no reasons to use this option. See <xref
- linkend="ref.unicode-passwords"/> for a discussion.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--password-mode=@1@replaceable@1@mode@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- This option can be used to fine-tune how qpdf interprets
- Unicode (non-ASCII) password strings passed on the command
- line. With the exception of the @1@option@1@hex-bytes@2@option@2@
- mode, these only apply to passwords provided when encrypting
- files. The @1@option@1@hex-bytes@2@option@2@ mode also applies to
- passwords specified for reading files. For additional
- discussion of the supported password modes and when you might
- want to use them, see <xref linkend="ref.unicode-passwords"/>.
- The following modes are supported:
- <itemizedlist>
- <listitem>
- <para>
- @1@option@1@auto@2@option@2@: Automatically determine whether the
- specified password is a properly encoded Unicode (UTF-8)
- string, and transcode it as required by the PDF spec based
- on the type of encryption being applied. On Windows starting
- with version 8.4.0, and on almost all other modern
- platforms, incoming passwords will be properly encoded in
- UTF-8, so this is almost always what you want.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@unicode@2@option@2@: Tells qpdf that the incoming
- password is UTF-8, overriding whatever its automatic
- detection determines. The only difference between this mode
- and @1@option@1@auto@2@option@2@ is that qpdf will fail with an
- error message if the password is not valid UTF-8 instead of
- falling back to @1@option@1@bytes@2@option@2@ mode with a warning.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@bytes@2@option@2@: Interpret the password as a literal
- byte string. For non-Windows platforms, this is what
- versions of qpdf prior to 8.4.0 did. For Windows platforms,
- there is no way to specify strings of binary data on the
- command line directly, but you can use the
- @1@option@1@@filename@2@option@2@ option to do it, in which case
- this option forces qpdf to respect the string of bytes as
- provided. This option will allow you to encrypt PDF files
- with passwords that will not be usable by other readers.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@hex-bytes@2@option@2@: Interpret the password as a
- hex-encoded string. This provides a way to pass binary data
- as a password on all platforms including Windows. As with
- @1@option@1@bytes@2@option@2@, this option may allow creation of
- files that can't be opened by other readers. This mode
- affects qpdf's interpretation of passwords specified for
- decrypting files as well as for encrypting them. It makes
- it possible to specify strings that are encoded in some
- manner other than the system's default encoding.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--rotate=[+|-]angle[:page-range]@2@option@2@</term>
- <listitem>
- <para>
- Apply rotation to specified pages. The
- @1@option@1@page-range@2@option@2@ portion of the option value has
- the same format as page ranges in <xref
- linkend="ref.page-selection"/>. If the page range is omitted,
- the rotation is applied to all pages. The
- @1@option@1@angle@2@option@2@ portion of the parameter may be either
- 0, 90, 180, or 270. If preceded by @1@option@1@+@2@option@2@ or
- @1@option@1@-@2@option@2@, the angle is added to or subtracted from
- the specified pages' original rotations. This is almost always
- what you want. Otherwise the pages' rotations are set to the
- exact value, which may cause the appearances of the pages to
- be inconsistent, especially for scans. For example, the
- command @1@command@1@qpdf in.pdf out.pdf --rotate=+90:2,4,6
- --rotate=180:7-8@2@command@2@ would rotate pages 2, 4, and 6 90
- degrees clockwise from their original rotation and force the
- rotation of pages 7 through 8 to 180 degrees regardless of
- their original rotation, and the command @1@command@1@qpdf in.pdf
- out.pdf --rotate=+180@2@command@2@ would rotate all pages by 180
- degrees.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--keep-files-open=@1@replaceable@1@[yn]@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- This option controls whether qpdf keeps individual files open
- while merging. Prior to version 8.1.0, qpdf always kept all
- files open, but this meant that the number of files that could
- be merged was limited by the operating system's open file
- limit. Version 8.1.0 opened files as they were referenced and
- closed them after each read, but this caused a major
- performance impact. Version 8.2.0 optimized the performance
- but did so in a way that, for local file systems, there was a
- small but unavoidable performance hit, but for networked file
- systems, the performance impact could be very high. Starting
- with version 8.2.1, the default behavior is that files are
- kept open if no more than 200 files are specified, but that
- the behavior can be explicitly overridden with the
- @1@option@1@--keep-files-open@2@option@2@ flag. If you are merging
- more than 200 files but less than the operating system's max
- open files limit, you may want to use
- @1@option@1@--keep-files-open=y@2@option@2@, especially if working
- over a networked file system. If you are using a local file
- system where the overhead is low and you might sometimes merge
- more than the OS limit's number of files from a script and are
- not worried about a few seconds additional processing time,
- you may want to specify @1@option@1@--keep-files-open=n@2@option@2@.
- The threshold for switching may be changed from the default
- 200 with the @1@option@1@--keep-files-open-threshold@2@option@2@
- option.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--keep-files-open-threshold=@1@replaceable@1@count@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- If specified, overrides the default value of 200 used as the
- threshold for qpdf deciding whether or not to keep files open.
- See @1@option@1@--keep-files-open@2@option@2@ for details.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--pages options --@2@option@2@</term>
- <listitem>
- <para>
- Select specific pages from one or more input files. See <xref
- linkend="ref.page-selection"/> for details on how to do page
- selection (splitting and merging).
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--collate=@1@replaceable@1@n@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- When specified, collate rather than concatenate pages from
- files specified with @1@option@1@--pages@2@option@2@. With a numeric
- argument, collate in groups of @1@replaceable@1@n@2@replaceable@2@.
- The default is 1. See <xref linkend="ref.page-selection"/> for
- additional details.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--flatten-rotation@2@option@2@</term>
- <listitem>
- <para>
- For each page that is rotated using the
- <literal>/Rotate</literal> key in the page's dictionary,
- remove the <literal>/Rotate</literal> key and implement the
- identical rotation semantics by modifying the page's contents.
- This option can be useful to prepare files for buggy PDF
- applications that don't properly handle rotated pages.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--split-pages=[n]@2@option@2@</term>
- <listitem>
- <para>
- Write each group of @1@option@1@n@2@option@2@ pages to a separate
- output file. If @1@option@1@n@2@option@2@ is not specified, create
- single pages. Output file names are generated as follows:
- <itemizedlist>
- <listitem>
- <para>
- If the string <literal>%d</literal> appears in the output
- file name, it is replaced with a range of zero-padded page
- numbers starting from 1.
- </para>
- </listitem>
- <listitem>
- <para>
- Otherwise, if the output file name ends in
- @1@filename@1@.pdf@2@filename@2@ (case insensitive), a zero-padded
- page range, preceded by a dash, is inserted before the file
- extension.
- </para>
- </listitem>
- <listitem>
- <para>
- Otherwise, the file name is appended with a zero-padded
- page range preceded by a dash.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- Page ranges are a single number in the case of single-page
- groups or two numbers separated by a dash otherwise.
- For example, if @1@filename@1@infile.pdf@2@filename@2@ has 12 pages:
- <itemizedlist>
- <listitem>
- <para>
- @1@command@1@qpdf --split-pages infile.pdf %d-out@2@command@2@
- would generate files @1@filename@1@01-out@2@filename@2@ through
- @1@filename@1@12-out@2@filename@2@
- </para>
- </listitem>
- <listitem>
- <para>
- @1@command@1@qpdf --split-pages=2 infile.pdf
- outfile.pdf@2@command@2@ would generate files
- @1@filename@1@outfile-01-02.pdf@2@filename@2@ through
- @1@filename@1@outfile-11-12.pdf@2@filename@2@
- </para>
- </listitem>
- <listitem>
- <para>
- @1@command@1@qpdf --split-pages infile.pdf
- something.else@2@command@2@ would generate files
- @1@filename@1@something.else-01@2@filename@2@ through
- @1@filename@1@something.else-12@2@filename@2@
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- Note that outlines, threads, and other global features of the
- original PDF file are not preserved. For each page of output,
- this option creates an empty PDF and copies a single page from
- the output into it. If you require the global data, you will
- have to run @1@command@1@qpdf@2@command@2@ with the
- @1@option@1@--pages@2@option@2@ option once for each file. Using
- @1@option@1@--split-pages@2@option@2@ is much faster if you don't
- require the global data.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--overlay options --@2@option@2@</term>
- <listitem>
- <para>
- Overlay pages from another file onto the output pages. See
- <xref linkend="ref.overlay-underlay"/> for details on
- overlay/underlay.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--underlay options --@2@option@2@</term>
- <listitem>
- <para>
- Underlay pages from another file beneath the output pages. See
- <xref linkend="ref.overlay-underlay"/> for details on
- overlay/underlay.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- </para>
- <para>
- Password-protected files may be opened by specifying a password.
- By default, qpdf will preserve any encryption data associated with
- a file. If @1@option@1@--decrypt@2@option@2@ is specified, qpdf will
- attempt to remove any encryption information. If
- @1@option@1@--encrypt@2@option@2@ is specified, qpdf will replace the
- document's encryption parameters with whatever is specified.
- </para>
- <para>
- Note that qpdf does not obey encryption restrictions already
- imposed on the file. Doing so would be meaningless since qpdf can
- be used to remove encryption from the file entirely. This
- functionality is not intended to be used for bypassing copyright
- restrictions or other restrictions placed on files by their
- producers.
- </para>
- <para>
- Prior to 8.4.0, in the case of passwords that contain characters
- that fall outside of 7-bit US-ASCII, qpdf left the burden of
- supplying properly encoded encryption and decryption passwords to
- the user. Starting in qpdf 8.4.0, qpdf does this automatically in
- most cases. For an in-depth discussion, please see <xref
- linkend="ref.unicode-passwords"/>. Previous versions of this
- manual described workarounds using the @1@command@1@iconv@2@command@2@
- command. Such workarounds are no longer required or recommended
- with qpdf 8.4.0. However, for backward compatibility, qpdf
- attempts to detect those workarounds and do the right thing in
- most cases.
- </para>
- </sect1>
- <sect1 id="ref.encryption-options">
- <title>Encryption Options</title>
- <para>
- To change the encryption parameters of a file, use the --encrypt
- flag. The syntax is
-
- <programlisting>@1@option@1@--encrypt @1@replaceable@1@user-password@2@replaceable@2@ @1@replaceable@1@owner-password@2@replaceable@2@ @1@replaceable@1@key-length@2@replaceable@2@ [ @1@replaceable@1@restrictions@2@replaceable@2@ ] --@2@option@2@
-</programlisting>
- Note that "@1@option@1@--@2@option@2@" terminates parsing of
- encryption flags and must be present even if no restrictions are
- present.
- </para>
- <para>
- Either or both of the user password and the owner password may be
- empty strings. Starting in qpdf 10.2, qpdf defaults to not
- allowing creation of PDF files with a non-empty user password, an
- empty owner password, and a 256-bit key since such files can be
- opened with no password. If you want to create such files, specify
- the encryption option @1@option@1@--allow-insecure@2@option@2@, as
- described below.
- </para>
- <para>
- The value for
- @1@option@1@@1@replaceable@1@key-length@2@replaceable@2@@2@option@2@ may be 40,
- 128, or 256. The restriction flags are dependent upon key length.
- When no additional restrictions are given, the default is to be
- fully permissive.
- </para>
- <para>
- If @1@option@1@@1@replaceable@1@key-length@2@replaceable@2@@2@option@2@ is 40,
- the following restriction options are available:
- <variablelist>
- <varlistentry>
- <term>@1@option@1@--print=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Determines whether or not to allow printing.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--modify=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Determines whether or not to allow document modification.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--extract=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Determines whether or not to allow text/image extraction.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--annotate=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Determines whether or not to allow comments and form fill-in
- and signing.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- If @1@option@1@@1@replaceable@1@key-length@2@replaceable@2@@2@option@2@ is 128,
- the following restriction options are available:
- <variablelist>
- <varlistentry>
- <term>@1@option@1@--accessibility=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Determines whether or not to allow accessibility to the visually
- impaired. The qpdf library disregards this field when AES is
- used or when 256-bit encryption is used. You should really
- never disable accessibility, but qpdf lets you do it in case
- you need to configure a file this way for testing purposes.
- The PDF spec says that conforming readers should disregard
- this permission and always allow accessibility.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--extract=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Determines whether or not to allow text/graphic extraction.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--assemble=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Determines whether document assembly (rotation and reordering
- of pages) is allowed.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--annotate=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Determines whether modifying annotations is allowed. This
- includes adding comments and filling in form fields. Also
- allows editing of form fields if
- @1@option@1@--modify-other=y@2@option@2@ is given.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--form=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Determines whether filling form fields is allowed.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--modify-other=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Allow all document editing except those controlled separately
- by the @1@option@1@--assemble@2@option@2@,
- @1@option@1@--annotate@2@option@2@, and @1@option@1@--form@2@option@2@
- options.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--print=@1@replaceable@1@print-opt@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Controls printing access.
- @1@option@1@@1@replaceable@1@print-opt@2@replaceable@2@@2@option@2@ may be
- one of the following:
- <itemizedlist>
- <listitem>
- <para>
- @1@option@1@full@2@option@2@: allow full printing
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@low@2@option@2@: allow low-resolution printing only
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@none@2@option@2@: disallow printing
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--modify=@1@replaceable@1@modify-opt@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Controls modify access. This way of controlling modify access
- has less granularity than new options added in qpdf 8.4.
- @1@option@1@@1@replaceable@1@modify-opt@2@replaceable@2@@2@option@2@ may be
- one of the following:
- <itemizedlist>
- <listitem>
- <para>
- @1@option@1@all@2@option@2@: allow full document modification
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@annotate@2@option@2@: allow comment authoring, form
- operations, and document assembly
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@form@2@option@2@: allow form field fill-in and signing
- and document assembly
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@assembly@2@option@2@: allow document assembly only
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@none@2@option@2@: allow no modifications
- </para>
- </listitem>
- </itemizedlist>
- Using the @1@option@1@--modify@2@option@2@ option does not allow you
- to create certain combinations of permissions such as allowing
- form filling but not allowing document assembly. Starting with
- qpdf 8.4, you can either just use the other options to control
- fields individually, or you can use something like
- @1@option@1@--modify=form --assemble=n@2@option@2@ to fine tune.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--cleartext-metadata@2@option@2@</term>
- <listitem>
- <para>
- If specified, any metadata stream in the document will be left
- unencrypted even if the rest of the document is encrypted.
- This also forces the PDF version to be at least 1.5.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--use-aes=[yn]@2@option@2@</term>
- <listitem>
- <para>
- If @1@option@1@--use-aes=y@2@option@2@ is specified, AES encryption
- will be used instead of RC4 encryption. This forces the PDF
- version to be at least 1.6.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--allow-insecure@2@option@2@</term>
- <listitem>
- <para>
- From qpdf 10.2, qpdf defaults to not allowing creation of PDF
- files where the user password is non-empty, the owner password
- is empty, and a 256-bit key is in use. Files created in this
- way are insecure since they can be opened without a password.
- Users would ordinarily never want to create such files. If you
- are using qpdf to intentionally create strange files for
- testing (a definite valid use of qpdf!), this option allows
- you to create such insecure files.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--force-V4@2@option@2@</term>
- <listitem>
- <para>
- Use of this option forces the <literal>/V</literal> and
- <literal>/R</literal> parameters in the document's encryption
- dictionary to be set to the value <literal>4</literal>. As
- qpdf will automatically do this when required, there is no
- reason to ever use this option. It exists primarily for use
- in testing qpdf itself. This option also forces the PDF
- version to be at least 1.5.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- If @1@option@1@@1@replaceable@1@key-length@2@replaceable@2@@2@option@2@ is 256,
- the minimum PDF version is 1.7 with extension level 8, and the
- AES-based encryption format used is the PDF 2.0 encryption method
- supported by Acrobat X. The same options are available as with
- 128 bits with the following exceptions:
- <variablelist>
- <varlistentry>
- <term>@1@option@1@--use-aes@2@option@2@</term>
- <listitem>
- <para>
- This option is not available with 256-bit keys. AES is always
- used with 256-bit encryption keys.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--force-V4@2@option@2@</term>
- <listitem>
- <para>
- This option is not available with 256-bit keys.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--force-R5@2@option@2@</term>
- <listitem>
- <para>
- If specified, qpdf sets the minimum version to 1.7 at
- extension level 3 and writes the deprecated encryption format
- used by Acrobat version IX. This option should not be used in
- practice to generate PDF files that will be in general use,
- but it can be useful to generate files if you are trying to
- test proper support in another application for PDF files
- encrypted in this way.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- The default for each permission option is to be fully permissive.
- </para>
- </sect1>
- <sect1 id="ref.page-selection">
- <title>Page Selection Options</title>
- <para>
- Starting with qpdf 3.0, it is possible to split and merge PDF
- files by selecting pages from one or more input files. Whatever
- file is given as the primary input file is used as the starting
- point, but its pages are replaced with pages as specified.
-
- <programlisting>@1@option@1@--pages @1@replaceable@1@input-file@2@replaceable@2@ [ @1@replaceable@1@--password=password@2@replaceable@2@ ] [ @1@replaceable@1@page-range@2@replaceable@2@ ] [ ... ] --@2@option@2@
-</programlisting>
- Multiple input files may be specified. Each one is given as the
- name of the input file, an optional password (if required to open
- the file), and the range of pages. Note that
- "@1@option@1@--@2@option@2@" terminates parsing of page
- selection flags.
- </para>
- <para>
- Starting with qpdf 8.4, the special input file name
- "@1@filename@1@.@2@filename@2@" can be used as a shortcut for the
- primary input filename.
- </para>
- <para>
- For each file that pages should be taken from, specify the file, a
- password needed to open the file (if any), and a page range. The
- password needs to be given only once per file. If any of the
- input files are the same as the primary input file or the file
- used to copy encryption parameters (if specified), you do not need
- to repeat the password here. The same file can be repeated
- multiple times. If a file that is repeated has a password, the
- password only has to be given the first time. All non-page data
- (info, outlines, page numbers, etc.) are taken from the primary
- input file. To discard these, use @1@option@1@--empty@2@option@2@ as the
- primary input.
- </para>
- <para>
- Starting with qpdf 5.0.0, it is possible to omit the page range.
- If qpdf sees a value in the place where it expects a page range
- and that value is not a valid range but is a valid file name, qpdf
- will implicitly use the range <literal>1-z</literal>, meaning that
- it will include all pages in the file. This makes it possible to
- easily combine all pages in a set of files with a command like
- @1@command@1@qpdf --empty out.pdf --pages *.pdf --@2@command@2@.
- </para>
- <para>
- The page range is a set of numbers separated by commas, ranges of
- numbers separated by dashes, or combinations of those. The character
- "z" represents the last page. A number preceded by an
- "r" indicates to count from the end, so
- <literal>r3-r1</literal> would be the last three pages of the
- document. Pages can appear in any order. Ranges can appear with a
- high number followed by a low number, which causes the pages to
- appear in reverse. Numbers may be repeated in a page range. A page
- range may be optionally appended with <literal>:even</literal> or
- <literal>:odd</literal> to indicate only the even or odd pages in
- the given range. Note that even and odd refer to the positions
- within the specified range, not whether the original number is
- even or odd.
- </para>
- <para>
- Example page ranges:
- <itemizedlist>
- <listitem>
- <para>
- <literal>1,3,5-9,15-12</literal>: pages 1, 3, 5, 6, 7, 8,
- 9, 15, 14, 13, and 12 in that order.
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>z-1</literal>: all pages in the document in reverse
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>r3-r1</literal>: the last three pages of the document
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>r1-r3</literal>: the last three pages of the document
- in reverse order
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>1-20:even</literal>: even pages from 2 to 20
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>5,7-9,12:odd</literal>: pages 5, 8, and 12, which are
- the pages in odd positions from among the original range, which
- represents pages 5, 7, 8, 9, and 12.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- Starting in qpdf version 8.3, you can specify the
- @1@option@1@--collate@2@option@2@ option. Note that this option is
- specified outside of @1@option@1@--pages ... --@2@option@2@.
- When @1@option@1@--collate@2@option@2@ is specified, it changes the
- meaning of @1@option@1@--pages@2@option@2@ so that the specified files,
- as modified by page ranges, are collated rather than concatenated.
- For example, if you have the files @1@filename@1@odd.pdf@2@filename@2@ and
- @1@filename@1@even.pdf@2@filename@2@ containing odd and even pages of a
- document respectively, you could run @1@command@1@qpdf --collate
- odd.pdf --pages odd.pdf even.pdf -- all.pdf@2@command@2@ to collate
- the pages. This would pick page 1 from odd, page 1 from even, page
- 2 from odd, page 2 from even, etc. until all pages have been
- included. Any number of files and page ranges can be specified. If
- any file has fewer pages, that file is just skipped when its pages
- have all been included. For example, if you ran @1@command@1@qpdf
- --collate --empty --pages a.pdf 1-5 b.pdf 6-4 c.pdf r1 --
- out.pdf@2@command@2@, you would get the following pages in this
- order:
- <itemizedlist>
- <listitem><para>a.pdf page 1</para></listitem>
- <listitem><para>b.pdf page 6</para></listitem>
- <listitem><para>c.pdf last page</para></listitem>
- <listitem><para>a.pdf page 2</para></listitem>
- <listitem><para>b.pdf page 5</para></listitem>
- <listitem><para>a.pdf page 3</para></listitem>
- <listitem><para>b.pdf page 4</para></listitem>
- <listitem><para>a.pdf page 4</para></listitem>
- <listitem><para>a.pdf page 5</para></listitem>
- </itemizedlist>
- </para>
- <para>
- Starting in qpdf version 10.2, you may specify a numeric argument
- to @1@option@1@--collate@2@option@2@. With
- @1@option@1@--collate=@1@replaceable@1@n@2@replaceable@2@@2@option@2@, pull
- groups of @1@replaceable@1@n@2@replaceable@2@ pages from each file,
- again, stopping when there are no more pages. For example, if you
- ran @1@command@1@qpdf --collate=2 --empty --pages a.pdf 1-5 b.pdf 6-4
- c.pdf r1 -- out.pdf@2@command@2@, you would get the following pages
- in this order:
- <itemizedlist>
- <listitem><para>a.pdf page 1</para></listitem>
- <listitem><para>a.pdf page 2</para></listitem>
- <listitem><para>b.pdf page 6</para></listitem>
- <listitem><para>b.pdf page 5</para></listitem>
- <listitem><para>c.pdf last page</para></listitem>
- <listitem><para>a.pdf page 3</para></listitem>
- <listitem><para>a.pdf page 4</para></listitem>
- <listitem><para>b.pdf page 4</para></listitem>
- <listitem><para>a.pdf page 5</para></listitem>
- </itemizedlist>
- </para>
- <para>
- Starting in qpdf version 8.3, when you split and merge files, any
- page labels (page numbers) are preserved in the final file. It is
- expected that more document features will be preserved by
- splitting and merging. In the mean time, semantics of splitting
- and merging vary across features. For example, the document's
- outlines (bookmarks) point to actual page objects, so if you
- select some pages and not others, bookmarks that point to pages
- that are in the output file will work, and remaining bookmarks
- will not work. A future version of @1@command@1@qpdf@2@command@2@ may do
- a better job at handling these issues. (Note that the qpdf library
- already contains all of the APIs required in order to implement
- this in your own application if you need it.) In the mean time,
- you can always use @1@option@1@--empty@2@option@2@ as the primary input
- file to avoid copying all of that from the first file. For
- example, to take pages 1 through 5 from
- @1@filename@1@infile.pdf@2@filename@2@ while preserving all metadata
- associated with that file, you could use
-
- <programlisting>@1@command@1@qpdf@2@command@2@ @1@option@1@infile.pdf --pages . 1-5 -- outfile.pdf@2@option@2@
-</programlisting>
- If you wanted pages 1 through 5 from
- @1@filename@1@infile.pdf@2@filename@2@ but you wanted the rest of the
- metadata to be dropped, you could instead run
-
- <programlisting>@1@command@1@qpdf@2@command@2@ @1@option@1@--empty --pages infile.pdf 1-5 -- outfile.pdf@2@option@2@
-</programlisting>
- If you wanted to take pages 1 through 5 from
- @1@filename@1@file1.pdf@2@filename@2@ and pages 11 through 15 from
- @1@filename@1@file2.pdf@2@filename@2@ in reverse, taking document-level
- metadata from @1@filename@1@file2.pdf@2@filename@2@, you would run
-
- <programlisting>@1@command@1@qpdf@2@command@2@ @1@option@1@file2.pdf --pages file1.pdf 1-5 . 15-11 -- outfile.pdf@2@option@2@
-</programlisting>
- If, for some reason, you wanted to take the first page of an
- encrypted file called @1@filename@1@encrypted.pdf@2@filename@2@ with
- password <literal>pass</literal> and repeat it twice in an output
- file, and if you wanted to drop document-level metadata but
- preserve encryption, you would use
-
- <programlisting>@1@command@1@qpdf@2@command@2@ @1@option@1@--empty --copy-encryption=encrypted.pdf --encryption-file-password=pass
---pages encrypted.pdf --password=pass 1 ./encrypted.pdf --password=pass 1 --
-outfile.pdf@2@option@2@
-</programlisting>
- Note that we had to specify the password all three times because
- giving a password as @1@option@1@--encryption-file-password@2@option@2@
- doesn't count for page selection, and as far as qpdf is concerned,
- @1@filename@1@encrypted.pdf@2@filename@2@ and
- @1@filename@1@./encrypted.pdf@2@filename@2@ are separate files. These
- are all corner cases that most users should hopefully never have
- to be bothered with.
- </para>
- <para>
- Prior to version 8.4, it was not possible to specify the same page
- from the same file directly more than once, and the workaround of
- specifying the same file in more than one way was required.
- Version 8.4 removes this limitation, but there is still a valid
- use case. When you specify the same page from the same file more
- than once, qpdf will share objects between the pages. If you are
- going to do further manipulation on the file and need the two
- instances of the same original page to be deep copies, then you
- can specify the file in two different ways. For example
- @1@command@1@qpdf in.pdf --pages . 1 ./in.pdf 1 -- out.pdf@2@command@2@
- would create a file with two copies of the first page of the
- input, and the two copies would not share any objects in common. This
- includes fonts, images, and anything else the page references.
- </para>
- </sect1>
- <sect1 id="ref.overlay-underlay">
- <title>Overlay and Underlay Options</title>
- <para>
- Starting with qpdf 8.4, it is possible to overlay or underlay
- pages from other files onto the output generated by qpdf. Specify
- overlay or underlay as follows:
-
- <programlisting>{ @1@option@1@--overlay@2@option@2@ | @1@option@1@--underlay@2@option@2@ } @1@replaceable@1@file@2@replaceable@2@ [ @1@option@1@options@2@option@2@ ] @1@option@1@--@2@option@2@
-</programlisting>
- Overlay and underlay options are processed late, so they can be
- combined with other options like merging and will apply to the final
- output. The @1@option@1@--overlay@2@option@2@ and
- @1@option@1@--underlay@2@option@2@ options work the same way, except
- underlay pages are drawn underneath the page to which they are
- applied, possibly obscured by the original page, and overlay files
- are drawn on top of the page to which they are applied, possibly
- obscuring the page. You can combine overlay and underlay.
- </para>
- <para>
- The default behavior of overlay and underlay is that pages are
- taken from the overlay/underlay file in sequence and applied to
- corresponding pages in the output until there are no more output
- pages. If the overlay or underlay file runs out of pages,
- remaining output pages are left alone. This behavior can be
- modified by options, which are provided between the
- @1@option@1@--overlay@2@option@2@ or @1@option@1@--underlay@2@option@2@ flag and
- the @1@option@1@--@2@option@2@ option. The following options are
- supported:
- <itemizedlist>
- <listitem>
- <para>
- @1@option@1@--password=password@2@option@2@: supply a password if the
- overlay/underlay file is encrypted.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--to=page-range@2@option@2@: a range of pages in the same
- form as described in <xref linkend="ref.page-selection"/>
- indicates which pages in the output should have the
- overlay/underlay applied. If not specified, overlay/underlay
- are applied to all pages.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--from=[page-range]@2@option@2@: a range of pages that
- specifies which pages in the overlay/underlay file will be used
- for overlay or underlay. If not specified, all pages will be
- used. This can be explicitly specified to be empty if
- @1@option@1@--repeat@2@option@2@ is used.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--repeat=page-range@2@option@2@: an optional range of
- pages that specifies which pages in the overlay/underlay file
- will be repeated after the "from" pages are used
- up. If you want to repeat a range of pages starting at the
- beginning, you can explicitly use @1@option@1@--from=@2@option@2@.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- Here are some examples.
- <itemizedlist>
- <listitem>
- <para>
- @1@command@1@--overlay o.pdf --to=1-5 --from=1-3
- --repeat=4 --@2@command@2@: overlay the first three pages from file
- @1@filename@1@o.pdf@2@filename@2@ onto the first three pages of the
- output, then overlay page 4 from @1@filename@1@o.pdf@2@filename@2@
- onto pages 4 and 5 of the output. Leave remaining output pages
- untouched.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@command@1@--underlay footer.pdf --from= --repeat=1,2 --@2@command@2@:
- Underlay page 1 of @1@filename@1@footer.pdf@2@filename@2@ on all odd
- output pages, and underlay page 2 of
- @1@filename@1@footer.pdf@2@filename@2@ on all even output pages.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </sect1>
- <sect1 id="ref.attachments">
- <title>Embedded Files/Attachments Options</title>
- <para>
- Starting with qpdf 10.2, you can work with file attachments in PDF
- files from the command line. The following options are available:
- <variablelist>
- <varlistentry>
- <term>@1@option@1@--list-attachments@2@option@2@</term>
- <listitem>
- <para>
- Show the "key" and stream number for embedded
- files. With @1@option@1@--verbose@2@option@2@, additional
- information, including preferred file name, description,
- dates, and more, is also displayed. The key is usually but not
- always equal to the file name, and is needed by some of the
- other options.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--show-attachment=@1@replaceable@1@key@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Write the contents of the specified attachment to standard
- output as binary data. The key should match one of the keys
- shown by @1@option@1@--list-attachments@2@option@2@. If specified
- multiple times, only the last attachment will be shown.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--add-attachment @1@replaceable@1@file@2@replaceable@2@ @1@replaceable@1@options@2@replaceable@2@ --@2@option@2@</term>
- <listitem>
- <para>
- Add or replace an attachment with the contents of
- @1@replaceable@1@file@2@replaceable@2@. This may be specified more
- than once. The following additional options may appear before
- the <literal>--</literal> that ends this option:
- <variablelist>
- <varlistentry>
- <term>@1@option@1@--key=@1@replaceable@1@key@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- The key to use to register the attachment in the embedded
- files table. Defaults to the last path element of
- @1@replaceable@1@file@2@replaceable@2@.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--filename=@1@replaceable@1@name@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- The file name to be used for the attachment. This is what is usually
- displayed to the user and is the name most graphical PDF
- viewers will use when saving a file. It defaults to the
- last path element of @1@replaceable@1@file@2@replaceable@2@.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--creationdate=@1@replaceable@1@date@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- The attachment's creation date in PDF format; defaults to
- the current time. The date format is explained below.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--moddate=@1@replaceable@1@date@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- The attachment's modification date in PDF format; defaults
- to the current time. The date format is explained below.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--mimetype=@1@replaceable@1@type/subtype@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- The mime type for the attachment, e.g.
- <literal>text/plain</literal> or
- <literal>application/pdf</literal>. Note that the mimetype
- appears in a field called <literal>/Subtype</literal> in
- the PDF but actually includes the full type and subtype of
- the mime type.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--description=@1@replaceable@1@&quot;text&quot;@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Descriptive text for the attachment, displayed by some PDF
- viewers.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--replace@2@option@2@</term>
- <listitem>
- <para>
- Indicates that any existing attachment with the same key
- should be replaced by the new attachment. Otherwise,
- @1@command@1@qpdf@2@command@2@ gives an error if an attachment
- with that key is already present.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--remove-attachment=@1@replaceable@1@key@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Remove the specified attachment. This doesn't only remove the
- attachment from the embedded files table but also clears out
- the file specification. That means that any potential internal
- links to the attachment will be broken. This option may be
- specified multiple times. Run with @1@option@1@--verbose@2@option@2@
- to see status of the removal.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--copy-attachments-from @1@replaceable@1@file@2@replaceable@2@ @1@replaceable@1@options@2@replaceable@2@ --@2@option@2@</term>
- <listitem>
- <para>
- Copy attachments from another file. This may be specified more
- than once. The following additional options may appear before
- the <literal>--</literal> that ends this option:
- <variablelist>
- <varlistentry>
- <term>@1@option@1@--password=@1@replaceable@1@password@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- If required, the password needed to open
- @1@replaceable@1@file@2@replaceable@2@
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--prefix=@1@replaceable@1@prefix@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Only required if the file from which attachments are being
- copied has attachments with keys that conflict with
- attachments already in the file. In this case, the
- specified prefix will be prepended to each key. This
- affects only the key in the embedded files table, not the
- file name. The PDF specification doesn't preclude multiple
- attachments having the same file name.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- When a date is required, the date should conform to the PDF date
- format specification, which is
- <literal>D:</literal>@1@replaceable@1@yyyymmddhhmmss&lt;z&gt;@2@replaceable@2@,
- where @1@replaceable@1@&lt;z&gt;@2@replaceable@2@ is either
- <literal>Z</literal> for UTC or a timezone offset in the form
- @1@replaceable@1@-hh'mm'@2@replaceable@2@ or
- @1@replaceable@1@+hh'mm'@2@replaceable@2@. Examples:
- <literal>D:20210207161528-05'00'</literal>,
- <literal>D:20210207211528Z</literal>.
- </para>
- </sect1>
- <sect1 id="ref.advanced-parsing">
- <title>Advanced Parsing Options</title>
- <para>
- These options control aspects of how qpdf reads PDF files. Mostly
- these are of use to people who are working with damaged files.
- There is little reason to use these options unless you are trying
- to solve specific problems. The following options are available:
- <variablelist>
- <varlistentry>
- <term>@1@option@1@--suppress-recovery@2@option@2@</term>
- <listitem>
- <para>
- Prevents qpdf from attempting to recover damaged files.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--ignore-xref-streams@2@option@2@</term>
- <listitem>
- <para>
- Tells qpdf to ignore any cross-reference streams.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- </para>
- <para>
- Ordinarily, qpdf will attempt to recover from certain types of
- errors in PDF files. These include errors in the cross-reference
- table, certain types of object numbering errors, and certain types
- of stream length errors. Sometimes, qpdf may think it has
- recovered but may not have actually recovered, so care should be
- taken when using this option as some data loss is possible. The
- @1@option@1@--suppress-recovery@2@option@2@ option will prevent qpdf from
- attempting recovery. In this case, it will fail on the first
- error that it encounters.
- </para>
- <para>
- Ordinarily, qpdf reads cross-reference streams when they are
- present in a PDF file. If @1@option@1@--ignore-xref-streams@2@option@2@
- is specified, qpdf will ignore any cross-reference streams for
- hybrid PDF files. The purpose of hybrid files is to make some
- content available to viewers that are not aware of cross-reference
- streams. It is almost never desirable to ignore them. The only
- time when you might want to use this feature is if you are testing
- creation of hybrid PDF files and wish to see how a PDF consumer
- that doesn't understand object and cross-reference streams would
- interpret such a file.
- </para>
- </sect1>
- <sect1 id="ref.advanced-transformation">
- <title>Advanced Transformation Options</title>
- <para>
- These transformation options control fine points of how qpdf
- creates the output file. Mostly these are of use only to people
- who are very familiar with the PDF file format or who are PDF
- developers. The following options are available:
- <variablelist>
- <varlistentry>
- <term>@1@option@1@--compress-streams=@1@replaceable@1@[yn]@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- By default, or with @1@option@1@--compress-streams=y@2@option@2@,
- qpdf will compress any stream with no other filters applied to
- it with the <literal>/FlateDecode</literal> filter when it
- writes it. To suppress this behavior and preserve uncompressed
- streams as uncompressed, use
- @1@option@1@--compress-streams=n@2@option@2@.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--decode-level=@1@replaceable@1@option@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Controls which streams qpdf tries to decode. The default is
- @1@option@1@generalized@2@option@2@. The following options are
- available:
- <itemizedlist>
- <listitem>
- <para>
- @1@option@1@none@2@option@2@: do not attempt to decode any streams
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@generalized@2@option@2@: decode streams filtered with
- supported generalized filters:
- <literal>/LZWDecode</literal>,
- <literal>/FlateDecode</literal>,
- <literal>/ASCII85Decode</literal>, and
- <literal>/ASCIIHexDecode</literal>. We define generalized
- filters as those to be used for general-purpose compression
- or encoding, as opposed to filters specifically designed
- for image data. Note that, by default, streams already
- compressed with <literal>/FlateDecode</literal> are not
- uncompressed and recompressed unless you also specify
- @1@option@1@--recompress-flate@2@option@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@specialized@2@option@2@: in addition to generalized,
- decode streams with supported non-lossy specialized
- filters; currently this is just
- <literal>/RunLengthDecode</literal>
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@all@2@option@2@: in addition to generalized and
- specialized, decode streams with supported lossy filters;
- currently this is just <literal>/DCTDecode</literal> (JPEG)
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--stream-data=@1@replaceable@1@option@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Controls transformation of stream data. This option predates
- the @1@option@1@--compress-streams@2@option@2@ and
- @1@option@1@--decode-level@2@option@2@ options. Those options can be
- used to achieve the same effect with more control. The value
- of @1@option@1@@1@replaceable@1@option@2@replaceable@2@@2@option@2@ may be
- one of the following:
- <itemizedlist>
- <listitem>
- <para>
- @1@option@1@compress@2@option@2@: recompress stream data when
- possible (default); equivalent to
- @1@option@1@--compress-streams=y@2@option@2@
- @1@option@1@--decode-level=generalized@2@option@2@. Does not
- recompress streams already compressed with
- <literal>/FlateDecode</literal> unless
- @1@option@1@--recompress-flate@2@option@2@ is also specified.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@preserve@2@option@2@: leave all stream data as is;
- equivalent to @1@option@1@--compress-streams=n@2@option@2@
- @1@option@1@--decode-level=none@2@option@2@
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@uncompress@2@option@2@: uncompress stream data
- compressed with generalized filters when possible;
- equivalent to @1@option@1@--compress-streams=n@2@option@2@
- @1@option@1@--decode-level=generalized@2@option@2@
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--recompress-flate@2@option@2@</term>
- <listitem>
- <para>
- By default, streams already compressed with
- <literal>/FlateDecode</literal> are left alone rather than
- being uncompressed and recompressed. This option causes qpdf
- to uncompress and recompress the streams. There is a
- significant performance cost to using this option, but you
- probably want to use it if you specify
- @1@option@1@--compression-level@2@option@2@.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--compression-level=@1@replaceable@1@level@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- When writing new streams that are compressed with
- <literal>/FlateDecode</literal>, use the specified compression
- level. The value of @1@option@1@level@2@option@2@ should be a number
- from 1 to 9 and is passed directly to zlib, which implements
- deflate compression. Note that qpdf doesn't uncompress and
- recompress streams by default. To have this option apply to
- already compressed streams, you should also specify
- @1@option@1@--recompress-flate@2@option@2@. If your goal is to shrink
- the size of PDF files, you should also use
- @1@option@1@--object-streams=generate@2@option@2@.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--normalize-content=[yn]@2@option@2@</term>
- <listitem>
- <para>
- Enables or disables normalization of content streams. Content
- normalization is enabled by default in QDF mode. Please see
- <xref linkend="ref.qdf"/> for additional discussion of QDF
- mode.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--object-streams=@1@replaceable@1@mode@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Controls handling of object streams. The value of
- @1@option@1@@1@replaceable@1@mode@2@replaceable@2@@2@option@2@ may be one of
- the following:
- <itemizedlist>
- <listitem>
- <para>
- @1@option@1@preserve@2@option@2@: preserve original object streams
- (default)
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@disable@2@option@2@: don't write any object streams
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@generate@2@option@2@: use object streams wherever
- possible
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--preserve-unreferenced@2@option@2@</term>
- <listitem>
- <para>
- Tells qpdf to preserve objects that are not referenced when
- writing the file. Ordinarily any object that is not referenced
- in a traversal of the document from the trailer dictionary
- will be discarded. This may be useful in working with some
- damaged files or inspecting files with known unreferenced
- objects.
- </para>
- <para>
- This flag is ignored for linearized files and has the effect
- of causing objects in the new file to be written in order by
- object ID from the original file. This does not mean that
- object numbers will be the same since qpdf may create stream
- lengths as direct or indirect differently from the original
- file, and the original file may have gaps in its numbering.
- </para>
- <para>
- See also @1@option@1@--preserve-unreferenced-resources@2@option@2@,
- which does something completely different.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--remove-unreferenced-resources=@1@replaceable@1@option@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- The @1@replaceable@1@option@2@replaceable@2@ may be
- <literal>auto</literal>, <literal>yes</literal>, or
- <literal>no</literal>. The default is <literal>auto</literal>.
- </para>
- <para>
- Starting with qpdf 8.1, when splitting pages, qpdf is able to
- attempt to remove images and fonts that are not used by a page
- even if they are referenced in the page's resources
- dictionary. When shared resources are in use, this behavior
- can greatly reduce the file sizes of split pages, but the
- analysis is very slow. In versions from 8.1 through 9.1.1,
- qpdf did this analysis by default. Starting in qpdf 10.0.0, if
- <literal>auto</literal> is used, qpdf does a quick analysis of
- the file to determine whether the file is likely to have
- unreferenced objects on pages, a pattern that frequently
- occurs when resource dictionaries are shared across multiple
- pages and rarely occurs otherwise. If it discovers this
- pattern, then it will attempt to remove unreferenced
- resources. Usually this means you get the slower splitting
- speed only when it's actually going to create smaller files.
- You can suppress removal of unreferenced resources altogether
- by specifying <literal>no</literal> or force it to do the full
- algorithm by specifying <literal>yes</literal>.
- </para>
- <para>
- Other than cases in which you don't care about file size and
- care a lot about runtime, there are few reasons to use this
- option, especially now that <literal>auto</literal> mode is
- supported. One reason to use this is if you suspect that qpdf
- is removing resources it shouldn't be removing. If you
- encounter that case, please report it as a bug at <ulink
- url="https://github.com/qpdf/qpdf/issues/">https://github.com/qpdf/qpdf/issues/</ulink>.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--preserve-unreferenced-resources@2@option@2@</term>
- <listitem>
- <para>
- This is a synonym for
- @1@option@1@--remove-unreferenced-resources=no@2@option@2@.
- </para>
- <para>
- See also @1@option@1@--preserve-unreferenced@2@option@2@, which does
- something completely different.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--newline-before-endstream@2@option@2@</term>
- <listitem>
- <para>
- Tells qpdf to insert a newline before the
- <literal>endstream</literal> keyword, not counted in the
- length, after any stream content even if the last character of
- the stream was a newline. This may result in two newlines in
- some cases. This is a requirement of PDF/A. While qpdf doesn't
- specifically know how to generate PDF/A-compliant PDFs, this
- at least prevents it from removing compliance on already
- compliant files.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--linearize-pass1=@1@replaceable@1@file@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Write the first pass of linearization to the named file. The
- resulting file is not a valid PDF file. This option is useful
- only for debugging <classname>QPDFWriter</classname>'s
- linearization code. When qpdf linearizes files, it writes the
- file in two passes, using the first pass to calculate sizes
- and offsets that are required for hint tables and the
- linearization dictionary. Ordinarily, the first pass is
- discarded. This option enables it to be captured.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--coalesce-contents@2@option@2@</term>
- <listitem>
- <para>
- When a page's contents are split across multiple streams, this
- option causes qpdf to combine them into a single stream. Use
- of this option is never necessary for ordinary usage, but it
- can help when working with some files in some cases. For
- example, this can also be combined with QDF mode or content
- normalization to make it easier to look at all of a page's
- contents at once.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--flatten-annotations=@1@replaceable@1@option@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- This option collapses annotations into the pages' contents
- with special handling for form fields. Ordinarily, an
- annotation is rendered separately and on top of the page.
- Combining annotations into the page's contents effectively
- freezes the placement of the annotations, making them look
- right after various page transformations. The library
- functionality backing this option was added for the benefit of
- programs that want to create <emphasis>n-up</emphasis> page
- layouts and other similar things that don't work well with
- annotations. The @1@replaceable@1@option@2@replaceable@2@ parameter
- may be any of the following:
- <itemizedlist>
- <listitem>
- <para>
- @1@option@1@all@2@option@2@: include all annotations that are not
- marked invisible or hidden
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@print@2@option@2@: only include annotations that
- indicate that they should appear when the page is printed
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@screen@2@option@2@: omit annotations that indicate
- they should not appear on the screen
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- Note that form fields are special because the annotations that
- are used to render filled-in form fields may become out of
- date from the fields' values if the form is filled in by a
- program that doesn't know how to update the appearances. If
- qpdf detects this case, its default behavior is not to flatten
- those annotations because doing so would cause the value of
- the form field to be lost. This gives you a chance to go back
- and resave the form with a program that knows how to generate
- appearances. QPDF itself can generate appearances with some
- limitations. See the @1@option@1@--generate-appearances@2@option@2@
- option below.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--generate-appearances@2@option@2@</term>
- <listitem>
- <para>
- If a file contains interactive form fields and indicates that
- the appearances are out of date with the values of the form,
- this flag will regenerate appearances, subject to a few
- limitations. Note that there is not usually a reason to do
- this, but it can be necessary before using the
- @1@option@1@--flatten-annotations@2@option@2@ option. Most of these
- limitations are not a problem with well-behaved PDF files. The limitations
- are as follows:
- <itemizedlist>
- <listitem>
- <para>
- Radio button and checkbox appearances use the pre-set
- values in the PDF file. QPDF just makes sure that the
- correct appearance is displayed based on the value of the
- field. This is fine for PDF files that create their forms
- properly. Some PDF writers save appearances for fields when
- they change, which could cause some controls to have
- inconsistent appearances.
- </para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- For text fields and list boxes, any characters that fall
- outside of US-ASCII or, if detected, "Windows
- ANSI" or "Mac Roman" encoding, will be
- replaced by the <literal>?</literal> character.
- </para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- Quadding is ignored. Quadding is used to specify whether
- the contents of a field should be left, center, or right
- aligned with the field.
- </para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- Rich text, multi-line, and other more elaborate formatting
- directives are ignored.
- </para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- There is no support for multi-select fields or signature
- fields.
- </para>
- </listitem>
- </itemizedlist>
- If qpdf doesn't do a good enough job with your form, use an
- external application to save your filled-in form before
- processing it with qpdf.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--optimize-images@2@option@2@</term>
- <listitem>
- <para>
- This flag causes qpdf to recompress all images that are not
- compressed with DCT (JPEG) using DCT compression as long as
- doing so decreases the size in bytes of the image data and the
- image does not fall below minimum specified dimensions. Useful
- information is provided when used in combination with
- @1@option@1@--verbose@2@option@2@. See also the
- @1@option@1@--oi-min-width@2@option@2@,
- @1@option@1@--oi-min-height@2@option@2@, and
- @1@option@1@--oi-min-area@2@option@2@ options. By default, starting
- in qpdf 8.4, inline images are converted to regular images
- and optimized as well. Use
- @1@option@1@--keep-inline-images@2@option@2@ to prevent inline images
- from being included.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--oi-min-width=@1@replaceable@1@width@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Avoid optimizing images whose width is below the specified
- amount. If omitted, the default is 128 pixels. Use 0 for no
- minimum.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--oi-min-height=@1@replaceable@1@height@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Avoid optimizing images whose height is below the specified
- amount. If omitted, the default is 128 pixels. Use 0 for no
- minimum.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--oi-min-area=@1@replaceable@1@area-in-pixels@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Avoid optimizing images whose pixel count
- (width × height) is below the specified amount. If
- omitted, the default is 16,384 pixels. Use 0 for no minimum.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--externalize-inline-images@2@option@2@</term>
- <listitem>
- <para>
- Convert inline images to regular images. By default, images
- whose data is at least 1,024 bytes are converted when this
- option is selected. Use @1@option@1@--ii-min-bytes@2@option@2@ to
- change the size threshold. This option is implicitly selected
- when @1@option@1@--optimize-images@2@option@2@ is selected. Use
- @1@option@1@--keep-inline-images@2@option@2@ to exclude inline images
- from image optimization.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--ii-min-bytes=@1@replaceable@1@bytes@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Avoid converting inline images whose size is below the
- specified minimum size to regular images. If omitted, the
- default is 1,024 bytes. Use 0 for no minimum.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--keep-inline-images@2@option@2@</term>
- <listitem>
- <para>
- Prevent inline images from being included in image
- optimization. This option has no effect when
- @1@option@1@--optimize-images@2@option@2@ is not specified.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--remove-page-labels@2@option@2@</term>
- <listitem>
- <para>
- Remove page labels from the output file.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--qdf@2@option@2@</term>
- <listitem>
- <para>
- Turns on QDF mode. For additional information on QDF, please
- see <xref linkend="ref.qdf"/>. Note that
- @1@option@1@--linearize@2@option@2@ disables QDF mode.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--min-version=@1@replaceable@1@version@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- Forces the PDF version of the output file to be at least
- @1@replaceable@1@version@2@replaceable@2@. In other words, if the
- input file has a lower version than the specified version, the
- specified version will be used. If the input file has a
- higher version, the input file's original version will be
- used. It is seldom necessary to use this option since qpdf
- will automatically increase the version as needed when adding
- features that require newer PDF readers.
- </para>
- <para>
- The version number may be expressed in the form
- @1@replaceable@1@major.minor.extension-level@2@replaceable@2@, in
- which case the version is interpreted as
- @1@replaceable@1@major.minor@2@replaceable@2@ at extension level
- @1@replaceable@1@extension-level@2@replaceable@2@. For example,
- version <literal>1.7.8</literal> represents version 1.7 at
- extension level 8. Note that minimal syntax checking is done
- on the command line.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--force-version=@1@replaceable@1@version@2@replaceable@2@@2@option@2@</term>
- <listitem>
- <para>
- This option forces the PDF version to be the exact version
- specified <emphasis>even when the file may have content that
- is not supported in that version</emphasis>. The version
- number is interpreted in the same way as with
- @1@option@1@--min-version@2@option@2@ so that extension levels can be
- set. In some cases, forcing the output file's PDF version to
- be lower than that of the input file will cause qpdf to
- disable certain features of the document. Specifically,
- 256-bit keys are disabled if the version is less than 1.7 with
- extension level 8 (except R5 is disabled if less than 1.7 with
- extension level 3), AES encryption is disabled if the version
- is less than 1.6, cleartext metadata and object streams are
- disabled if less than 1.5, 128-bit encryption keys are
- disabled if less than 1.4, and all encryption is disabled if
- less than 1.3. Even with these precautions, qpdf won't be
- able to do things like eliminate use of newer image
- compression schemes, transparency groups, or other features
- that may have been added in more recent versions of PDF.
- </para>
- <para>
- As a general rule, with the exception of big structural things
- like the use of object streams or AES encryption, PDF viewers
- are supposed to ignore features in files that they don't
- support from newer versions. This means that forcing the
- version to a lower version may make it possible to open your
- PDF file with an older version, though bear in mind that some
- of the original document's functionality may be lost.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- </para>
- <para>
- By default, when a stream is encoded using non-lossy filters that
- qpdf understands and is not already compressed using a good
- compression scheme, qpdf will uncompress and recompress streams.
- Assuming proper filter implementations, this is safe and generally
- results in smaller files. This behavior may also be explicitly
- requested with @1@option@1@--stream-data=compress@2@option@2@.
- </para>
- <para>
- When @1@option@1@--normalize-content=y@2@option@2@ is specified, qpdf
- will attempt to normalize whitespace and newlines in page content
- streams. This is generally safe but could, in some cases, cause
- damage to the content streams. This option is intended for people
- who wish to study PDF content streams or to debug PDF content.
- You should not use this for "production" PDF files.
- </para>
- <para>
- When normalizing content, if qpdf runs into any lexical errors, it
- will print a warning indicating that content may be damaged. The
- only situation in which qpdf is known to cause damage during
- content normalization is when a page's contents are split across
- multiple streams and streams are split in the middle of a lexical
- token such as a string, name, or inline image. Note that files
- that do this are invalid since the PDF specification states that
- content streams are not to be split in the middle of a token. If
- you want to inspect the original content streams in an
- uncompressed format, you can always run with @1@option@1@--qdf
- --normalize-content=n@2@option@2@ for a QDF file without content
- normalization, or alternatively
- @1@option@1@--stream-data=uncompress@2@option@2@ for a regular non-QDF
- mode file with uncompressed streams. These will both uncompress
- all the streams but will not attempt to normalize content. Please
- note that if you are using content normalization or QDF mode for
- the purpose of manually inspecting files, you don't have to care
- about this.
- </para>
- <para>
- Object streams, also known as compressed objects, were introduced
- into the PDF specification at version 1.5, corresponding to
- Acrobat 6. Some older PDF viewers may not support files with
- object streams. qpdf can be used to transform files with object
- streams to files without object streams or vice versa. As
- mentioned above, there are three object stream modes:
- @1@option@1@preserve@2@option@2@, @1@option@1@disable@2@option@2@, and
- @1@option@1@generate@2@option@2@.
- </para>
- <para>
- In @1@option@1@preserve@2@option@2@ mode, the relationship between objects and
- the streams that contain them is preserved from the original file.
- In @1@option@1@disable@2@option@2@ mode, all objects are written as
- regular, uncompressed objects. The resulting file should be
- readable by older PDF viewers. (Of course, the content of the
- files may include features not supported by older viewers, but at
- least the structure will be supported.) In
- @1@option@1@generate@2@option@2@ mode, qpdf will create its own object
- streams. This will usually result in more compact PDF files,
- though they may not be readable by older viewers. In this mode,
- qpdf will also make sure the PDF version number in the header is
- at least 1.5.
- </para>
- <para>
- The @1@option@1@--qdf@2@option@2@ flag turns on QDF mode, which changes
- some of the defaults described above. Specifically, in QDF mode,
- by default, stream data is uncompressed, content streams are
- normalized, and encryption is removed. These defaults can still
- be overridden by specifying the appropriate options as described
- above. Additionally, in QDF mode, stream lengths are stored as
- indirect objects, objects are laid out in a less efficient but
- more readable fashion, and the documents are interspersed with
- comments that make it easier for the user to find things and also
- make it possible for @1@command@1@fix-qdf@2@command@2@ to work properly.
- QDF mode is intended for people, mostly developers, who wish to
- inspect or modify PDF files in a text editor. For details, please
- see <xref linkend="ref.qdf"/>.
- </para>
- </sect1>
- <sect1 id="ref.testing-options">
- <title>Testing, Inspection, and Debugging Options</title>
- <para>
- These options can be useful for digging into PDF files or for use
- in automated test suites for software that uses the qpdf library.
- When any of the options in this section are specified, no output
- file should be given. The following options are available:
- <variablelist>
- <varlistentry>
- <term>@1@option@1@--deterministic-id@2@option@2@</term>
- <listitem>
- <para>
- Causes generation of a deterministic value for /ID. This
- prevents use of timestamp and output file name information in
- the /ID generation. Instead, at some slight additional runtime
- cost, the /ID field is generated to include a digest of the
- significant parts of the content of the output PDF file. This
- means that a given qpdf operation should generate the same /ID
- each time it is run, which can be useful when caching results
- or for generation of some test data. Use of this flag is not
- compatible with creation of encrypted files.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--static-id@2@option@2@</term>
- <listitem>
- <para>
- Causes generation of a fixed value for /ID. This is intended
- for testing only. Never use it for production files. If you
- are trying to get the same /ID each time for a given file and
- you are not generating encrypted files, consider using the
- @1@option@1@--deterministic-id@2@option@2@ option.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--static-aes-iv@2@option@2@</term>
- <listitem>
- <para>
- Causes use of a static initialization vector for AES-CBC.
- This is intended for testing only so that output files can be
- reproducible. Never use it for production files. This option
- in particular is not secure since it significantly weakens the
- encryption.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--no-original-object-ids@2@option@2@</term>
- <listitem>
- <para>
- Suppresses inclusion of original object ID comments in QDF
- files. This can be useful when generating QDF files for test
- purposes, particularly when comparing them to determine
- whether two PDF files have identical content.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--show-encryption@2@option@2@</term>
- <listitem>
- <para>
- Shows document encryption parameters. Also shows the
- document's user password if the owner password is given.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--show-encryption-key@2@option@2@</term>
- <listitem>
- <para>
- When encryption information is being displayed, as when
- @1@option@1@--check@2@option@2@ or @1@option@1@--show-encryption@2@option@2@
- is given, display the computed or retrieved encryption key as
- a hexadecimal string. This value is not ordinarily useful to
- users, but it can be used as the argument to
- @1@option@1@--password@2@option@2@ if the
- @1@option@1@--password-is-hex-key@2@option@2@ option is specified. Note
- that, when PDF files are encrypted, passwords and other
- metadata are used only to compute an encryption key, and the
- encryption key is what is actually used for encryption. This
- enables retrieval of that key.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--check-linearization@2@option@2@</term>
- <listitem>
- <para>
- Checks file integrity and linearization status.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--show-linearization@2@option@2@</term>
- <listitem>
- <para>
- Checks and displays all data in the linearization hint tables.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--show-xref@2@option@2@</term>
- <listitem>
- <para>
- Shows the contents of the cross-reference table in a
- human-readable form. This is especially useful for files with
- cross-reference streams which are stored in a binary format.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--show-object=trailer|obj[,gen]@2@option@2@</term>
- <listitem>
- <para>
- Show the contents of the given object. This is especially
- useful for inspecting objects that are inside of object
- streams (also known as "compressed objects").
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--raw-stream-data@2@option@2@</term>
- <listitem>
- <para>
- When used along with the @1@option@1@--show-object@2@option@2@
- option, if the object is a stream, shows the raw stream data
- instead of the object's contents.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--filtered-stream-data@2@option@2@</term>
- <listitem>
- <para>
- When used along with the @1@option@1@--show-object@2@option@2@
- option, if the object is a stream, shows the filtered stream
- data instead of the object's contents. If the stream is filtered
- using filters that qpdf does not support, an error will be
- issued.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--show-npages@2@option@2@</term>
- <listitem>
- <para>
- Prints the number of pages in the input file on a line by
- itself. Since the number of pages appears by itself on a
- line, this option can be useful for scripting if you need to
- know the number of pages in a file.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--show-pages@2@option@2@</term>
- <listitem>
- <para>
- Shows the object and generation number for each page
- dictionary object and for each content stream associated with
- the page. Having this information makes it more convenient to
- inspect objects from a particular page.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--with-images@2@option@2@</term>
- <listitem>
- <para>
- When used along with @1@option@1@--show-pages@2@option@2@, also shows
- the object and generation numbers for the image objects on
- each page. (At present, information about images in shared
- resource dictionaries is not output by this command. This is
- discussed in a comment in the source code.)
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--json@2@option@2@</term>
- <listitem>
- <para>
- Generate a JSON representation of the file. This is described
- in depth in <xref linkend="ref.json"/>.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--json-help@2@option@2@</term>
- <listitem>
- <para>
- Describe the format of the JSON output.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--json-key=key@2@option@2@</term>
- <listitem>
- <para>
- This option is repeatable. If specified, only top-level keys
- specified will be included in the JSON output. If not
- specified, all keys will be shown.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--json-object=trailer|obj[,gen]@2@option@2@</term>
- <listitem>
- <para>
- This option is repeatable. If specified, only specified
- objects will be shown in the
- "<literal>objects</literal>" key of the JSON
- output. If absent, all objects will be shown.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>@1@option@1@--check@2@option@2@</term>
- <listitem>
- <para>
- Checks file structure as well as encryption, linearization,
- and encoding of stream data. A file for which
- @1@option@1@--check@2@option@2@ reports no errors may still have
- errors in stream data content but should otherwise be
- structurally sound. If @1@option@1@--check@2@option@2@ reports any errors,
- qpdf will exit with a status of 2. There are some recoverable
- conditions that @1@option@1@--check@2@option@2@ detects. These are
- issued as warnings instead of errors. If qpdf finds no errors
- but finds warnings, it will exit with a status of 3 (as of
- version 2.0.4). When @1@option@1@--check@2@option@2@ is combined
- with other options, checks are always performed before any
- other options are processed. For erroneous files,
- @1@option@1@--check@2@option@2@ will cause qpdf to attempt to
- recover, after which other options are effectively operating
- on the recovered file. Combining @1@option@1@--check@2@option@2@ with
- other options in this way can be useful for manually
- recovering severely damaged files. Note that
- @1@option@1@--check@2@option@2@ produces no output to standard output
- when everything is valid, so if you are using this to
- programmatically validate files in bulk, it is safe to run
- without output redirected to @1@filename@1@/dev/null@2@filename@2@
- and just check for a 0 exit code.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- </para>
- <para>
- The @1@option@1@--raw-stream-data@2@option@2@ and
- @1@option@1@--filtered-stream-data@2@option@2@ options are ignored unless
- @1@option@1@--show-object@2@option@2@ is given. Either of these options
- will cause the stream data to be written to standard output. In
- order to avoid commingling of stream data with other output, it is
- recommended that these options not be combined with other
- test/inspection options.
- </para>
- <para>
- If @1@option@1@--filtered-stream-data@2@option@2@ is given and
- @1@option@1@--normalize-content=y@2@option@2@ is also given, qpdf will
- attempt to normalize the stream data as if it is a page content
- stream. This attempt will be made even if it is not a page
- content stream, in which case it will produce unusable results.
- </para>
- </sect1>
- <sect1 id="ref.unicode-passwords">
- <title>Unicode Passwords</title>
- <para>
- At the library API level, all methods that perform encryption and
- decryption interpret passwords as strings of bytes. It is up to
- the caller to ensure that they are appropriately encoded. Starting
- with qpdf version 8.4.0, qpdf will attempt to make this easier for
- you when you interact with qpdf via its command line interface. The
- PDF specification requires passwords used to encrypt files with
- 40-bit or 128-bit encryption to be encoded with PDF Doc encoding.
- This encoding is a single-byte encoding that supports ISO-Latin-1
- and a handful of other commonly used characters. It has a large
- overlap with Windows ANSI but is not exactly the same. There is
- generally not a way to provide PDF Doc encoded strings on the
- command line. As such, qpdf versions prior to 8.4.0 would often
- create PDF files that couldn't be opened with other software when
- given a password with non-ASCII characters to encrypt a file with
- 40-bit or 128-bit encryption. Starting with qpdf 8.4.0, qpdf
- recognizes the encoding of the parameter and transcodes it as
- needed. The rest of this section provides the details about
- exactly how qpdf behaves. Most users will not need to know this
- information, but it might be useful if you have been working
- around qpdf's old behavior or if you are using qpdf to generate
- encrypted files for testing other PDF software.
- </para>
- <para>
- A note about Windows: when qpdf builds, it attempts to determine
- what it has to do to use <function>wmain</function> instead of
- <function>main</function> on Windows. The
- <function>wmain</function> function is an alternative entry point
- that receives all arguments as UTF-16-encoded strings. When qpdf
- starts up this way, it converts all the strings to UTF-8 encoding
- and then invokes the regular main. This means that, as far as qpdf
- is concerned, it receives its command-line arguments with UTF-8
- encoding, just as it would in any modern Linux or UNIX
- environment.
- </para>
- <para>
- If a file is being encrypted with 40-bit or 128-bit encryption and
- the supplied password is not a valid UTF-8 string, qpdf will fall
- back to the behavior of interpreting the password as a string of
- bytes. If you have old scripts that encrypt files by passing the
- output of @1@command@1@iconv@2@command@2@ to qpdf, you no longer need to
- do that, but if you do, qpdf should still work. The only exception
- would be for the extremely unlikely case of a password that is
- encoded with a single-byte encoding but also happens to be valid
- UTF-8. Such a password would contain strings of even numbers of
- characters that alternate between accented letters and symbols. In
- the extremely unlikely event that you are intentionally using such
- passwords and qpdf is thwarting you by interpreting them as UTF-8,
- you can use @1@option@1@--password-mode=bytes@2@option@2@ to suppress
- qpdf's automatic behavior.
- </para>
- <para>
- The @1@option@1@--password-mode@2@option@2@ option, as described earlier
- in this chapter, can be used to change qpdf's interpretation of
- supplied passwords. There are very few reasons to use this option.
- One would be the unlikely case described in the previous paragraph
- in which the supplied password happens to be valid UTF-8 but isn't
- supposed to be UTF-8. Your best bet would be just to provide the
- password as a valid UTF-8 string, but you could also use
- @1@option@1@--password-mode=bytes@2@option@2@. Another reason to use
- @1@option@1@--password-mode=bytes@2@option@2@ would be to intentionally
- generate PDF files encrypted with passwords that are not properly
- encoded. The qpdf test suite does this to generate invalid files
- for the purpose of testing its password recovery capability. If
- you were trying to create intentionally incorrect files for a
- similar purpose, the @1@option@1@bytes@2@option@2@ password mode can
- enable you to do this.
- </para>
- <para>
- When qpdf attempts to decrypt a file with a password that contains
- non-ASCII characters, it will generate a list of alternative
- passwords by attempting to interpret the password as each of a
- handful of different coding systems and then transcode them to the
- required format. This helps to compensate for the supplied
- password being given in the wrong coding system, such as would
- happen if you used the @1@command@1@iconv@2@command@2@ workaround that
- was previously needed. It also generates passwords by doing the
- reverse operation: translating from correct to incorrect encoding
- of the password. This would enable qpdf to decrypt files using
- passwords that were improperly encoded by whatever software
- encrypted the files, including older versions of qpdf invoked
- without properly encoded passwords. The combination of these two
- recovery methods should make qpdf transparently open most
- encrypted files with the password supplied correctly but in the
- wrong coding system. There are no real downsides to this behavior,
- but if you don't want qpdf to do this, you can use the
- @1@option@1@--suppress-password-recovery@2@option@2@ option. One reason
- to do that is to ensure that you know the exact password that was
- used to encrypt the file.
- </para>
- <para>
- With these changes, qpdf now generates compliant passwords in most
- cases. There are still some exceptions. In particular, the PDF
- specification directs compliant writers to normalize Unicode
- passwords and to perform certain transformations on passwords with
- bidirectional text. Implementing this functionality requires using
- a real Unicode library like ICU. If a client application that uses
- qpdf wants to do this, the qpdf library will accept the resulting
- passwords, but qpdf will not perform these transformations itself.
- It is possible that this will be addressed in a future version of
- qpdf. The <classname>QPDFWriter</classname> methods that enable
- encryption on the output file accept passwords as strings of
- bytes.
- </para>
- <para>
- Please note that the @1@option@1@--password-is-hex-key@2@option@2@ option
- is unrelated to all this. This flag bypasses the normal process of
- going from password to encryption string entirely, allowing the
- raw encryption key to be specified directly. This is useful for
- forensic purposes or for brute-force recovery of files with
- unknown passwords.
- </para>
- </sect1>
- </chapter>
- <chapter id="ref.qdf">
- <title>QDF Mode</title>
- <para>
- In QDF mode, qpdf creates PDF files in what we call @1@firstterm@1@QDF
- form@2@firstterm@2@. A PDF file in QDF form, sometimes called a QDF
- file, is a completely valid PDF file that has
- <literal>%QDF-1.0</literal> as its third line (after the pdf header
- and binary characters) and has certain other characteristics. The
- purpose of QDF form is to make it possible to edit PDF files, with
- some restrictions, in an ordinary text editor. This can be very
- useful for experimenting with different PDF constructs or for
- making one-off edits to PDF files (though there are other reasons
- why this may not always work). Note that QDF mode does not support
- linearized files. If you enable linearization, QDF mode is
- automatically disabled.
- </para>
- <para>
- It is ordinarily very difficult to edit PDF files in a text editor
- for two reasons: most meaningful data in PDF files is compressed,
- and PDF files are full of offset and length information that makes
- it hard to add or remove data. A QDF file is organized in a manner
- such that, if edits are kept within certain constraints, the
- @1@command@1@fix-qdf@2@command@2@ program, distributed with qpdf, is able
- to restore edited files to a correct state. The
- @1@command@1@fix-qdf@2@command@2@ program takes no command-line
- arguments. It reads a possibly edited QDF file from standard input
- and writes a repaired file to standard output.
- </para>
- <para>
- The following attributes characterize a QDF file:
- <itemizedlist>
- <listitem>
- <para>
- All objects appear in numerical order in the PDF file, including
- when objects appear in object streams.
- </para>
- </listitem>
- <listitem>
- <para>
- Objects are printed in an easy-to-read format, and all line
- endings are normalized to UNIX line endings.
- </para>
- </listitem>
- <listitem>
- <para>
- Unless specifically overridden, streams appear uncompressed
- (when qpdf supports the filters and they are compressed with a
- non-lossy compression scheme), and most content streams are
- normalized (line endings are converted to just UNIX-style
- linefeeds).
- </para>
- </listitem>
- <listitem>
- <para>
- All stream lengths are represented as indirect objects, and the
- stream length object is always the next object after the stream.
- If the stream data does not end with a newline, an extra newline
- is inserted, and a special comment appears after the stream
- indicating that this has been done.
- </para>
- </listitem>
- <listitem>
- <para>
- If the PDF file contains object streams, if object stream
- <emphasis>n</emphasis> contains <emphasis>k</emphasis> objects,
- those objects are numbered from <emphasis>n+1</emphasis> through
- <emphasis>n+k</emphasis>, and the object number/offset pairs
- appear on a separate line for each object. Additionally, each
- object in the object stream is preceded by a comment indicating
- its object number and index. This makes it very easy to find
- objects in object streams.
- </para>
- </listitem>
- <listitem>
- <para>
- All beginnings of objects, <literal>stream</literal> tokens,
- <literal>endstream</literal> tokens, and
- <literal>endobj</literal> tokens appear on lines by themselves.
- A blank line follows every <literal>endobj</literal> token.
- </para>
- </listitem>
- <listitem>
- <para>
- If there is a cross-reference stream, it is unfiltered.
- </para>
- </listitem>
- <listitem>
- <para>
- Page dictionaries and page content streams are marked with
- special comments that make them easy to find.
- </para>
- </listitem>
- <listitem>
- <para>
- Comments precede each object indicating the object number of the
- corresponding object in the original file.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- When editing a QDF file, any edits can be made as long as the above
- constraints are maintained. This means that you can freely edit a
- page's content without worrying about messing up the QDF file. It
- is also possible to add new objects so long as those objects are
- added after the last object in the file or subsequent objects are
- renumbered. If a QDF file has object streams in it, you can always
- add the new objects before the xref stream and then change the
- number of the xref stream, since nothing generally ever references
- it by number.
- </para>
- <para>
- It is not generally practical to remove objects from QDF files
- without messing up object numbering, but if you remove all
- references to an object, you can run qpdf on the file (after
- running @1@command@1@fix-qdf@2@command@2@), and qpdf will omit the
- now-orphaned object.
- </para>
- <para>
- When @1@command@1@fix-qdf@2@command@2@ is run, it goes through the file
- and recomputes the following parts of the file:
- <itemizedlist>
- <listitem>
- <para>
- the <literal>/N</literal>, <literal>/W</literal>, and
- <literal>/First</literal> keys of all object stream dictionaries
- </para>
- </listitem>
- <listitem>
- <para>
- the pairs of numbers representing object numbers and offsets of
- objects in object streams
- </para>
- </listitem>
- <listitem>
- <para>
- all stream lengths
- </para>
- </listitem>
- <listitem>
- <para>
- the cross-reference table or cross-reference stream
- </para>
- </listitem>
- <listitem>
- <para>
- the offset to the cross-reference table or cross-reference
- stream following the <literal>startxref</literal> token
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </chapter>
- <chapter id="ref.using-library">
- <title>Using the QPDF Library</title>
- <sect1 id="ref.using.from-cxx">
- <title>Using QPDF from C++</title>
- <para>
- The source tree for the qpdf package has an
- @1@filename@1@examples@2@filename@2@ directory that contains a few
- example programs. The @1@filename@1@qpdf/qpdf.cc@2@filename@2@ source
- file also serves as a useful example since it exercises almost all
- of the qpdf library's public interface. The best source of
- documentation on the library itself is reading comments in
- @1@filename@1@include/qpdf/QPDF.hh@2@filename@2@,
- @1@filename@1@include/qpdf/QPDFWriter.hh@2@filename@2@, and
- @1@filename@1@include/qpdf/QPDFObjectHandle.hh@2@filename@2@.
- </para>
- <para>
- All header files are installed in the @1@filename@1@include/qpdf@2@filename@2@ directory. It
- is recommended that you use <literal>#include
- &lt;qpdf/QPDF.hh&gt;</literal> rather than adding
- @1@filename@1@include/qpdf@2@filename@2@ to your include path.
- </para>
- <para>
- When linking against the qpdf static library, you may also need to
- specify <literal>-lz -ljpeg</literal> on your link command. If
- your system understands how to read libtool
- @1@filename@1@.la@2@filename@2@ files, this may not be necessary.
- </para>
- <para>
- The qpdf library is safe to use in a multithreaded program, but no
- individual <type>QPDF</type> object instance (including
- <type>QPDF</type>, <type>QPDFObjectHandle</type>, or
- <type>QPDFWriter</type>) can be used in more than one thread at a
- time. Multiple threads may simultaneously work with different
- instances of these and all other QPDF objects.
- </para>
- </sect1>
- <sect1 id="ref.using.other-languages">
- <title>Using QPDF from other languages</title>
- <para>
- The qpdf library is implemented in C++, which makes it hard to use
- directly in other languages. There are a few things that can help.
- </para>
- <variablelist>
- <varlistentry>
- <term>"C"</term>
- <listitem>
- <para>
- The qpdf library includes a "C" language interface
- that provides a subset of the overall capabilities. The header
- file @1@filename@1@qpdf/qpdf-c.h@2@filename@2@ includes information
- about its use. As long as you use a C++ linker, you can link C
- programs with qpdf and use the C API. For languages that can
- directly load methods from a shared library, the C API can also
- be useful. People have reported success using the C API from
- other languages on Windows by directly calling functions in the
- DLL.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>Python</term>
- <listitem>
- <para>
- A Python module called <ulink
- url="https://pypi.org/project/pikepdf/">pikepdf</ulink>
- provides a clean and highly functional set of Python bindings
- to the qpdf library. Using pikepdf, you can work with PDF files
- in a natural way and combine qpdf's capabilities with other
- functionality provided by Python's rich standard library and
- available modules.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>Other Languages</term>
- <listitem>
- <para>
- Starting with version 8.3.0, the @1@command@1@qpdf@2@command@2@
- command-line tool can produce a JSON representation of the PDF
- file's non-content data. This can facilitate interacting
- programmatically with PDF files through qpdf's command line
- interface. For more information, please see <xref
- linkend="ref.json"/>.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- </sect1>
- <sect1 id="ref.unicode-files">
- <title>A Note About Unicode File Names</title>
- <para>
- When strings are passed to qpdf library routines either as
- <literal>char*</literal> or as <literal>std::string</literal>,
- they are treated as byte arrays except where otherwise noted. When
- Unicode is desired, qpdf wants UTF-8 unless otherwise noted in
- comments in header files. In modern UNIX/Linux environments, this
- generally does the right thing. In Windows, it's a bit more
- complicated. Starting in qpdf 8.4.0, passwords that contain
- Unicode characters are handled much better, and starting in qpdf
- 8.4.1, the library attempts to properly handle Unicode characters
- in filenames. In particular, in Windows, if a UTF-8 encoded string
- is used as a filename in either <classname>QPDF</classname> or
- <classname>QPDFWriter</classname>, it is internally converted to
- <literal>wchar_t*</literal>, and Unicode-aware Windows APIs are
- used. As such, qpdf will generally operate properly on files with
- non-ASCII characters in their names as long as the filenames are
- UTF-8 encoded for passing into the qpdf library API, but there are
- still some rough edges, such as the encoding of the filenames in
- error messages or CLI output messages. Patches or bug reports are
- welcome for any continuing issues with Unicode file names in
- Windows.
- </para>
- </sect1>
- </chapter>
- <chapter id="ref.weak-crypto">
- <title>Weak Cryptography</title>
- <para>
- Starting with version 10.4, qpdf is taking steps to reduce the
- likelihood of a user <emphasis>accidentally</emphasis> creating PDF
- files with insecure cryptography but will continue to allow
- creation of such files indefinitely with explicit acknowledgment.
- </para>
- <para>
- The PDF file format makes use of RC4, which is known to be a weak
- cryptography algorithm, and MD5, which is a weak hashing algorithm.
- In version 10.4, qpdf generates warnings for some (but not all)
- cases of writing files with weak cryptography when invoked from the
- command-line. These warnings can be suppressed using the
- @1@option@1@--allow-weak-crypto@2@option@2@ option.
- </para>
- <para>
- It is planned for qpdf version 11 to be stricter, making it an
- error to write files with insecure cryptography from the
- command-line tool in most cases without specifying the
- @1@option@1@--allow-weak-crypto@2@option@2@ flag and also to require
- explicit steps when using the C++ library to enable use of insecure
- cryptography.
- </para>
- <para>
- Note that qpdf must always retain support for weak cryptographic
- algorithms since this is required for reading older PDF files that
- use them. Additionally, qpdf will always retain the ability to create
- files using weak cryptographic algorithms since, as a development
- tool, qpdf explicitly supports creating older or deprecated types
- of PDF files since these are sometimes needed to test or work with
- older versions of software. Even if other cryptography libraries
- drop support for RC4 or MD5, qpdf can always fall back to its
- internal implementations of those algorithms, so they are not going
- to disappear from qpdf.
- </para>
- </chapter>
- <chapter id="ref.json">
- <title>QPDF JSON</title>
- <sect1 id="ref.json-overview">
- <title>Overview</title>
- <para>
- Beginning with qpdf version 8.3.0, the @1@command@1@qpdf@2@command@2@
- command-line program can produce a JSON representation of the
- non-content data in a PDF file. It includes a dump in JSON format
- of all objects in the PDF file excluding the content of streams.
- This JSON representation makes it very easy to look in detail at
- the structure of a given PDF file, and it also provides a great way
- to work with PDF files programmatically from the command-line in
- languages that can't call or link with the qpdf library directly.
- Note that stream data can be extracted from PDF files using other
- qpdf command-line options.
- </para>
- </sect1>
- <sect1 id="ref.json-guarantees">
- <title>JSON Guarantees</title>
- <para>
- The qpdf JSON representation includes a JSON serialization of the
- raw objects in the PDF file as well as some computed information in
- a more easily extracted format. QPDF provides some guarantees about
- its JSON format. These guarantees are designed to simplify the
- experience of a developer working with the JSON format.
- <variablelist>
- <varlistentry>
- <term>Compatibility</term>
- <listitem>
- <para>
- The top-level JSON object output is a dictionary. The JSON
- output contains various nested dictionaries and arrays. With
- the exception of dictionaries that are populated by the fields
- of objects from the file, all instances of a dictionary are
- guaranteed to have exactly the same keys. Future versions of
- qpdf are free to add additional keys but not to remove keys or
- change the type of object that a key points to. The qpdf
- program validates this guarantee, and in the unlikely event
- that a bug in qpdf should cause it to generate data that
- doesn't conform to this rule, it will ask you to file a bug
- report.
- </para>
- <para>
- The top-level JSON structure contains a
- "<literal>version</literal>" key whose value is
- a simple integer. The value of the <literal>version</literal> key
- will be incremented if a non-compatible change is made. A
- non-compatible change would be any change that involves removal
- of a key, a change to the format of data pointed to by a key,
- or a semantic change that requires a different interpretation
- of a previously existing key. A strong effort will be made to
- avoid breaking compatibility.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>Documentation</term>
- <listitem>
- <para>
- The @1@command@1@qpdf@2@command@2@ command can be invoked with the
- @1@option@1@--json-help@2@option@2@ option. This will output a JSON
- structure that has the same structure as the JSON output that
- qpdf generates, except that each field in the help output is a
- description of the corresponding field in the JSON output. The
- specific guarantees are as follows:
- <itemizedlist>
- <listitem>
- <para>
- A dictionary in the help output means that the
- corresponding location in the actual JSON output is also a
- dictionary with exactly the same keys; that is, no keys
- present in help are absent in the real output, and no keys
- will be present in the real output that are not in help. As
- a special case, if the dictionary has a single key whose
- name starts with <literal>&lt;</literal> and ends with
- <literal>&gt;</literal>, it means that the JSON output is a
- dictionary that can have any keys, each of which conforms
- to the value of the special key. This is used for cases in
- which the keys of the dictionary are things like object
- IDs.
- </para>
- </listitem>
- <listitem>
- <para>
- A string in the help output is a description of the item
- that appears in the corresponding location of the actual
- output. The corresponding output can have any format.
- </para>
- </listitem>
- <listitem>
- <para>
- An array in the help output always contains a single
- element. It indicates that the corresponding location in the
- actual output is also an array, and that each element of the
- array has whatever format is implied by the single element
- of the help output's array.
- </para>
- </listitem>
- </itemizedlist>
- For example, the help output includes a
- "<literal>pagelabels</literal>" key whose value is
- an array of one element. That element is a dictionary with keys
- "<literal>index</literal>" and
- "<literal>label</literal>". In addition to
- describing the meaning of those keys, this tells you that the
- actual JSON output will contain a <literal>pagelabels</literal>
- array, each of whose elements is a dictionary that contains an
- <literal>index</literal> key, a <literal>label</literal> key,
- and no other keys.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>Directness and Simplicity</term>
- <listitem>
- <para>
- The JSON output contains the value of every object in the file,
- but it also contains some processed data. This is analogous to
- how qpdf's library interface works. The processed data is
- similar to the helper functions in that it allows you to look
- at certain aspects of the PDF file without having to understand
- all the nuances of the PDF specification, while the raw objects
- allow you to mine the PDF for anything that the higher-level
- interfaces are lacking.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- </para>
- </sect1>
- <sect1 id="json.limitations">
- <title>Limitations of JSON Representation</title>
- <para>
- There are a few limitations to be aware of with the JSON structure:
- <itemizedlist>
- <listitem>
- <para>
- Strings, names, and indirect object references in the original
- PDF file are all converted to strings in the JSON
- representation. In the case of a "normal" PDF file,
- you can tell the difference because a name starts with a slash
- (<literal>/</literal>), and an indirect object reference looks
- like <literal>n n R</literal>, but if there were to be a string
- that looked like a name or indirect object reference, there
- would be no way to tell this from the JSON output. Note that
- there are certain cases where you know for sure what something
- is, such as knowing that dictionary keys in objects are always
- names and that certain things in the higher-level computed data
- are known to contain indirect object references.
- </para>
- </listitem>
- <listitem>
- <para>
- The JSON format doesn't support binary data very well. Mostly
- the details are not important, but they are presented here for
- information. When qpdf outputs a string in the JSON
- representation, it converts the string to UTF-8, assuming usual
- PDF string semantics. Specifically, if the original string is
- UTF-16, it is converted to UTF-8. Otherwise, it is assumed to
- have PDF doc encoding, and is converted to UTF-8 with that
- assumption. This causes strange things to happen to binary
- strings. For example, if you had the binary string
- <literal>&lt;038051&gt;</literal>, this would be output to the
- JSON as <literal>\u0003•Q</literal> because
- <literal>03</literal> is not a printable character and
- <literal>80</literal> is the bullet character in PDF doc
- encoding and is mapped to the Unicode value
- <literal>2022</literal>. Since <literal>51</literal> is
- <literal>Q</literal>, it is output as is. If you wanted to
- convert back from here to a binary string, you would have to
- recognize Unicode values whose code points are higher than
- <literal>0xFF</literal> and map those back to their
- corresponding PDF doc encoding characters. There is no way to
- tell the difference between a Unicode string that was originally
- encoded as UTF-16 or one that was converted from PDF doc
- encoding. In other words, it's best if you don't try to use the
- JSON format to extract binary strings from the PDF file, but if
- you really had to, it could be done. Note that qpdf's
- @1@option@1@--show-object@2@option@2@ option does not have this
- limitation and will reveal the string as encoded in the original
- file.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </sect1>
- <sect1 id="json.considerations">
- <title>JSON: Special Considerations</title>
- <para>
- For the most part, the built-in JSON help tells you everything you
- need to know about the JSON format, but there are a few
- non-obvious things to be aware of:
- <itemizedlist>
- <listitem>
- <para>
- While qpdf guarantees that keys present in the help will be
- present in the output, those fields may be null or empty if the
- information is not known or absent in the file. Also, if you
- specify @1@option@1@--json-keys@2@option@2@, the keys that are not
- listed will be excluded entirely except for those that
- @1@option@1@--json-help@2@option@2@ says are always present.
- </para>
- </listitem>
- <listitem>
- <para>
- In a few places, there are keys with names containing
- <literal>pageposfrom1</literal>. The values of these keys are
- null or an integer. If an integer, they point to a page index
- within the file numbering from 1. Note that JSON indexes from
- 0, and you would also use 0-based indexing using the API.
- However, 1-based indexing is easier in this case because the
- command-line syntax for specifying page ranges is 1-based. If
- you were going to write a program that looked through the JSON
- for information about specific pages and then use the
- command-line to extract those pages, 1-based indexing is
- easier. Besides, it's more convenient to subtract 1 in a
- program in a real programming language than it is to add 1 in
- shell code.
- </para>
- </listitem>
- <listitem>
- <para>
- The image information included in the <literal>page</literal>
- section of the JSON output includes the key
- "<literal>filterable</literal>". Note that the
- value of this field may depend on the
- @1@option@1@--decode-level@2@option@2@ that you invoke qpdf with. The
- JSON output includes a top-level key
- "<literal>parameters</literal>" that indicates the
- decode level used for computing whether a stream was
- filterable. For example, jpeg images will be shown as not
- filterable by default, but they will be shown as filterable if
- you run @1@command@1@qpdf --json --decode-level=all@2@command@2@.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </sect1>
- </chapter>
- <chapter id="ref.design">
- <title>Design and Library Notes</title>
- <sect1 id="ref.design.intro">
- <title>Introduction</title>
- <para>
- This section was written prior to the implementation of the qpdf
- package and was subsequently modified to reflect the
- implementation. In some cases, for purposes of explanation, it
- may differ slightly from the actual implementation. As always,
- the source code and test suite are authoritative. Even if there
- are some errors, this document should serve as a road map to
- understanding how this code works.
- </para>
- <para>
- In general, one should adhere strictly to a specification when
- writing but be liberal in reading. This way, the product of our
- software will be accepted by the widest range of other programs,
- and we will accept the widest range of input files. This library
- attempts to conform to that philosophy whenever possible but also
- aims to provide strict checking for people who want to validate
- PDF files. If you don't want to see warnings and are trying to
- write something that is tolerant, you can call
- <literal>setSuppressWarnings(true)</literal>. If you want to fail
- on the first error, you can call
- <literal>setAttemptRecovery(false)</literal>. The default behavior
- is to generate warnings for recoverable problems. Note that
- recovery will not always produce the desired results even if it is
- able to get through the file. Unlike most other PDF software that
- produces generic warnings such as "This file is
- damaged," qpdf generally issues a detailed error message
- that would be most useful to a PDF developer. This is by design as
- there seems to be a shortage of PDF validation tools out there.
- This was, in fact, one of the major motivations behind the initial
- creation of qpdf.
- </para>
- </sect1>
- <sect1 id="ref.design-goals">
- <title>Design Goals</title>
- <para>
- The QPDF package includes support for reading and rewriting PDF
- files. It aims to hide from the user details involving object
- locations, modified (appended) PDF files, the
- directness/indirectness of objects, and stream filters including
- encryption. It does not aim to hide knowledge of the object
- hierarchy or content stream contents. Put another way, a user of
- the qpdf library is expected to have knowledge about how PDF files
- work, but is not expected to have to keep track of bookkeeping
- details such as file positions.
- </para>
- <para>
- A user of the library never has to care whether an object is
- direct or indirect, though it is possible to determine whether an
- object is direct or not if this information is needed. All access
- to objects deals with this transparently. All memory management
- details are also handled by the library.
- </para>
- <para>
- The <classname>PointerHolder</classname> object is used internally
- by the library to deal with memory management. This is basically a
- smart pointer object very similar in spirit to C++11's
- <classname>std::shared_ptr</classname> object, but predating it by
- several years. This library also makes use of a technique for
- giving fine-grained access to methods in one class to other
- classes by using public subclasses with friends and only private
- members that in turn call private methods of the containing class.
- See <classname>QPDFObjectHandle::Factory</classname> as an
- example.
- </para>
- <para>
- The top-level qpdf class is <classname>QPDF</classname>. A
- <classname>QPDF</classname> object represents a PDF file. The
- library provides methods for both accessing and mutating PDF
- files.
- </para>
- <para>
- The primary class for interacting with PDF objects is
- <classname>QPDFObjectHandle</classname>. Instances of this class
- can be passed around by value, copied, stored in containers, etc.
- with very low overhead. Instances of
- <classname>QPDFObjectHandle</classname> created by reading from a
- file will always contain a reference back to the
- <classname>QPDF</classname> object from which they were created. A
- <classname>QPDFObjectHandle</classname> may be direct or indirect.
- If indirect, the <classname>QPDFObject</classname> the
- <classname>PointerHolder</classname> initially points to is a null
- pointer. In this case, the first attempt to access the underlying
- <classname>QPDFObject</classname> will result in the
- <classname>QPDFObject</classname> being resolved via a call to the
- referenced <classname>QPDF</classname> instance. This makes it
- essentially impossible to make coding errors in which certain
- things will work for some PDF files and not for others based on
- which objects are direct and which objects are indirect.
- </para>
- <para>
- Instances of <classname>QPDFObjectHandle</classname> can be
- directly created and modified using static factory methods in the
- <classname>QPDFObjectHandle</classname> class. There are factory
- methods for each type of object as well as a convenience method
- <function>QPDFObjectHandle::parse</function> that creates an
- object from a string representation of the object. Existing
- instances of <classname>QPDFObjectHandle</classname> can also be
- modified in several ways. See comments in
- <filename>QPDFObjectHandle.hh</filename> for details.
- </para>
- <para>
- An instance of <classname>QPDF</classname> is constructed by using
- the class's default constructor. If desired, the
- <classname>QPDF</classname> object may be configured with various
- methods that change its default behavior. Then the
- <function>QPDF::processFile()</function> method is passed the name
- of a PDF file, which permanently associates the file with that
- QPDF object. A password may also be given for access to
- password-protected files. QPDF does not enforce encryption
- parameters and will treat user and owner passwords equivalently.
- Either password may be used to access an encrypted file.
- <classname>QPDF</classname> will allow recovery of a user password
- given an owner password. The input PDF file must be seekable.
- (Output files written by <classname>QPDFWriter</classname> need
- not be seekable, even when creating linearized files.) During
- construction, <classname>QPDF</classname> validates the PDF file's
- header, and then reads the cross reference tables and trailer
- dictionaries. The <classname>QPDF</classname> class keeps only
- the first trailer dictionary though it does read all of them so it
- can check the <literal>/Prev</literal> key.
- <classname>QPDF</classname> class users may request the root
- object and the trailer dictionary specifically. The cross
- reference table is kept private. Objects may then be requested by
- number or by walking the object tree.
- </para>
- <para>
- When a PDF file has a cross-reference stream instead of a
- cross-reference table and trailer, requesting the document's
- trailer dictionary returns the stream dictionary from the
- cross-reference stream instead.
- </para>
- <para>
- There are some convenience routines for very common operations
- such as walking the page tree and returning a vector of all page
- objects. For full details, please see the header files
- <filename>QPDF.hh</filename> and
- <filename>QPDFObjectHandle.hh</filename>. There are also some
- additional helper classes that provide higher level API functions
- for certain document constructions. These are discussed in <xref
- linkend="ref.helper-classes"/>.
- </para>
- </sect1>
- <sect1 id="ref.helper-classes">
- <title>Helper Classes</title>
- <para>
- QPDF version 8.1 introduced the concept of helper classes. Helper
- classes are intended to contain higher level APIs that allow
- developers to work with certain document constructs at an
- abstraction level above that of
- <classname>QPDFObjectHandle</classname> while staying true to
- qpdf's philosophy of not hiding document structure from the
- developer. As with qpdf in general, the goal is to take away some of
- the more tedious bookkeeping aspects of working with PDF files,
- not to remove the need for the developer to understand how the PDF
- construction in question works. The driving factor behind the
- creation of helper classes was to allow the evolution of higher
- level interfaces in qpdf without polluting the interfaces of the
- main top-level classes <classname>QPDF</classname> and
- <classname>QPDFObjectHandle</classname>.
- </para>
- <para>
- There are two kinds of helper classes:
- <emphasis>document</emphasis> helpers and
- <emphasis>object</emphasis> helpers. Document helpers are
- constructed with a reference to a <classname>QPDF</classname>
- object and provide methods for working with structures that are at
- the document level. Object helpers are constructed with an
- instance of a <classname>QPDFObjectHandle</classname> and provide
- methods for working with specific types of objects.
- </para>
- <para>
- Examples of document helpers include
- <classname>QPDFPageDocumentHelper</classname>, which contains
- methods for operating on the document's page trees, such as
- enumerating all pages of a document and adding and removing pages;
- and <classname>QPDFAcroFormDocumentHelper</classname>, which
- contains document-level methods related to interactive forms, such
- as enumerating form fields and creating mappings between form
- fields and annotations.
- </para>
- <para>
- Examples of object helpers include
- <classname>QPDFPageObjectHelper</classname> for performing
- operations on pages such as page rotation and some operations on
- content streams, <classname>QPDFFormFieldObjectHelper</classname>
- for performing operations related to interactive form fields, and
- <classname>QPDFAnnotationObjectHelper</classname> for working with
- annotations.
- </para>
- <para>
- It is always possible to retrieve the underlying
- <classname>QPDF</classname> reference from a document helper and
- the underlying <classname>QPDFObjectHandle</classname> reference
- from an object helper. Helpers are designed to be helpers, not
- wrappers. The intention is that, in general, it is safe to freely
- intermix operations that use helpers with operations that use the
- underlying objects. Document and object helpers do not attempt to
- provide a complete interface for working with the things they are
- helping with, nor do they attempt to encapsulate underlying
- structures. They just provide a few methods to help with
- error-prone, repetitive, or complex tasks. In some cases, a helper
- object may cache some information that is expensive to gather. In
- such cases, the helper classes are implemented so that their own
- methods keep the cache consistent, and the header file will
- provide a method to invalidate the cache and a description of what
- kinds of operations would make the cache invalid. If in doubt, you
- can always discard a helper class and create a new one with the
- same underlying objects, which will ensure that you have discarded
- any stale information.
- </para>
- <para>
- By convention, document helpers are called
- <classname>QPDFSomethingDocumentHelper</classname> and are derived
- from <classname>QPDFDocumentHelper</classname>, and object helpers
- are called <classname>QPDFSomethingObjectHelper</classname> and
- are derived from <classname>QPDFObjectHelper</classname>. For
- details on specific helpers, please see their header files. You
- can find them by looking at
- <filename>include/qpdf/QPDF*DocumentHelper.hh</filename> and
- <filename>include/qpdf/QPDF*ObjectHelper.hh</filename>.
- </para>
- <para>
- In order to avoid creation of circular dependencies, the following
- general guidelines are followed with helper classes:
- <itemizedlist>
- <listitem>
- <para>
- Core class interfaces do not know about helper classes. For
- example, no methods of <classname>QPDF</classname> or
- <classname>QPDFObjectHandle</classname> will include helper
- classes in their interfaces.
- </para>
- </listitem>
- <listitem>
- <para>
- Interfaces of object helpers will usually not use document
- helpers in their interfaces. This is because it is much more
- useful for document helpers to have methods that return object
- helpers. Most operations in PDF files start at the document
- level and go from there to the object level rather than the
- other way around. It can sometimes be useful to map back from
- object-level structures to document-level structures. If there
- is a desire to do this, it will generally be provided by a
- method in the document helper class.
- </para>
- </listitem>
- <listitem>
- <para>
- Most of the time, object helpers don't know about other object
- helpers. However, in some cases, one type of object may be a
- container for another type of object, in which case it may make
- sense for the outer object to know about the inner object. For
- example, there are methods in the
- <classname>QPDFPageObjectHelper</classname> that know
- <classname>QPDFAnnotationObjectHelper</classname> because
- references to annotations are contained in page dictionaries.
- </para>
- </listitem>
- <listitem>
- <para>
- Any helper or core library class may use helpers in their
- implementations.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- Prior to qpdf version 8.1, higher level interfaces were added as
- "convenience functions" in either
- <classname>QPDF</classname> or
- <classname>QPDFObjectHandle</classname>. For compatibility, older
- convenience functions for operating with pages will remain in
- those classes even as alternatives are provided in helper classes.
- Going forward, new higher level interfaces will be provided using
- helper classes.
- </para>
- </sect1>
- <sect1 id="ref.implementation-notes">
- <title>Implementation Notes</title>
- <para>
- This section contains a few notes about QPDF's internal
- implementation, particularly around what it does when it first
- processes a file. This section is a bit of a simplification of
- what it actually does, but it could serve as a starting point to
- someone trying to understand the implementation. There is nothing
- in this section that you need to know to use the qpdf library.
- </para>
- <para>
- <classname>QPDFObject</classname> is the basic PDF Object class.
- It is an abstract base class from which are derived classes for
- each type of PDF object. Clients do not interact with Objects
- directly but instead interact with
- <classname>QPDFObjectHandle</classname>.
- </para>
- <para>
- When the <classname>QPDF</classname> class creates a new object,
- it dynamically allocates the appropriate type of
- <classname>QPDFObject</classname> and immediately hands the
- pointer to an instance of <classname>QPDFObjectHandle</classname>.
- The parser reads a token from the current file position. If the
- token is neither a dictionary nor an array opener, an object is
- immediately constructed from the single token and the parser
- returns. Otherwise, the parser iterates in a special mode in which
- it accumulates objects until it finds a balancing closer. During
- this process, the "<literal>R</literal>" keyword is
- recognized and an indirect <classname>QPDFObjectHandle</classname>
- may be constructed.
- </para>
- <para>
- The <function>QPDF::resolve()</function> method, which is used to
- resolve an indirect object, may be invoked from the
- <classname>QPDFObjectHandle</classname> class. It first checks a
- cache to see whether this object has already been read. If not,
- it reads the object from the PDF file and caches it. It then
- returns the resulting <classname>QPDFObjectHandle</classname>.
- The calling object handle then replaces its
- <classname>PointerHolder&lt;QPDFObject&gt;</classname> with the one
- from the newly returned <classname>QPDFObjectHandle</classname>.
- In this way, only a single copy of any direct object need exist
- and clients can access objects transparently without knowing or
- caring whether they are direct or indirect objects. Additionally,
- no object is ever read from the file more than once. That means
- that only the portions of the PDF file that are actually needed
- are ever read from the input file, thus allowing the qpdf package
- to take advantage of this important design goal of PDF files.
- </para>
- <para>
- If the requested object is inside of an object stream, the object
- stream itself is first read into memory. Then the tokenizer reads
- objects from the memory stream based on the offset information
- stored in the stream. Those individual objects are cached, after
- which the temporary buffer holding the object stream contents is
- discarded. In this way, the first time an object in an object
- stream is requested, all objects in the stream are cached.
- </para>
- <para>
- The following example should clarify how
- <classname>QPDF</classname> processes a simple file.
- <itemizedlist>
- <listitem>
- <para>
- Client constructs <classname>QPDF</classname>
- <varname>pdf</varname> and calls
- <function>pdf.processFile("a.pdf");</function>.
- </para>
- </listitem>
- <listitem>
- <para>
- The <classname>QPDF</classname> class checks the beginning of
- <filename>a.pdf</filename> for a PDF header. It then reads the
- cross reference table mentioned at the end of the file,
- ensuring that it is looking before the last
- <literal>%%EOF</literal>. After getting to the
- <literal>trailer</literal> keyword, it invokes the parser.
- </para>
- </listitem>
- <listitem>
- <para>
- The parser sees "<literal>&lt;&lt;</literal>", so
- it calls itself recursively in dictionary creation mode.
- </para>
- </listitem>
- <listitem>
- <para>
- In dictionary creation mode, the parser keeps accumulating
- objects until it encounters
- "<literal>&gt;&gt;</literal>". Each object that is
- read is pushed onto a stack. If
- "<literal>R</literal>" is read, the last two
- objects on the stack are inspected. If they are integers, they
- are popped off the stack and their values are used to construct
- an indirect object handle which is then pushed onto the stack.
- When "<literal>&gt;&gt;</literal>" is finally read,
- the stack is converted into a
- <classname>QPDF_Dictionary</classname> which is placed in a
- <classname>QPDFObjectHandle</classname> and returned.
- </para>
- </listitem>
- <listitem>
- <para>
- The resulting dictionary is saved as the trailer dictionary.
- </para>
- </listitem>
- <listitem>
- <para>
- The <literal>/Prev</literal> key is searched. If present,
- <classname>QPDF</classname> seeks to that point and repeats
- except that the new trailer dictionary is not saved. If
- <literal>/Prev</literal> is not present, the initial parsing
- process is complete.
- </para>
- <para>
- If there is an encryption dictionary, the document's encryption
- parameters are initialized.
- </para>
- </listitem>
- <listitem>
- <para>
- The client requests the root object. The
- <classname>QPDF</classname> class gets the value of the root key
- from the trailer dictionary and returns it. It is an unresolved
- indirect <classname>QPDFObjectHandle</classname>.
- </para>
- </listitem>
- <listitem>
- <para>
- The client requests the <literal>/Pages</literal> key from root
- <classname>QPDFObjectHandle</classname>. The
- <classname>QPDFObjectHandle</classname> notices that it is
- indirect so it asks <classname>QPDF</classname> to resolve it.
- <classname>QPDF</classname> looks in the object cache for an
- object with the root dictionary's object ID and generation
- number. Upon not seeing it, it checks the cross reference
- table, gets the offset, and reads the object present at that
- offset. It stores the result in the object cache and returns
- the cached result. The calling
- <classname>QPDFObjectHandle</classname> replaces its object
- pointer with the one from the resolved
- <classname>QPDFObjectHandle</classname>, verifies that it is a
- valid dictionary object, and returns the (unresolved indirect)
- <classname>QPDFObject</classname> handle to the top of the
- Pages hierarchy.
- </para>
- <para>
- As the client continues to request objects, the same process is
- followed for each new requested object.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </sect1>
- <sect1 id="ref.casting">
- <title>Casting Policy</title>
- <para>
- This section describes the casting policy followed by qpdf's
- implementation. This is of no concern to qpdf's end users and
- largely of no concern to people writing code that uses qpdf, but
- it could be of interest to people who are porting qpdf to a new
- platform or who are making modifications to the code.
- </para>
- <para>
- The C++ code in qpdf is free of old-style casts except where
- unavoidable (e.g. where the old-style cast is in a macro provided
- by a third-party header file). When there is a need for a cast,
- it is handled, in order of preference, by rewriting the code to
- avoid the need for a cast, calling
- <function>const_cast</function>, calling
- <function>static_cast</function>, calling
- <function>reinterpret_cast</function>, or calling some combination
- of the above. As a last resort, a compiler-specific
- <literal>#pragma</literal> may be used to suppress a warning that
- we don't want to fix. Examples may include suppressing warnings
- about the use of old-style casts in code that is shared between C
- and C++ code.
- </para>
- <para>
- The <classname>QIntC</classname> namespace, provided by
- <filename>include/qpdf/QIntC.hh</filename>, implements safe
- functions for converting between integer types. These functions do
- range checking and throw a <type>std::range_error</type>, which is a
- subclass of <type>std::runtime_error</type>, if conversion from one
- integer type to another results in loss of information. There are
- many cases in which we have to move between different integer
- types because of incompatible integer types used in interoperable
- interfaces. Some are unavoidable, such as moving between sizes and
- offsets, and others are there because of old code that is too
- entrenched to be fixable without breaking source compatibility and
- causing pain for users. QPDF is compiled with extra warnings to
- detect conversions with potential data loss, and all such cases
- should be fixed by either using a function from
- <classname>QIntC</classname> or a
- <function>static_cast</function>.
- </para>
- <para>
- When the intention is just to switch the type because of
- exchanging data between incompatible interfaces, use
- <classname>QIntC</classname>. This is the usual case. However,
- there are some cases in which we are explicitly intending to use
- the exact same bit pattern with a different type. This is most
- common when switching between signed and unsigned characters. A
- lot of qpdf's code uses unsigned characters internally, but
- <type>std::string</type> and <type>char</type> are signed. Using
- <function>QIntC::to_char</function> would be wrong for converting
- from unsigned to signed characters because a negative
- <type>char</type> value and the corresponding <type>unsigned
- char</type> value greater than 127 <emphasis>mean the same
- thing</emphasis>. There are also cases in which we use
- <function>static_cast</function> when working with bit fields
- where we are not representing a numerical value but rather a bunch
- of bits packed together in some integer type. Also note that
- <type>size_t</type> and <type>long</type> both typically differ
- between 32-bit and 64-bit environments, so sometimes an explicit
- cast may not be needed to avoid warnings on one platform but may
- be needed on another. A conversion with
- <classname>QIntC</classname> should always be used when the types
- are different even if the underlying size is the same. QPDF's CI
- build builds on 32-bit and 64-bit platforms, and the test suite is
- very thorough, so it is hard to make any of the potential errors
- here without being caught in build or test.
- </para>
- <para>
- Non-const <type>unsigned char*</type> is used in the
- <type>Pipeline</type> interface. The pipeline interface has a
- <function>write</function> call that uses <type>unsigned
- char*</type> without a <type>const</type> qualifier. The main
- reason for this is to support pipelines that make calls to
- third-party libraries, such as zlib, that don't include
- <type>const</type> in their interfaces. Unfortunately, there are
- many places in the code where it is desirable to have <type>const
- char*</type> with pipelines. None of the pipeline implementations
- in qpdf currently modify the data passed to write, and doing so
- would be counter to the intent of <type>Pipeline</type>, but there
- is nothing in the code to prevent this from being done. There are
- places in the code where <function>const_cast</function> is used
- to remove the const-ness of pointers going into
- <type>Pipeline</type>s. This could theoretically be unsafe, but
- there is adequate testing to assert that it is safe and will
- remain safe in qpdf's code.
- </para>
- </sect1>
- <sect1 id="ref.encryption">
- <title>Encryption</title>
- <para>
- Encryption is supported transparently by qpdf. When opening a PDF
- file, if an encryption dictionary exists, the
- <classname>QPDF</classname> object processes this dictionary using
- the password (if any) provided. The primary decryption key is
- computed and cached. No further access is made to the encryption
- dictionary after that time. When an object is read from a file,
- the object ID and generation of the object in which it is
- contained is always known. Using this information along with the
- stored encryption key, all stream and string objects are
- transparently decrypted. Raw encrypted objects are never stored
- in memory. This way, nothing in the library ever has to know or
- care whether it is reading an encrypted file.
- </para>
- <para>
- An interface is also provided for writing encrypted streams and
- strings given an encryption key. This is used by
- <classname>QPDFWriter</classname> when it rewrites encrypted
- files.
- </para>
- <para>
- When copying encrypted files, unless otherwise directed, qpdf will
- preserve any encryption in force in the original file. qpdf can
- do this with either the user or the owner password. There is no
- difference in capability based on which password is used. When 40
- or 128 bit encryption keys are used, the user password can be
- recovered with the owner password. With 256-bit keys, the user and
- owner passwords are used independently to encrypt the actual
- encryption key, so while either can be used, the owner password
- can no longer be used to recover the user password.
- </para>
- <para>
- Starting with version 4.0.0, qpdf can read files that are not
- encrypted but that contain encrypted attachments, but it cannot
- write such files. qpdf also requires the password to be specified
- in order to open the file, not just to extract attachments, since
- once the file is open, all decryption is handled transparently.
- When copying files like this while preserving encryption, qpdf
- will apply the file's encryption to everything in the file, not
- just to the attachments. When decrypting the file, qpdf will
- decrypt the attachments. In general, when copying PDF files with
- multiple encryption formats, qpdf will choose the newest format.
- The only exception to this is that clear-text metadata will be
- preserved as clear-text if it is that way in the original file.
- </para>
- <para>
- One point of confusion some people have about encrypted PDF files
- is that encryption is not the same as password protection.
- Password protected files are always encrypted, but it is also
- possible to create encrypted files that do not have passwords.
- Internally, such files use the empty string as a password, and
- most readers try the empty string first to see if it works and
- prompt for a password only if the empty string doesn't work.
- Normally such files have an empty user password and a non-empty
- owner password. In that way, if the file is opened by an ordinary
- reader without specification of password, the restrictions
- specified in the encryption dictionary can be enforced. Most users
- wouldn't even realize such a file was encrypted. Since qpdf always
- ignores the restrictions (except for the purpose of reporting what
- they are), qpdf doesn't care which password you use. QPDF will
- allow you to create PDF files with non-empty user passwords and
- empty owner passwords. Some readers will require a password when
- you open these files, and others will open the files without a
- password and not enforce restrictions. Having a non-empty user
- password and an empty owner password doesn't really make sense
- because it would mean that opening the file with the user password
- would be more restrictive than not supplying a password at all.
- QPDF also allows you to create PDF files with the same password as
- both the user and owner password. Some readers will not ever allow
- such files to be accessed without restrictions because they never
- try the password as the owner password if it works as the user
- password. Nonetheless, one of the powerful aspects of qpdf is that
- it allows you to finely specify the way encrypted files are
- created, even if the results are not useful to some readers. One
- use case for this would be for testing a PDF reader to ensure that
- it handles odd configurations of input files.
- </para>
- </sect1>
- <sect1 id="ref.random-numbers">
- <title>Random Number Generation</title>
- <para>
- QPDF generates random numbers to support generation of encrypted
- data. Starting in qpdf 10.0.0, qpdf uses the crypto provider as
- its source of random numbers. Older versions used the OS-provided
- source of secure random numbers or, if allowed at build time,
- insecure random numbers from stdlib. Starting with version 5.1.0,
- you can disable use of OS-provided secure random numbers at build
- time. This is especially useful on Windows if you want to avoid a
- dependency on Microsoft's cryptography API. You can also supply
- your own random data provider. For details on how to do this,
- please refer to the top-level README.md file in the source
- distribution and to comments in <filename>QUtil.hh</filename>.
- </para>
- </sect1>
- <sect1 id="ref.adding-and-remove-pages">
- <title>Adding and Removing Pages</title>
- <para>
- While qpdf's API has supported adding and modifying objects for
- some time, version 3.0 introduces specific methods for adding and
- removing pages. These are largely convenience routines that
- handle two tricky issues: pushing inheritable resources from the
- <literal>/Pages</literal> tree down to individual pages and
- manipulation of the <literal>/Pages</literal> tree itself. For
- details, see <function>addPage</function> and surrounding methods
- in <filename>QPDF.hh</filename>.
- </para>
- </sect1>
- <sect1 id="ref.reserved-objects">
- <title>Reserving Object Numbers</title>
- <para>
- Version 3.0 of qpdf introduced the concept of reserved objects.
- These are seldom needed for ordinary operations, but there are
- cases in which you may want to add a series of indirect objects
- with references to each other to a <classname>QPDF</classname>
- object. This causes a problem because you can't determine the
- object ID that a new indirect object will have until you add it to
- the <classname>QPDF</classname> object with
- <function>QPDF::makeIndirectObject</function>. The only way to
- add two mutually referential objects to a
- <classname>QPDF</classname> object prior to version 3.0 would be
- to add the new objects first and then make them refer to each
- other after adding them. Now it is possible to create a
- <firstterm>reserved object</firstterm> using
- <function>QPDFObjectHandle::newReserved</function>. This is an
- indirect object that stays "unresolved" even if it is
- queried for its type. So now, if you want to create a set of
- mutually referential objects, you can create reservations for each
- one of them and use those reservations to construct the
- references. When finished, you can call
- <function>QPDF::replaceReserved</function> to replace the reserved
- objects with the real ones. This functionality will never be
- needed by most applications, but it is used internally by QPDF
- when copying objects from other PDF files, as discussed in <xref
- linkend="ref.foreign-objects"/>. For an example of how to use
- reserved objects, search for <function>newReserved</function> in
- <filename>test_driver.cc</filename> in qpdf's sources.
- </para>
- </sect1>
- <sect1 id="ref.foreign-objects">
- <title>Copying Objects From Other PDF Files</title>
- <para>
- Version 3.0 of qpdf introduced the ability to copy objects into a
- <classname>QPDF</classname> object from a different
- <classname>QPDF</classname> object, which we refer to as
- <firstterm>foreign objects</firstterm>. This allows arbitrary
- merging of PDF files. The "from"
- <classname>QPDF</classname> object must remain valid after the
- copy as discussed in the note below. The <command>qpdf</command>
- command-line tool provides limited support for basic page
- selection, including merging in pages from other files, but the
- library's API makes it possible to implement arbitrarily complex
- merging operations. The main method for copying foreign objects is
- <function>QPDF::copyForeignObject</function>. This takes an
- indirect object from another <classname>QPDF</classname> and
- copies it recursively into this object while preserving all object
- structure, including circular references. This means you can add a
- direct object that you create from scratch to a
- <classname>QPDF</classname> object with
- <function>QPDF::makeIndirectObject</function>, and you can add an
- indirect object from another file with
- <function>QPDF::copyForeignObject</function>. The fact that
- <function>QPDF::makeIndirectObject</function> does not
- automatically detect a foreign object and copy it is an explicit
- design decision. Copying a foreign object seems like a
- sufficiently significant thing to do that it should be done
- explicitly.
- </para>
- <para>
- The other way to copy foreign objects is by passing a page from
- one <classname>QPDF</classname> to another by calling
- <function>QPDF::addPage</function>. In contrast to
- <function>QPDF::makeIndirectObject</function>, this method
- automatically distinguishes between indirect objects in the
- current file, foreign objects, and direct objects.
- </para>
- <para>
- Please note: when you copy objects from one
- <classname>QPDF</classname> to another, the source
- <classname>QPDF</classname> object must remain valid until you
- have finished with the destination object. This is because the
- original object is still used to retrieve any referenced stream
- data from the copied object.
- </para>
- </sect1>
- <sect1 id="ref.rewriting">
- <title>Writing PDF Files</title>
- <para>
- The qpdf library supports file writing of
- <classname>QPDF</classname> objects to PDF files through the
- <classname>QPDFWriter</classname> class. The
- <classname>QPDFWriter</classname> class has two writing modes: one
- for non-linearized files, and one for linearized files. See <xref
- linkend="ref.linearization"/> for a description of how linearization
- is implemented. This section describes how we write
- non-linearized files including the creation of QDF files (see
- <xref linkend="ref.qdf"/>).
- </para>
- <para>
- This outline was written prior to implementation and is not
- exactly accurate, but it provides a correct "notional"
- idea of how writing works. Look at the code in
- <classname>QPDFWriter</classname> for exact details.
- <itemizedlist>
- <listitem>
- <para>
- Initialize state:
- <itemizedlist>
- <listitem>
- <para>
- next object number = 1
- </para>
- </listitem>
- <listitem>
- <para>
- object queue = empty
- </para>
- </listitem>
- <listitem>
- <para>
- renumber table: old object id/generation to new id/0 = empty
- </para>
- </listitem>
- <listitem>
- <para>
- xref table: new id -> offset = empty
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- <listitem>
- <para>
- Create a QPDF object from a file.
- </para>
- </listitem>
- <listitem>
- <para>
- Write header for new PDF file.
- </para>
- </listitem>
- <listitem>
- <para>
- Request the trailer dictionary.
- </para>
- </listitem>
- <listitem>
- <para>
- For each value that is an indirect object, grab the next object
- number (via an operation that returns and increments the
- number). Map object to new number in renumber table. Push
- object onto queue.
- </para>
- </listitem>
- <listitem>
- <para>
- While there are more objects on the queue:
- <itemizedlist>
- <listitem>
- <para>
- Pop queue.
- </para>
- </listitem>
- <listitem>
- <para>
- Look up object's new number <emphasis>n</emphasis> in the
- renumbering table.
- </para>
- </listitem>
- <listitem>
- <para>
- Store current offset into xref table.
- </para>
- </listitem>
- <listitem>
- <para>
- Write <literal>@1@replaceable@1@n@2@replaceable@2@ 0 obj</literal>.
- </para>
- </listitem>
- <listitem>
- <para>
- If object is null, whether direct or indirect, write out
- null, thus eliminating unresolvable indirect object
- references.
- </para>
- </listitem>
- <listitem>
- <para>
- If the object is a stream, write stream contents,
- piped through any filters as required, to a memory buffer.
- Use this buffer to determine the stream length.
- </para>
- </listitem>
- <listitem>
- <para>
- If object is not a stream, array, or dictionary, write out
- its contents.
- </para>
- </listitem>
- <listitem>
- <para>
- If object is an array or dictionary (including stream),
- traverse its elements (for array) or values (for
- dictionaries), handling recursive dictionaries and arrays,
- looking for indirect objects. When an indirect object is
- found, if it is not resolvable, ignore. (This case is
- handled when writing it out.) Otherwise, look it up in the
- renumbering table. If not found, grab the next available
- object number, assign to the referenced object in the
- renumbering table, and push the referenced object onto the
- queue. As a special case, when writing out a stream
- dictionary, replace length, filters, and decode parameters
- as required.
- </para>
- <para>
- Write out dictionary or array, replacing any unresolvable
- indirect object references with null (pdf spec says
- reference to non-existent object is legal and resolves to
- null) and any resolvable ones with references to the
- renumbered objects.
- </para>
- </listitem>
- <listitem>
- <para>
- If the object is a stream, write
- <literal>stream\n</literal>, the stream contents (from the
- memory buffer), and <literal>\nendstream\n</literal>.
- </para>
- </listitem>
- <listitem>
- <para>
- When done, write <literal>endobj</literal>.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- Once we have finished the queue, all referenced objects will have
- been written out and all deleted objects or unreferenced objects
- will have been skipped. The new cross-reference table will
- contain an offset for every new object number from 1 up to the
- number of objects written. This can be used to write out a new
- xref table. Finally we can write out the trailer dictionary with
- appropriately computed /ID (see spec, 8.3, File Identifiers), the
- cross reference table offset, and <literal>%%EOF</literal>.
- </para>
- </sect1>
- <sect1 id="ref.filtered-streams">
- <title>Filtered Streams</title>
- <para>
- Support for streams is implemented through the
- <classname>Pipeline</classname> interface which was designed for
- this package.
- </para>
- <para>
- When reading streams, create a series of
- <classname>Pipeline</classname> objects. The
- <classname>Pipeline</classname> abstract base class requires
- implementation of <function>write()</function> and
- <function>finish()</function> and provides an implementation of
- <function>getNext()</function>. Each pipeline object, upon
- receiving data, does whatever it is going to do and then writes
- the data (possibly modified) to its successor. Alternatively, a
- pipeline may be an end-of-the-line pipeline that does something
- like store its output to a file or a memory buffer ignoring a
- successor. For additional details, look at
- @1@filename@1@Pipeline.hh@2@filename@2@.
- </para>
- <para>
- <classname>QPDF</classname> can read raw or filtered streams.
- When reading a filtered stream, the <classname>QPDF</classname>
- class creates a <classname>Pipeline</classname> object for one of
- each appropriate filter object and chains them together. The last
- filter should write to whatever type of output is required. The
- <classname>QPDF</classname> class has an interface to write raw or
- filtered stream contents to a given pipeline.
- </para>
- </sect1>
- <sect1 id="ref.object-accessors">
- <title>Object Accessor Methods</title>
- <para>
- @1@comment: This section is referenced in QPDFObjectHandle.hh @1@
- </para>
- <para>
- For general information about how to access instances of
- <classname>QPDFObjectHandle</classname>, please see the comments
- in @1@filename@1@QPDFObjectHandle.hh@2@filename@2@. Search for
- "Accessor methods". This section provides a more
- in-depth discussion of the behavior and the rationale for the
- behavior.
- </para>
- <para>
- <emphasis>Why were type errors made into warnings?</emphasis> When
- type checks were introduced into qpdf in the early days, it was
- expected that type errors would only occur as a result of
- programmer error. However, in practice, type errors would occur
- with malformed PDF files because of assumptions made in code,
- including code within the qpdf library and code written by library
- users. The most common case would be chaining calls to
- <function>getKey()</function> to access keys deep within a
- dictionary. In many cases, qpdf would be able to recover from
- these situations, but the old behavior often resulted in crashes
- rather than graceful recovery. For this reason, the errors were
- changed to warnings.
- </para>
- <para>
- <emphasis>Why even warn about type errors when the user can't
- usually do anything about them?</emphasis> Type warnings are
- extremely valuable during development. Since it's impossible to
- catch at compile time things like typos in dictionary key names or
- logic errors around what the structure of a PDF file might be, the
- presence of type warnings can save lots of developer time. They
- have also proven useful in exposing issues in qpdf itself that
- would have otherwise gone undetected.
- </para>
- <para>
- <emphasis>Can there be a type-safe
- <classname>QPDFObjectHandle</classname>?</emphasis> It would be
- great if <classname>QPDFObjectHandle</classname> could be more
- strongly typed so that you'd have to check that something was
- of a particular type before calling type-specific accessor
- methods. However, implementing this at this stage of the library's
- history would be quite difficult, and it would make the common
- pattern of drilling into an object no longer work. While it would
- be possible to have a parallel interface, it would create a lot of
- extra code. If qpdf were written in a language like rust, an
- interface like this would make a lot of sense, but, for a variety
- of reasons, the qpdf API is consistent with other APIs of its
- time, relying on exception handling to catch errors. The
- underlying PDF objects are inherently not type-safe. Forcing
- stronger type safety in <classname>QPDFObjectHandle</classname>
- would ultimately cause a lot more code to have to be written and
- would likely make software that uses qpdf more brittle, and even so,
- checks would have to occur at runtime.
- </para>
- <para>
- <emphasis>Why do type errors sometimes raise
- exceptions?</emphasis> The way warnings work in qpdf requires a
- <classname>QPDF</classname> object to be associated with an object
- handle for a warning to be issued. It would be nice if this could
- be fixed, but it would require major changes to the API. Rather
- than throwing away these conditions, we convert them to
- exceptions. It's not that bad though. Since any object handle that
- was read from a file has an associated <classname>QPDF</classname>
- object, it would only be type errors on objects that were created
- explicitly that would cause exceptions, and in that case, type
- errors are much more likely to be the result of a coding error
- than invalid input.
- </para>
- <para>
- <emphasis>Why does the behavior of a type exception differ between
- the C and C++ API?</emphasis> There is no way to throw and catch
- exceptions in C short of something like
- <function>setjmp</function> and <function>longjmp</function>, and
- that approach is not portable across language barriers. Since the
- C API is often used from other languages, it's important to keep
- things as simple as possible. Starting in qpdf 10.5, exceptions
- that used to crash code using the C API will be written to stderr
- by default, and it is possible to register an error handler.
- There's no reason that the error handler can't simulate exception
- handling in some way, such as by using <function>setjmp</function>
- and <function>longjmp</function> or by setting some variable that
- can be checked after library calls are made. In retrospect, it
- might have been better if the C API object handle methods returned
- error codes like the other methods and set return values in
- passed-in pointers, but this would complicate both the
- implementation and the use of the library for a case that is
- actually quite rare and largely avoidable.
- </para>
- </sect1>
- </chapter>
- <chapter id="ref.linearization">
- <title>Linearization</title>
- <para>
- This chapter describes how <classname>QPDF</classname> and
- <classname>QPDFWriter</classname> implement creation and processing
- of linearized PDFs.
- </para>
- <sect1 id="ref.linearization-strategy">
- <title>Basic Strategy for Linearization</title>
- <para>
- To avoid the incestuous problem of having the qpdf library
- validate its own linearized files, we have a special linearized
- file checking mode which can be invoked via @1@command@1@qpdf
- --check-linearization@2@command@2@ (or @1@command@1@qpdf
- --check@2@command@2@). This mode reads the linearization parameter
- dictionary and the hint streams and validates that object
- ordering, parameters, and hint stream contents are correct. The
- validation code was first tested against linearized files created
- by external tools (Acrobat and pdlin) and then used to validate
- files created by <classname>QPDFWriter</classname> itself.
- </para>
- </sect1>
- <sect1 id="ref.linearized.preparation">
- <title>Preparing For Linearization</title>
- <para>
- Before creating a linearized PDF file from any other PDF file, the
- PDF file must be altered such that all page attributes are
- propagated down to the page level (and not inherited from parents
- in the <literal>/Pages</literal> tree). We also have to know
- which objects refer to which other objects, being concerned with
- page boundaries and a few other cases. We refer to this part of
- preparing the PDF file as @1@firstterm@1@optimization@2@firstterm@2@,
- discussed in <xref linkend="ref.optimization"/>. Note that, in
- this context, the term @1@firstterm@1@optimization@2@firstterm@2@ is a
- qpdf term, and the term @1@firstterm@1@linearization@2@firstterm@2@ is a
- term from the PDF specification. Do not be confused by the fact
- that many applications refer to linearization as optimization or
- web optimization.
- </para>
- <para>
- When creating linearized PDF files from optimized PDF files, there
- are really only a few issues that need to be dealt with:
- <itemizedlist>
- <listitem>
- <para>
- Creation of hints tables
- </para>
- </listitem>
- <listitem>
- <para>
- Placing objects in the correct order
- </para>
- </listitem>
- <listitem>
- <para>
- Filling in offsets and byte sizes
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </sect1>
- <sect1 id="ref.optimization">
- <title>Optimization</title>
- <para>
- In order to perform various operations such as linearization and
- splitting files into pages, it is necessary to know which objects
- are referenced by which pages, page thumbnails, and root and
- trailer dictionary keys. It is also necessary to ensure that all
- page-level attributes appear directly at the page level and are
- not inherited from parents in the pages tree.
- </para>
- <para>
- We refer to the process of enforcing these constraints as
- @1@firstterm@1@optimization@2@firstterm@2@. As mentioned above, note
- that some applications refer to linearization as optimization.
- Although this optimization was initially motivated by the need to
- create linearized files, we are using these terms separately.
- </para>
- <para>
- PDF file optimization is implemented in the
- @1@filename@1@QPDF_optimization.cc@2@filename@2@ source file. That file
- is richly commented and serves as the primary reference for the
- optimization process.
- </para>
- <para>
- After optimization has been completed, the private member
- variables <varname>obj_user_to_objects</varname> and
- <varname>object_to_obj_users</varname> in
- <classname>QPDF</classname> have been populated. Any object that
- has more than one value in the
- <varname>object_to_obj_users</varname> table is shared. Any
- object that has exactly one value in the
- <varname>object_to_obj_users</varname> table is private. To find
- all the private objects in a page or a trailer or root dictionary
- key, one merely has to make this determination for each element in
- the <varname>obj_user_to_objects</varname> table for the given
- page or key.
- </para>
- <para>
- Note that pages and thumbnails have different object user types,
- so the above test on a page will not include objects referenced by
- the page's thumbnail dictionary and nothing else.
- </para>
- </sect1>
- <sect1 id="ref.linearization.writing">
- <title>Writing Linearized Files</title>
- <para>
- We will create files with only primary hint streams. We will
- never write overflow hint streams. (As of PDF version 1.4,
- Acrobat doesn't either, and they are never necessary.) The hint
- streams contain offset information to objects that point to where
- they would be if the hint stream were not present. This means
- that we have to calculate all object positions before we can
- generate and write the hint table. This means that we have to
- generate the file in two passes. To make this reliable,
- <classname>QPDFWriter</classname> in linearization mode invokes
- exactly the same code twice to write the file to a pipeline.
- </para>
- <para>
- In the first pass, the target pipeline is a count pipeline chained
- to a discard pipeline. The count pipeline simply passes its data
- through to the next pipeline in the chain but can return the
- number of bytes passed through it at any intermediate point. The
- discard pipeline is an end of line pipeline that just throws its
- data away. The hint stream is not written and dummy values with
- adequate padding are stored in the first cross reference table,
- linearization parameter dictionary, and /Prev key of the first
- trailer dictionary. All the offset, length, object renumbering
- information, and anything else we need for the second pass is
- stored.
- </para>
- <para>
- At the end of the first pass, this information is passed to the
- <classname>QPDF</classname> class which constructs a compressed
- hint stream in a memory buffer and returns it.
- <classname>QPDFWriter</classname> uses this information to write a
- complete hint stream object into a memory buffer. At this point,
- the length of the hint stream is known.
- </para>
- <para>
- In the second pass, the end of the pipeline chain is a regular
- file instead of a discard pipeline, and we have known values for
- all the offsets and lengths that we didn't have in the first pass.
- We have to adjust offsets that appear after the start of the hint
- stream by the length of the hint stream, which is known. Anything
- that is of variable length is padded, with the padding code
- surrounding any writing code that differs in the two passes. This
- ensures that changes to the way things are represented never
- result in offsets that were gathered during the first pass
- becoming incorrect for the second pass.
- </para>
- <para>
- Using this strategy, we can write linearized files to a
- non-seekable output stream with only a single pass to disk or
- wherever the output is going.
- </para>
- </sect1>
- <sect1 id="ref.linearization-data">
- <title>Calculating Linearization Data</title>
- <para>
- Once a file is optimized, we have information about which objects
- access which other objects. We can then process these tables to
- decide which part (as described in "Linearized PDF Document
- Structure" in the PDF specification) each object is
- contained within. This tells us the exact order in which objects
- are written. The <classname>QPDFWriter</classname> class asks for
- this information and enqueues objects for writing in the proper
- order. It also turns on a check that causes an exception to be
- thrown if an object is encountered that has not already been
- queued. (This could happen only if there were a bug in the
- traversal code used to calculate the linearization data.)
- </para>
- </sect1>
- <sect1 id="ref.linearization-issues">
- <title>Known Issues with Linearization</title>
- <para>
- There are a handful of known issues with this linearization code.
- These issues do not appear to impact the behavior of linearized
- files which still work as intended: it is possible for a web
- browser to begin to display them before they are fully
- downloaded. In fact, it seems that various other programs that
- create linearized files have many of these same issues. These
- items make reference to terminology used in the linearization
- appendix of the PDF specification.
- <itemizedlist>
- <listitem>
- <para>
- Thread Dictionary information keys appear in part 4 with the
- rest of Threads instead of in part 9. Objects in part 9 are
- not grouped together functionally.
- </para>
- </listitem>
- <listitem>
- <para>
- We are not calculating numerators for shared object positions
- within content streams or interleaving them within content
- streams.
- </para>
- </listitem>
- <listitem>
- <para>
- We generate only page offset, shared object, and outline hint
- tables. It would be relatively easy to add some additional
- tables. We gather most of the information needed to create
- thumbnail hint tables. There are comments in the code about
- this.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </sect1>
- <sect1 id="ref.linearization-debugging">
- <title>Debugging Note</title>
- <para>
- The @1@command@1@qpdf --show-linearization@2@command@2@ command can show
- the complete contents of linearization hint streams. To look at
- the raw data, you can extract the filtered contents of the
- linearization hint tables using @1@command@1@qpdf --show-object=n
- --filtered-stream-data@2@command@2@. Then, to convert this into a
- bit stream (since linearization tables are bit streams written
- without regard to byte boundaries), you can pipe the resulting
- data through the following perl code:
-
- <programlisting>use bytes;
-binmode STDIN;
-undef $/;
-my $a = &lt;STDIN&gt;;
-my @ch = split(//, $a);
-map { printf("%08b", ord($_)) } @ch;
-print "\n";
-</programlisting>
- </para>
- </sect1>
- </chapter>
- <chapter id="ref.object-and-xref-streams">
- <title>Object and Cross-Reference Streams</title>
- <para>
- This chapter provides information about the implementation of
- object stream and cross-reference stream support in qpdf.
- </para>
- <sect1 id="ref.object-streams">
- <title>Object Streams</title>
- <para>
- Object streams can contain any regular object except the
- following:
- <itemizedlist>
- <listitem>
- <para>
- stream objects
- </para>
- </listitem>
- <listitem>
- <para>
- objects with generation &gt; 0
- </para>
- </listitem>
- <listitem>
- <para>
- the encryption dictionary
- </para>
- </listitem>
- <listitem>
- <para>
- objects containing the /Length of another stream
- </para>
- </listitem>
- </itemizedlist>
- In addition, Adobe Reader (at least as of version 8.0.0) appears
- to not be able to handle having the document catalog appear in an
- object stream if the file is encrypted, though this is not
- specifically disallowed by the specification.
- </para>
- <para>
- There are additional restrictions for linearized files. See <xref
- linkend="ref.object-streams-linearization"/> for details.
- </para>
- <para>
- The PDF specification refers to objects in object streams as
- "compressed objects" regardless of whether the object
- stream is compressed.
- </para>
- <para>
- The generation number of every object in an object stream must be
- zero. It is possible to delete and replace an object in an object
- stream with a regular object.
- </para>
- <para>
- The object stream dictionary has the following keys:
- <itemizedlist>
- <listitem>
- <para>
- <literal>/N</literal>: number of objects
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>/First</literal>: byte offset of first object
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>/Extends</literal>: indirect reference to stream that
- this extends
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- Stream collections are formed with <literal>/Extends</literal>.
- They must form a directed acyclic graph. These can be used for
- semantic information and are not meaningful to the PDF document's
- syntactic structure. Although qpdf preserves stream collections,
- it never generates them and doesn't make use of this information
- in any way.
- </para>
- <para>
- The specification recommends limiting the number of objects in
- an object stream for efficiency in reading and decoding. Acrobat 6
- uses no more than 100 objects per object stream for linearized
- files and no more than 200 objects per stream for non-linearized files.
- <classname>QPDFWriter</classname>, in object stream generation
- mode, never puts more than 100 objects in an object stream.
- </para>
- <para>
- Object stream contents consist of <emphasis>N</emphasis> pairs of
- integers, each of which is the object number and the byte offset
- of the object relative to the first object in the stream, followed
- by the objects themselves, concatenated.
- </para>
- </sect1>
- <sect1 id="ref.xref-streams">
- <title>Cross-Reference Streams</title>
- <para>
- For non-hybrid files, the value following
- <literal>startxref</literal> is the byte offset to the xref stream
- rather than the word <literal>xref</literal>.
- </para>
- <para>
- For hybrid files (files containing both xref tables and
- cross-reference streams), the xref table's trailer dictionary
- contains the key <literal>/XRefStm</literal> whose value is the
- byte offset to a cross-reference stream that supplements the xref
- table. A PDF 1.5-compliant application should read the xref table
- first. Then it should replace any object that it has already seen
- with any defined in the xref stream. Then it should follow any
- <literal>/Prev</literal> pointer in the original xref table's
- trailer dictionary. The specification is not clear about what
- should be done, if anything, with a <literal>/Prev</literal>
- pointer in the xref stream referenced by an xref table. The
- <classname>QPDF</classname> class ignores it, which is probably
- reasonable since, if this case were to appear for any sensible PDF
- file, the previous xref table would probably have a corresponding
- <literal>/XRefStm</literal> pointer of its own. For example, if a
- hybrid file were appended, the appended section would have its own
- xref table and <literal>/XRefStm</literal>. The appended xref
- table would point to the previous xref table which would point to the
- <literal>/XRefStm</literal>, meaning that the new
- <literal>/XRefStm</literal> doesn't have to point to it.
- </para>
- <para>
- Since xref streams must be read very early, they may not be
- encrypted, and they may not contain indirect objects for keys
- required to read them, which are these:
- <itemizedlist>
- <listitem>
- <para>
- <literal>/Type</literal>: value <literal>/XRef</literal>
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>/Size</literal>: value <emphasis>n+1</emphasis>: where
- <emphasis>n</emphasis> is highest object number (same as
- <literal>/Size</literal> in the trailer dictionary)
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>/Index</literal> (optional): value
- <literal>[@1@replaceable@1@n count@2@replaceable@2@ ...]</literal>
- used to determine which objects' information is stored in this
- stream. The default is <literal>[0 /Size]</literal>.
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>/Prev</literal>: value
- @1@replaceable@1@offset@2@replaceable@2@: byte offset of previous xref
- stream (same as <literal>/Prev</literal> in the trailer
- dictionary)
- </para>
- </listitem>
- <listitem>
- <para>
- <literal>/W [...]</literal>: sizes of each field in the xref
- table
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- The other fields in the xref stream, which may be indirect if
- desired, are the union of those from the xref table's trailer
- dictionary.
- </para>
- <sect2 id="ref.xref-stream-data">
- <title>Cross-Reference Stream Data</title>
- <para>
- The stream data is binary and encoded in big-endian byte order.
- Entries are concatenated, and each entry has a length equal to
- the total of the entries in <literal>/W</literal> above. Each
- entry consists of one or more fields, the first of which is the
- type of the field. The number of bytes for each field is given
- by <literal>/W</literal> above. A 0 in <literal>/W</literal>
- indicates that the field is omitted and has the default value.
- The default value for the field type is
- "<literal>1</literal>". All other default values are
- "<literal>0</literal>".
- </para>
- <para>
- PDF 1.5 has three field types:
- <itemizedlist>
- <listitem>
- <para>
- 0: for free objects. Format: <literal>0 obj
- next-generation</literal>, same as the free table in a
- traditional cross-reference table
- </para>
- </listitem>
- <listitem>
- <para>
- 1: regular non-compressed object. Format: <literal>1 offset
- generation</literal>
- </para>
- </listitem>
- <listitem>
- <para>
- 2: for objects in object streams. Format: <literal>2
- object-stream-number index</literal>, the number of the object
- stream containing the object and the index within the object
- stream of the object.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- <para>
- It seems standard to have the first entry in the table be
- <literal>0 0 0</literal> instead of <literal>0 0 ffff</literal>
- if there are no deleted objects.
- </para>
- </sect2>
- </sect1>
- <sect1 id="ref.object-streams-linearization">
- <title>Implications for Linearized Files</title>
- <para>
- For linearized files, the linearization dictionary, document
- catalog, and page objects may not be contained in object streams.
- </para>
- <para>
- Objects stored within object streams are given the highest range
- of object numbers within the main and first-page cross-reference
- sections.
- </para>
- <para>
- It is okay to use cross-reference streams in place of regular xref
- tables. There are no special considerations.
- </para>
- <para>
- Hint data refers to object streams themselves, not the objects in
- the streams. Shared object references should also be made to the
- object streams. There are no references in any hint tables to the
- object numbers of compressed objects (objects within object
- streams).
- </para>
- <para>
- When numbering objects, all shared objects within both the first
- and second halves of the linearized files must be numbered
- consecutively after all normal uncompressed objects in that half.
- </para>
- </sect1>
- <sect1 id="ref.object-stream-implementation">
- <title>Implementation Notes</title>
- <para>
- There are three modes for writing object streams:
- @1@option@1@disable@2@option@2@, @1@option@1@preserve@2@option@2@, and
- @1@option@1@generate@2@option@2@. In disable mode, we do not generate
- any object streams, and we also generate an xref table rather than
- xref streams. This can be used to generate PDF files that are
- viewable with older readers. In preserve mode, we write object
- streams such that written object streams contain the same objects
- and <literal>/Extends</literal> relationships as in the original
- file. This is equal to disable if the file has no object streams.
- In generate, we create object streams ourselves by grouping
- objects that are allowed in object streams together in sets of no
- more than 100 objects. We also ensure that the PDF version is at
- least 1.5 in generate mode, but we preserve the version header in
- the other modes. The default is @1@option@1@preserve@2@option@2@.
- </para>
- <para>
- We do not support creation of hybrid files. When we write files,
- even in preserve mode, we will lose any xref tables and merge any
- appended sections.
- </para>
- </sect1>
- </chapter>
- <appendix id="ref.release-notes">
- <title>Release Notes</title>
- <para>
- For a detailed list of changes, please see the file
- @1@filename@1@ChangeLog@2@filename@2@ in the source distribution.
- </para>
- <variablelist>
-<!--
- <varlistentry>
- <term>x.y.z: Month dd, YYYY</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Category
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Item
- </para>
- </listitem>
- <listitem>
- <para>
- Item
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Category
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Item
- </para>
- </listitem>
- <listitem>
- <para>
- Item
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
--->
- <varlistentry>
- <term>10.5.0: XXX Month dd, YYYY</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Since qpdf version 8, using object accessor methods on an
- instance of <classname>QPDFObjectHandle</classname> may
- create warnings if the object is not of the expected type.
- These warnings now have an error code of
- <literal>qpdf_e_object</literal> instead of
- <literal>qpdf_e_damaged_pdf</literal>. Also, comments have
- been added to @1@filename@1@QPDFObjectHandle.hh@2@filename@2@ to
- explain in more detail what the behavior is. See <xref
- linkend="ref.object-accessors"/> for a more in-depth
- discussion.
- </para>
- </listitem>
- <listitem>
- <para>
- Overhaul error handling for the object handle functions in
- the C API. See comments in the "Object handling"
- section of @1@filename@1@include/qpdf/qpdf-c.h@2@filename@2@ for
- details. In particular, exceptions thrown by the underlying
- C++ code when calling object accessors are caught and
- converted into errors. The errors can be trapped by
- registering an error handler with
- <function>qpdf_register_oh_error_handler</function> or will
- be written to stderr if no handler is registered.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>qpdf_get_last_string_length</function> to the
- C API to get the length of the last string that was
- returned. This is needed to handle strings that contain
- embedded null characters.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>qpdf_oh_is_initialized</function> and
- <function>qpdf_oh_new_uninitialized</function> to the C API
- to make it possible to work with uninitialized objects.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>qpdf_oh_new_object</function> to the
- C API. This allows you to clone an object handle.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>qpdf_get_object_by_id</function>,
- <function>qpdf_make_indirect_object</function>, and
- <function>qpdf_replace_object</function>, exposing the
- corresponding methods in <classname>QPDF</classname> and
- <classname>QPDFObjectHandle</classname>.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.4.0: November 16, 2021</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Handling of Weak Cryptography Algorithms
- </para>
- <itemizedlist>
- <listitem>
- <para>
- From the qpdf CLI, the @1@option@1@--allow-weak-crypto@2@option@2@
- is now required to suppress a warning when explicitly
- creating PDF files using RC4 encryption. While qpdf will
- always retain the ability to read and write such files,
- doing so will require explicit acknowledgment moving
- forward. For qpdf 10.4, this change only affects the
- command-line tool. Starting in qpdf 11, there will be small
- API changes to require explicit acknowledgment in those
- cases as well. For additional information, see <xref
- linkend="ref.weak-crypto"/>.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Fix potential bounds error when handling shell completion
- that could occur when given bogus input.
- </para>
- </listitem>
- <listitem>
- <para>
- Properly handle overlay/underlay on completely empty pages
- (with no resource dictionary).
- </para>
- </listitem>
- <listitem>
- <para>
- Fix crash that could occur under certain conditions when
- using @1@option@1@--pages@2@option@2@ with files that had form
- fields.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Make <function>QPDF::findPage</function> functions public.
- </para>
- </listitem>
- <listitem>
- <para>
- Add methods to <classname>Pl_Flate</classname> to be able to
- receive warnings on certain recoverable conditions.
- </para>
- </listitem>
- <listitem>
- <para>
- Add an extra check to the library to detect when foreign
- objects are inserted directly (instead of using
- <function>QPDF::copyForeignObject</function>) at the time of
- insertion rather than when the file is written. Catching the
- error sooner makes it much easier to locate the incorrect
- code.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- CLI Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Improve diagnostics around parsing @1@option@1@--pages@2@option@2@
- command-line options.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Packaging Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The Windows binary distribution is now built with crypto
- provided by OpenSSL 3.0.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.3.2: May 8, 2021</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- When generating a file while preserving object streams,
- unreferenced objects are correctly removed unless
- @1@option@1@--preserve-unreferenced@2@option@2@ is specified.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- When adding a page that already exists, make a shallow copy
- instead of throwing an exception. This makes the library
- behavior consistent with the CLI behavior. See
- @1@filename@1@ChangeLog@2@filename@2@ for additional notes.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.3.1: March 11, 2021</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Form field copying failed on files where /DR was a direct
- object in the document-level form dictionary.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.3.0: March 4, 2021</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The code for handling form fields when copying pages from
- 10.2.0 was not quite right and didn't work in a number of
- situations, such as when the same page was copied multiple
- times or when there were conflicting resource or field names
- across multiple copies. The 10.3.0 code has been much more
- thoroughly tested with more complex cases and with a
- multitude of readers and should be much closer to correct.
- The 10.2.0 code worked well enough for page splitting or for
- copying pages with form fields into documents that didn't
- already have them but was still not quite correct in
- handling of field-level resources.
- </para>
- </listitem>
- <listitem>
- <para>
- When <function>QPDF::replaceObject</function> or
- <function>QPDF::swapObjects</function> is called, existing
- <classname>QPDFObjectHandle</classname> instances no longer
- point to the old objects. The next time they are
- accessed, they automatically notice the change to the
- underlying object and update themselves. This resolves a
- very longstanding source of confusion, albeit in a very
- rarely used method call.
- </para>
- </listitem>
- <listitem>
- <para>
- Fix form field handling code to look for default
- appearances, quadding, and default resources in the right
- places. The code was not looking for things in the
- document-level interactive form dictionary that it was
- supposed to be finding there. This required adding a few new
- methods to <classname>QPDFFormFieldObjectHelper</classname>.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Reworked the code that handles copying annotations and form
- fields during page operations. There were additional methods
- added to the public API from 10.2.0 and one deprecation of
- a method added in 10.2.0. The majority of the API changes
- are in methods most people would never call and that will
- hopefully be superseded by higher-level interfaces for
- handling page copies. Please see the
- @1@filename@1@ChangeLog@2@filename@2@ file for details.
- </para>
- </listitem>
- <listitem>
- <para>
- The method <function>QPDF::numWarnings</function> was added
- so that you can tell whether any warnings happened during a
- specific block of code.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.2.0: February 23, 2021</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- CLI Behavior Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Operations that work on combining pages are much better
- about protecting form fields. In particular,
- @1@option@1@--split-pages@2@option@2@ and @1@option@1@--pages@2@option@2@
- now preserve interactive form functionality by copying the
- relevant form field information from the original files.
- Additionally, if you use @1@option@1@--pages@2@option@2@ to select
- only some pages from the original input file, unused form
- fields are removed, which prevents lots of unused
- annotations from being retained.
- </para>
- </listitem>
- <listitem>
- <para>
- By default, @1@command@1@qpdf@2@command@2@ no longer allows
- creation of encrypted PDF files whose user password is
- non-empty and owner password is empty when a 256-bit key is
- in use. The @1@option@1@--allow-insecure@2@option@2@ option,
- specified inside the @1@option@1@--encrypt@2@option@2@ options,
- allows creation of such files. Behavior changes in the CLI
- are avoided when possible, but an exception was made here
- because this is security-related. qpdf must always allow
- creation of weird files for testing purposes, but it should
- not default to letting users unknowingly create insecure
- files.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Behavior Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Note: the changes in this section cause differences in
- output in some cases. These differences change the syntax of
- the PDF but do not change the semantics (meaning). I make a
- strong effort to avoid gratuitous changes in qpdf's output
- so that qpdf changes don't break people's tests. In this
- case, the changes significantly improve the readability of
- the generated PDF and don't affect any output that's
- generated by simple transformation. If you are annoyed by
- having to update test files, please rest assured that
- changes like this have been and will continue to be rare
- events.
- </para>
- </listitem>
- <listitem>
- <para>
- <function>QPDFObjectHandle::newUnicodeString</function> now
- uses whichever of ASCII, PDFDocEncoding, or UTF-16 is
- sufficient to encode all the characters in the string. This
- reduces needless encoding in UTF-16 of strings that can be
- encoded in ASCII. This change may cause qpdf to generate
- different output than before when form field values are set
- using <classname>QPDFFormFieldObjectHelper</classname> but
- does not change the meaning of the output.
- </para>
- </listitem>
- <listitem>
- <para>
- The code that places form XObjects and also the code that
- flattens rotations trim trailing zeroes from real numbers
- that they calculate. This causes slight (but semantically
- equivalent) differences in generated appearance streams and
- form XObject invocations in overlay/underlay code or in user
- code that calls the methods that place form XObjects on a
- page.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- CLI Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add new command line options for listing, saving, adding,
- removing, and copying file attachments. See <xref
- linkend="ref.attachments"/> for details.
- </para>
- </listitem>
- <listitem>
- <para>
- Page splitting and merging operations, as well as
- @1@option@1@--flatten-rotation@2@option@2@, are better behaved
- with respect to annotations and interactive form fields. In
- most cases, interactive form field functionality and proper
- formatting and functionality of annotations is preserved by
- these operations. There are still some cases that aren't
- perfect, such as when functionality of annotations depends
- on document-level data that qpdf doesn't yet understand or
- when there are problems with referential integrity among
- form fields and annotations (e.g., when a single form field
- object or its associated annotations are shared across
- multiple pages, a case that is out of spec but that works in
- most viewers anyway).
- </para>
- </listitem>
- <listitem>
- <para>
- The option
- @1@option@1@--password-file=@1@replaceable@1@filename@2@replaceable@2@@2@option@2@
- can now be used to read the decryption password from a file.
- You can use <literal>-</literal> as the file name to read
- the password from standard input. This is an easier/more
- obvious way to read passwords from files or standard input
- than using @1@option@1@@file@2@option@2@ for this purpose.
- </para>
- </listitem>
- <listitem>
- <para>
- Add some information about attachments to the json output,
- and added <literal>attachments</literal> as an additional
- json key. The information included here is limited to the
- preferred name and content stream and a reference to the
- file spec object. This is enough detail for clients to avoid
- the hassle of navigating a name tree and provides what is
- needed for basic enumeration and extraction of attachments.
- More detailed information can be obtained by following the
- reference to the file spec object.
- </para>
- </listitem>
- <listitem>
- <para>
- Add numeric option to @1@option@1@--collate@2@option@2@. If
- @1@option@1@--collate=@1@replaceable@1@n@2@replaceable@2@@2@option@2@ is
- given, take pages in groups of @1@replaceable@1@n@2@replaceable@2@
- from the given files.
- </para>
- </listitem>
- <listitem>
- <para>
- It is now valid to provide @1@option@1@--rotate=0@2@option@2@ to
- clear rotation from a page.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- This release includes numerous additions to the API. Not all
- changes are listed here. Please see the
- @1@filename@1@ChangeLog@2@filename@2@ file in the source
- distribution for a comprehensive list. Highlights appear
- below.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>QPDFObjectHandle::ditems()</function> and
- <function>QPDFObjectHandle::aitems()</function> that enable
- C++-style iteration, including range-for iteration, over
- dictionary and array QPDFObjectHandles. See comments in
- @1@filename@1@include/qpdf/QPDFObjectHandle.hh@2@filename@2@ and
- @1@filename@1@examples/pdf-name-number-tree.cc@2@filename@2@ for
- details.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>QPDFObjectHandle::copyStream</function> for
- making a copy of a stream within the same
- <classname>QPDF</classname> instance.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new helper classes for supporting file attachments, also
- known as embedded files. New classes are
- <classname>QPDFEmbeddedFileDocumentHelper</classname>,
- <classname>QPDFFileSpecObjectHelper</classname>, and
- <classname>QPDFEFStreamObjectHelper</classname>. See their
- respective headers for details and
- @1@filename@1@examples/pdf-attach-file.cc@2@filename@2@ for an
- example.
- </para>
- </listitem>
- <listitem>
- <para>
- Add a version of
- <function>QPDFObjectHandle::parse</function> that takes a
- <classname>QPDF</classname> pointer as context so that it
- can parse strings containing indirect object references.
- This is illustrated in
- @1@filename@1@examples/pdf-attach-file.cc@2@filename@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- Re-implement <classname>QPDFNameTreeObjectHelper</classname>
- and <classname>QPDFNumberTreeObjectHelper</classname> to be
- more efficient, add an iterator-based API, give them the
- capability to repair broken trees, and create methods for
- modifying the trees. With this change, qpdf has a robust
- read/write implementation of name and number trees.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new versions of
- <function>QPDFObjectHandle::replaceStreamData</function>
- that take <classname>std::function</classname> objects for
- cases when you need something between a static string and a
- full-fledged StreamDataProvider. Using this with
- <function>QUtil::file_provider</function> is a very easy way
- to create a stream from the contents of a file.
- </para>
- </listitem>
- <listitem>
- <para>
- The <classname>QPDFMatrix</classname> class, formerly a
- private, internal class, has been added to the public API.
- See @1@filename@1@include/qpdf/QPDFMatrix.hh@2@filename@2@ for
- details. This class is for working with transformation
- matrices. Some methods in
- <classname>QPDFPageObjectHelper</classname> make use of this
- to make information about transformation matrices available.
- For an example, see
- @1@filename@1@examples/pdf-overlay-page.cc@2@filename@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- Several new methods were added to
- <classname>QPDFAcroFormDocumentHelper</classname> for
- adding, removing, getting information about, and enumerating
- form fields.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method
- <function>QPDFAcroFormDocumentHelper::transformAnnotations</function>,
- which applies a transformation to each annotation on a page.
- </para>
- </listitem>
- <listitem>
- <para>
- Add
- <function>QPDFPageObjectHelper::copyAnnotations</function>,
- which copies annotations and, if applicable, associated form
- fields, from one page to another, possibly transforming the
- rectangles.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Build Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- A C++-14 compiler is now required to build qpdf. There is no
- intention to require anything newer than that for a while.
- C++-14 includes modest enhancements to C++-11 and appears to
- be supported about as widely as C++-11.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The @1@option@1@--flatten-rotation@2@option@2@ option applies
- transformations to any annotations that may be on the page.
- </para>
- </listitem>
- <listitem>
- <para>
- If a form XObject lacks a resources dictionary, consider any
- names in that form XObject to be referenced from the
- containing page. This is compliant with older PDF versions.
- Also detect if any form XObjects have any unresolved names
- and, if so, don't remove unreferenced resources from them or
- from the page that contains them. Unfortunately this has the
- side effect of preventing removal of unreferenced resources
- in some cases where names appear that don't refer to
- resources, such as with tagged PDF. This is a bit of a
- corner case that is not likely to cause a significant
- problem in practice, but the only side effect would be lack
- of removal of shared resources. A future version of qpdf may
- be more sophisticated in its detection of names that refer
- to resources.
- </para>
- </listitem>
- <listitem>
- <para>
- Properly handle strings if they appear in inline image
- dictionaries while externalizing inline images.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.1.0: January 5, 2021</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- CLI Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add @1@option@1@--flatten-rotation@2@option@2@ command-line option,
- which causes all pages that are rotated using parameters in
- the page's dictionary to instead be identically rotated in
- the page's contents. The change is not user-visible for
- compliant PDF readers but can be used to work around broken
- PDF applications that don't properly handle page rotation.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Support for user-provided (pluggable, modular) stream
- filters. It is now possible to derive a class from
- <classname>QPDFStreamFilter</classname> and register it with
- <classname>QPDF</classname> so that regular library methods,
- including those used by <classname>QPDFWriter</classname>,
- can decode streams with filters not directly supported by
- the library. The example
- @1@filename@1@examples/pdf-custom-filter.cc@2@filename@2@
- illustrates how to use this capability.
- </para>
- </listitem>
- <listitem>
- <para>
- Add methods to <classname>QPDFPageObjectHelper</classname>
- to iterate through XObjects on a page or form XObjects,
- possibly recursing into nested form XObjects:
- <function>forEachXObject</function>,
- <function>forEachImage</function>,
- <function>forEachFormXObject</function>.
- </para>
- </listitem>
- <listitem>
- <para>
- Enhance several methods in
- <classname>QPDFPageObjectHelper</classname> to work with
- form XObjects as well as pages, as noted in comments. See
- @1@filename@1@ChangeLog@2@filename@2@ for a full list.
- </para>
- </listitem>
- <listitem>
- <para>
- Rename some functions in
- <classname>QPDFPageObjectHelper</classname>, while keeping
- old names for compatibility:
- <itemizedlist>
- <listitem>
- <para>
- <function>getPageImages</function> to
- <function>getImages</function>
- </para>
- </listitem>
- <listitem>
- <para>
- <function>filterPageContents</function> to
- <function>filterContents</function>
- </para>
- </listitem>
- <listitem>
- <para>
- <function>pipePageContents</function> to
- <function>pipeContents</function>
- </para>
- </listitem>
- <listitem>
- <para>
- <function>parsePageContents</function> to
- <function>parseContents</function>
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- <listitem>
- <para>
- Add method
- <function>QPDFPageObjectHelper::getFormXObjects</function>
- to return a map of form XObjects directly on a page or form
- XObject.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new helper methods to
- <classname>QPDFObjectHandle</classname>:
- <function>isFormXObject</function>, <function>isImage</function>
- </para>
- </listitem>
- <listitem>
- <para>
- Add the optional <function>allow_streams</function>
- parameter to <function>QPDFObjectHandle::makeDirect</function>.
- When <function>QPDFObjectHandle::makeDirect</function> is
- called in this way, it preserves references to streams
- rather than throwing an exception.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>QPDFObjectHandle::setFilterOnWrite</function>
- method. Calling this on a stream prevents
- <classname>QPDFWriter</classname> from attempting to
- uncompress, recompress, or otherwise filter a stream even if
- it could. Developers can use this to protect streams that
- are optimized or that should be protected from
- <classname>QPDFWriter</classname>'s default behavior for any
- other reason.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <classname>ostream</classname>
- <literal>&lt;&lt;</literal> operator for
- <classname>QPDFObjGen</classname>. This is useful to have
- for debugging.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method
- <function>QPDFPageObjectHelper::flattenRotation</function>,
- which replaces a page's <literal>/Rotate</literal> keyword
- by rotating the page within the content stream and altering
- the page's bounding boxes so the rendering is the same. This
- can be used to work around buggy PDF readers that can't
- properly handle page rotation.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- C API Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add several new functions to the C API for working with
- objects. These are wrappers around many of the methods in
- <classname>QPDFObjectHandle</classname>. Their inclusion
- adds considerable new capability to the C API.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>qpdf_register_progress_reporter</function> to
- the C API, corresponding to
- <function>QPDFWriter::registerProgressReporter</function>.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Performance Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Improve steps <classname>QPDFWriter</classname> takes to
- prepare a <classname>QPDF</classname> object for writing,
- resulting in about an 8% improvement in write performance
- while allowing indirect objects to appear in
- <literal>/DecodeParms</literal>.
- </para>
- </listitem>
- <listitem>
- <para>
- When extracting pages, the @1@command@1@qpdf@2@command@2@ CLI only
- removes unreferenced resources from the pages that are being
- kept, resulting in a significant performance improvement
- when extracting small numbers of pages from large, complex
- documents.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- <function>QPDFPageObjectHelper::externalizeInlineImages</function>
- was not externalizing images referenced from form XObjects
- that appeared on the page.
- </para>
- </listitem>
- <listitem>
- <para>
- <function>QPDFObjectHandle::filterPageContents</function>
- was broken for pages with multiple content streams.
- </para>
- </listitem>
- <listitem>
- <para>
- Tweak zsh completion code to behave a little better with
- respect to path completion.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.0.4: November 21, 2020</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Fix a handful of integer overflows. This includes cases
- found by fuzzing as well as having qpdf not do range
- checking on unused values in the xref stream.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.0.3: October 31, 2020</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The fix to the bug involving copying streams with indirect
- filters was incorrect and introduced a new, more serious
- bug. The original bug has been fixed correctly, as has the
- bug introduced in 10.0.2.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.0.2: October 27, 2020</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- When concatenating content streams, as with
- @1@option@1@--coalesce-contents@2@option@2@, there were cases in
- which qpdf would merge two lexical tokens together, creating
- invalid results. A newline is now inserted between
- merged content streams if one is not already present.
- </para>
- </listitem>
- <listitem>
- <para>
- Fix an internal error that could occur when copying foreign
- streams whose stream data had been replaced using a stream
- data provider if those streams had indirect filters or
- decode parameters. This is a rare corner case.
- </para>
- </listitem>
- <listitem>
- <para>
- Ensure that the caller's locale settings do not change the
- results of numeric conversions performed internally by the
- qpdf library. Note that the problem here could only be
- caused when the qpdf library was used programmatically.
- Using the qpdf CLI already ignored the user's locale for
- numeric conversion.
- </para>
- </listitem>
- <listitem>
- <para>
- Fix several instances in which warnings were not suppressed
- in spite of @1@option@1@--no-warn@2@option@2@ and/or errors or
- warnings were written to standard output rather than
- standard error.
- </para>
- </listitem>
- <listitem>
- <para>
- Fixed a memory leak that could occur under specific
- circumstances when
- @1@option@1@--object-streams=generate@2@option@2@ was used.
- </para>
- </listitem>
- <listitem>
- <para>
- Fix various integer overflows and similar conditions found
- by the OSS-Fuzz project.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- New option @1@option@1@--warning-exit-0@2@option@2@ causes qpdf to
- exit with a status of <literal>0</literal> rather than
- <literal>3</literal> if there are warnings but no errors.
- Combine with @1@option@1@--no-warn@2@option@2@ to completely ignore
- warnings.
- </para>
- </listitem>
- <listitem>
- <para>
- Performance improvements have been made to
- <function>QPDF::processMemoryFile</function>.
- </para>
- </listitem>
- <listitem>
- <para>
- The OpenSSL crypto provider produces more detailed error
- messages.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Build Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The option @1@option@1@--disable-rpath@2@option@2@ is now supported
- by qpdf's @1@command@1@./configure@2@command@2@ script. Some
- distributions' packaging standards recommended the use of
- this option.
- </para>
- </listitem>
- <listitem>
- <para>
- Selection of a printf format string for <type>long
- long</type> has been moved from <literal>ifdefs</literal> to
- an autoconf test. If you are using your own build system,
- you will need to provide a value for
- <literal>LL_FMT</literal> in
- @1@filename@1@libqpdf/qpdf/qpdf-config.h@2@filename@2@, which would
- typically be <literal>&quot;%lld&quot;</literal> or, for
- some Windows compilers, <literal>&quot;%I64d&quot;</literal>.
- </para>
- </listitem>
- <listitem>
- <para>
- Several improvements were made to build-time configuration
- of the OpenSSL crypto provider.
- </para>
- </listitem>
- <listitem>
- <para>
- A nearly stand-alone Linux binary zip file is now included
- with the qpdf release. This is built on an older (but
- supported) Ubuntu LTS release, but would work on most
- reasonably recent Linux distributions. It contains only the
- executables and required shared libraries that would not be
- present on a minimal system. It can be used for including
- qpdf in a minimal environment, such as a docker container.
- The zip file is also known to work as a layer in AWS Lambda.
- </para>
- </listitem>
- <listitem>
- <para>
- QPDF's automated build has been migrated from Azure
- Pipelines to GitHub Actions.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Windows-specific Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The Windows executables distributed with qpdf releases now
- use the OpenSSL crypto provider by default. The native
- crypto provider is also compiled in and can be selected at
- runtime with the <literal>QPDF_CRYPTO_PROVIDER</literal>
- environment variable.
- </para>
- </listitem>
- <listitem>
- <para>
- Improvements have been made to how a cryptographic provider
- is obtained in the native Windows crypto implementation.
- However, this is mostly shadowed by OpenSSL being used by
- default.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.0.1: April 9, 2020</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- 10.0.0 introduced a bug in which calling
- <function>QPDFObjectHandle::getStreamData</function> on a
- stream that can't be filtered was returning the raw data
- instead of throwing an exception. This is now fixed.
- </para>
- </listitem>
- <listitem>
- <para>
- Fix a bug that was preventing qpdf from linking with some
- versions of clang on some platforms.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Improve the @1@filename@1@pdf-invert-images@2@filename@2@ example
- to avoid having to load all the images into RAM at the same
- time.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>10.0.0: April 6, 2020</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Performance Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The qpdf library and executable should run much faster in
- this version than in the last several releases. Several
- internal library optimizations have been made, and there has
- been improved behavior on page splitting as well. This
- version of qpdf should outperform any of the 8.x or 9.x
- versions.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Incompatible API (source-level) Changes (minor)
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The <function>QUtil::srandom</function> method was removed.
- It didn't do anything unless insecure random numbers were
- compiled in, and they have been off by default for a long
- time. If you were calling it, just remove the call since it
- wasn't doing anything anyway.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Build/Packaging Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add an <literal>openssl</literal> crypto provider, which is
- implemented with OpenSSL and also works with BoringSSL.
- Thanks to Dean Scarff for this contribution. If you maintain
- qpdf for a distribution, pay special attention to make sure
- that you are including support for the crypto providers you
- want. Package maintainers will have to weigh the advantages
- of allowing users to pick a crypto provider at runtime
- against the disadvantages of adding more dependencies to
- qpdf.
- </para>
- </listitem>
- <listitem>
- <para>
- Allow qpdf to be built on stripped down systems whose C/C++
- libraries lack the <classname>wchar_t</classname> type.
- Search for <classname>wchar_t</classname> in qpdf's
- README.md for details. This should be very rare, but it is
- known to be helpful in some embedded environments.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- CLI Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add <literal>objectinfo</literal> key to the JSON output.
- This will be a place to put computed metadata or other
- information about PDF objects that are not immediately
- evident in other ways or that seem useful for some other
- reason. In this version, information is provided about each
- object indicating whether it is a stream and, if so, what
- its length and filters are. Without this, it was not
- possible to tell conclusively from the JSON output alone
- whether or not an object was a stream. Run @1@command@1@qpdf
- --json-help@2@command@2@ for details.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new option
- @1@option@1@--remove-unreferenced-resources@2@option@2@ which takes
- <literal>auto</literal>, <literal>yes</literal>, or
- <literal>no</literal> as arguments. The new
- <literal>auto</literal> mode, which is the default, performs
- a fast heuristic over a PDF file when splitting pages to
- determine whether the expensive process of finding and
- removing unreferenced resources is likely to be of benefit.
- For most files, this new default will result in a
- significant performance improvement for splitting pages. See
- <xref linkend="ref.advanced-transformation"/> for a more
- detailed discussion.
- </para>
- </listitem>
- <listitem>
- <para>
- The @1@option@1@--preserve-unreferenced-resources@2@option@2@ is
- now just a synonym for
- @1@option@1@--remove-unreferenced-resources=no@2@option@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- If the <literal>QPDF_EXECUTABLE</literal> environment
- variable is set when invoking @1@command@1@qpdf
- --bash-completion@2@command@2@ or @1@command@1@qpdf
- --zsh-completion@2@command@2@, the completion command that it
- outputs will refer to qpdf using the value of that variable
- rather than what @1@command@1@qpdf@2@command@2@ determines its
- executable path to be. This can be useful when wrapping
- @1@command@1@qpdf@2@command@2@ with a script, working with a
- version in the source tree, using an AppImage, or other
- situations where there is some indirection.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Random number generation is now delegated to the crypto
- provider. The old behavior is still used by the native
- crypto provider. It is still possible to provide your own
- random number generator.
- </para>
- </listitem>
- <listitem>
- <para>
- Add a new version of
- <function>QPDFObjectHandle::StreamDataProvider::provideStreamData</function>
- that accepts the <function>suppress_warnings</function> and
- <function>will_retry</function> options and allows a success
- code to be returned. This makes it possible to implement a
- <classname>StreamDataProvider</classname> that calls
- <function>pipeStreamData</function> on another stream and to
- pass the response back to the caller, which enables better
- error handling on those proxied streams.
- </para>
- </listitem>
- <listitem>
- <para>
- Update <function>QPDFObjectHandle::pipeStreamData</function>
- to return an overall success code that goes beyond whether
- or not filtered data was written successfully. This allows
- better error handling of cases that were not filtering
- errors. You have to call this explicitly. Methods in
- previously existing APIs have the same semantics as before.
- </para>
- </listitem>
- <listitem>
- <para>
- The
- <function>QPDFPageObjectHelper::placeFormXObject</function>
- method now allows separate control over whether it should be
- willing to shrink or expand objects to fit them better into
- the destination rectangle. The previous behavior was that
- shrinking was allowed but expansion was not. The previous
- behavior is still the default.
- </para>
- </listitem>
- <listitem>
- <para>
- When calling the C API, any non-zero value passed to a
- boolean parameter is treated as <literal>TRUE</literal>.
- Previously only the value <literal>1</literal> was accepted.
- This makes the C API behave more like most C interfaces and
- is known to improve compatibility with some Windows
- environments that dynamically load the DLL and call
- functions from it.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>QPDFObjectHandle::unsafeShallowCopy</function>
- for copying only top-level dictionary keys or array items.
- This is unsafe because it creates a situation in which
- changing a lower-level item in one object may also change it
- in another object, but for cases in which you
- <emphasis>know</emphasis> you are only inserting or
- replacing top-level items, it is much faster than
- <function>QPDFObjectHandle::shallowCopy</function>.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>QPDFObjectHandle::filterAsContents</function>,
- which filters a stream's data as a content stream. This is
- useful for parsing the contents for form XObjects in the
- same way as parsing page content streams.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- When detecting and removing unreferenced resources during
- page splitting, traverse into form XObjects and handle their
- resources dictionaries as well.
- </para>
- </listitem>
- <listitem>
- <para>
- The same error recovery is applied to streams in other than
- the primary input file when merging or splitting pages.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>9.1.1: January 26, 2020</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Build/Packaging Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The fix-qdf program was converted from perl to C++. As such,
- qpdf no longer has a runtime dependency on perl.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Added new helper routine
- <function>QUtil::call_main_from_wmain</function> which
- converts <type>wchar_t</type> arguments to UTF-8 encoded
- strings. This is useful for qpdf because library methods
- expect file names to be UTF-8 encoded, even on Windows.
- </para>
- </listitem>
- <listitem>
- <para>
- Added new <function>QUtil::read_lines_from_file</function>
- methods that take <type>FILE*</type> arguments and that
- allow preservation of end-of-line characters. This also
- fixes a bug where
- <function>QUtil::read_lines_from_file</function> wouldn't
- work properly with Unicode filenames.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- CLI Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Added options @1@option@1@--is-encrypted@2@option@2@ and
- @1@option@1@--requires-password@2@option@2@ for testing whether a
- file is encrypted or requires a password other than the
- supplied (or empty) password. These communicate via exit
- status, making them useful for shell scripts. They also work
- on encrypted files with unknown passwords.
- </para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- Added <literal>encrypt</literal> key to JSON options. With
- the exception of the reconstructed user password for older
- encryption formats, this provides the same information as
- @1@option@1@--show-encryption@2@option@2@ but in a consistent,
- parseable format. See output of @1@command@1@qpdf
- --json-help@2@command@2@ for details.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- In QDF mode, be sure not to write more than one XRef stream
- to a file, even when
- @1@option@1@--preserve-unreferenced@2@option@2@ is used.
- @1@command@1@fix-qdf@2@command@2@ assumes that there is only one
- XRef stream, and that it appears at the end of the file.
- </para>
- </listitem>
- <listitem>
- <para>
- When externalizing inline images, properly handle images
- whose color space is a reference to an object in the page's
- resource dictionary.
- </para>
- </listitem>
- <listitem>
- <para>
- Windows-specific fix for acquiring crypt context with a new
- keyset.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>9.1.0: November 17, 2019</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Build Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- A C++-11 compiler is now required to build qpdf.
- </para>
- </listitem>
- <listitem>
- <para>
- A new crypto provider that uses gnutls for crypto functions
- is now available and can be enabled at build time. See <xref
- linkend="ref.crypto"/> for more information about crypto
- providers and <xref linkend="ref.crypto.build"/> for
- specific information about the build.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Incorporate contribution from Masamichi Hosoda to properly
- handle signature dictionaries by not including them in
- object streams, formatting the <literal>Contents</literal>
- key as a hexadecimal string, and excluding the
- <literal>/Contents</literal> key from encryption and
- decryption.
- </para>
- </listitem>
- <listitem>
- <para>
- Incorporate contribution from Masamichi Hosoda to provide
- new API calls for getting file-level information about
- input and output files, enabling certain operations on
- the files at the file level rather than the object level.
- New methods include
- <function>QPDF::getXRefTable()</function>,
- <function>QPDFObjectHandle::getParsedOffset()</function>,
- <function>QPDFWriter::getRenumberedObjGen(QPDFObjGen)</function>,
- and <function>QPDFWriter::getWrittenXRefTable()</function>.
- </para>
- </listitem>
- <listitem>
- <para>
- Support build-time and runtime selectable crypto providers.
- This includes the addition of new classes
- <classname>QPDFCryptoProvider</classname> and
- <classname>QPDFCryptoImpl</classname> and the recognition
- of the <literal>QPDF_CRYPTO_PROVIDER</literal> environment
- variable. Crypto providers are described in depth in <xref
- linkend="ref.crypto"/>.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- CLI Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Addition of the @1@option@1@--show-crypto@2@option@2@ option in
- support of selectable crypto providers, as described in
- <xref linkend="ref.crypto"/>.
- </para>
- </listitem>
- <listitem>
- <para>
- Allow <literal>:even</literal> or <literal>:odd</literal> to
- be appended to numeric ranges for specification of the even
- or odd pages from among the pages specified in the range.
- </para>
- </listitem>
- <listitem>
- <para>
- Fix shell wildcard expansion behavior (<literal>*</literal>
- and <literal>?</literal>) of the @1@command@1@qpdf.exe@2@command@2@
- as built by MSVC.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>9.0.2: October 12, 2019</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug Fix
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Fix the name of the temporary file used by
- @1@option@1@--replace-input@2@option@2@ so that it doesn't require
- path splitting and works with paths that include directories.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>9.0.1: September 20, 2019</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug Fixes/Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Fix some build and test issues on big-endian systems and
- compilers with characters that are unsigned by default.
- The problems were in build and test only. There were no
- actual bugs in the qpdf library itself relating to
- endianness or unsigned characters.
- </para>
- </listitem>
- <listitem>
- <para>
- When a dictionary has a duplicated key, report this with a
- warning. The behavior of the library in this case is
- unchanged, but the error condition is no longer silently
- ignored.
- </para>
- </listitem>
- <listitem>
- <para>
- When a form field's display rectangle is erroneously
- specified with inverted coordinates, detect and correct this
- situation. This prevents some form fields from being flipped
- when flattening annotations on files with this condition.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>9.0.0: August 31, 2019</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Incompatible API (source-level) Changes (minor)
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The method <function>QUtil::strcasecmp</function> has been
- renamed to <function>QUtil::str_compare_nocase</function>.
- This incompatible change is necessary to enable qpdf to
- build on platforms that define
- <function>strcasecmp</function> as a macro.
- </para>
- </listitem>
- <listitem>
- <para>
- The <function>QPDF::copyForeignObject</function> method had
- an overloaded version that took a boolean parameter that was
- not used. If you were using this version, just omit the
- extra parameter.
- </para>
- </listitem>
- <listitem>
- <para>
- There was a version of
- <function>QPDFTokenizer::expectInlineImage</function> that
- took no arguments. This version has been removed since it
- caused the tokenizer to return incorrect inline images. A
- new version was added some time ago that produces correct
- output. This is a very low level method that doesn't make
- sense to call outside of qpdf's lexical engine. There are
- higher level methods for tokenizing content streams.
- </para>
- </listitem>
- <listitem>
- <para>
- Change
- <function>QPDFOutlineDocumentHelper::getTopLevelOutlines</function>
- and <function>QPDFOutlineObjectHelper::getKids</function> to
- return a <type>std::vector</type> instead of a
- <type>std::list</type> of
- <classname>QPDFOutlineObjectHelper</classname> objects.
- </para>
- </listitem>
- <listitem>
- <para>
- Remove method
- <function>QPDFTokenizer::allowPoundAnywhereInName</function>.
- This function would allow creation of name tokens whose
- value would change when unparsed, which is never the correct
- behavior.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- CLI Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The @1@option@1@--replace-input@2@option@2@ option may be given in
- place of an output file name. This causes qpdf to overwrite
- the input file with the output. See the description of
- @1@option@1@--replace-input@2@option@2@ in <xref
- linkend="ref.basic-options"/> for more details.
- </para>
- </listitem>
- <listitem>
- <para>
- The @1@option@1@--recompress-flate@2@option@2@ instructs
- @1@command@1@qpdf@2@command@2@ to recompress streams that are
- already compressed with <literal>/FlateDecode</literal>.
- Useful with @1@option@1@--compression-level@2@option@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- The
- @1@option@1@--compression-level=@1@replaceable@1@level@2@replaceable@2@@2@option@2@
- sets the zlib compression level used for any streams
- compressed by <literal>/FlateDecode</literal>. Most
- effective when combined with
- @1@option@1@--recompress-flate@2@option@2@.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- A new namespace <classname>QIntC</classname>, provided by
- @1@filename@1@qpdf/QIntC.hh@2@filename@2@, provides safe conversion
- methods between different integer types. These conversion
- methods do range checking to ensure that the cast can be
- performed with no loss of information. Every use of
- <function>static_cast</function> in the library was
- inspected to see if it could use one of these safe
- converters instead. See <xref linkend="ref.casting"/> for
- additional details.
- </para>
- </listitem>
- <listitem>
- <para>
- Method <function>QPDF::anyWarnings</function> tells whether
- there have been any warnings without clearing the list of
- warnings.
- </para>
- </listitem>
- <listitem>
- <para>
- Method <function>QPDF::closeInputSource</function> closes or
- otherwise releases the input source. This enables the input
- file to be deleted or renamed.
- </para>
- </listitem>
- <listitem>
- <para>
- New methods have been added to <classname>QUtil</classname>
- for converting back and forth between strings and unsigned
- integers: <function>uint_to_string</function>,
- <function>uint_to_string_base</function>,
- <function>string_to_uint</function>, and
- <function>string_to_ull</function>.
- </para>
- </listitem>
- <listitem>
- <para>
- New methods have been added to
- <classname>QPDFObjectHandle</classname> that return the
- value of <classname>Integer</classname> objects as
- <type>int</type> or <type>unsigned int</type> with range
- checking and sensible fallback values, and a new method was
- added to return an unsigned value. This makes it easier to
- write code that is safe from unintentional data loss.
- Functions: <function>getUIntValue</function>,
- <function>getIntValueAsInt</function>,
- <function>getUIntValueAsUInt</function>.
- </para>
- </listitem>
- <listitem>
- <para>
- When parsing content streams with
- <classname>QPDFObjectHandle::ParserCallbacks</classname>, in
- place of the method
- <function>handleObject(QPDFObjectHandle)</function>, the
- developer may override
- <function>handleObject(QPDFObjectHandle, size_t offset,
- size_t length)</function>. If this method is defined, it
- will be invoked with the object along with its offset and
- length within the overall contents being parsed. Intervening
- spaces and comments are not included in offset and length.
- Additionally, a new method
- <function>contentSize(size_t)</function> may be implemented.
- If present, it will be called prior to the first call to
- <function>handleObject</function> with the total size in
- bytes of the combined contents.
- </para>
- </listitem>
- <listitem>
- <para>
- New methods <function>QPDF::userPasswordMatched</function>
- and <function>QPDF::ownerPasswordMatched</function> have
- been added to enable a caller to determine whether the
- supplied password was the user password, the owner password,
- or both. This information is also displayed by @1@command@1@qpdf
- --show-encryption@2@command@2@ and @1@command@1@qpdf
- --check@2@command@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- Static method
- <function>Pl_Flate::setCompressionLevel</function> can be
- called to set the zlib compression level globally used by
- all instances of Pl_Flate in deflate mode.
- </para>
- </listitem>
- <listitem>
- <para>
- The method
- <function>QPDFWriter::setRecompressFlate</function> can be
- called to tell <classname>QPDFWriter</classname> to
- uncompress and recompress streams already compressed with
- <literal>/FlateDecode</literal>.
- </para>
- </listitem>
- <listitem>
- <para>
- The underlying implementation of QPDF arrays has been
- enhanced to be much more memory efficient when dealing with
- arrays with lots of nulls. This enables qpdf to use
- drastically less memory for certain types of files.
- </para>
- </listitem>
- <listitem>
- <para>
- When traversing the pages tree, if nodes are encountered
- with invalid types, the types are fixed, and a warning is
- issued.
- </para>
- </listitem>
- <listitem>
- <para>
- A new helper method
- <function>QUtil::read_file_into_memory</function> was added.
- </para>
- </listitem>
- <listitem>
- <para>
- All conditions previously reported by
- <function>QPDF::checkLinearization()</function> as errors
- are now presented as warnings.
- </para>
- </listitem>
- <listitem>
- <para>
- Name tokens containing the <literal>#</literal> character
- not preceded by two hexadecimal digits, which is invalid in
- PDF 1.2 and above, are properly handled by the library: a
- warning is generated, and the name token is properly
- preserved, even if invalid, in the output. See
- @1@filename@1@ChangeLog@2@filename@2@ for a more complete
- description of this change.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- A small handful of memory issues, assertion failures, and
- unhandled exceptions that could occur on badly mangled input
- files have been fixed. Most of these problems were found by
- Google's OSS-Fuzz project.
- </para>
- </listitem>
- <listitem>
- <para>
- When @1@command@1@qpdf --check@2@command@2@ or @1@command@1@qpdf
- --check-linearization@2@command@2@ encounters a file with
- linearization warnings but not errors, it now properly exits
- with exit code 3 instead of 2.
- </para>
- </listitem>
- <listitem>
- <para>
- The @1@option@1@--completion-bash@2@option@2@ and
- @1@option@1@--completion-zsh@2@option@2@ options now work properly
- when qpdf is invoked as an AppImage.
- </para>
- </listitem>
- <listitem>
- <para>
- Calling
- <function>QPDFWriter::set*EncryptionParameters</function> on
- a <classname>QPDFWriter</classname> object whose output
- filename has not yet been set no longer produces a
- segmentation fault.
- </para>
- </listitem>
- <listitem>
- <para>
- When reading encrypted files, follow the spec more closely
- regarding encryption key length. This allows qpdf to open
- encrypted files in most cases when they have invalid or
- missing /Length keys in the encryption dictionary.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Build Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- On platforms that support it, qpdf now builds with
- @1@option@1@-fvisibility=hidden@2@option@2@. If you build qpdf with
- your own build system, this is now safe to use. This
- prevents methods that are not part of the public API from
- being exported by the shared library, and makes qpdf's ELF
- shared libraries (used on Linux, MacOS, and most other UNIX
- flavors) behave more like the Windows DLL. Since the DLL
- already behaves in much this way, it is unlikely that there
- are any methods that were accidentally not exported.
- However, with ELF shared libraries, typeinfo for some
- classes has to be explicitly exported. If there are problems
- in dynamically linked code catching exceptions or
- subclassing, this could be the reason. If you see this,
- please report a bug at <ulink
- url="https://github.com/qpdf/qpdf/issues/">https://github.com/qpdf/qpdf/issues/</ulink>.
- </para>
- </listitem>
- <listitem>
- <para>
- QPDF is now compiled with integer conversion and sign
- conversion warnings enabled. Numerous changes were made to
- the library to make this safe.
- </para>
- </listitem>
- <listitem>
- <para>
- QPDF's @1@command@1@make install@2@command@2@ target explicitly
- specifies the mode to use when installing files instead of
- relying on the user's umask. It was previously doing this for
- some files but not others.
- </para>
- </listitem>
- <listitem>
- <para>
- If @1@command@1@pkg-config@2@command@2@ is available, use it to
- locate @1@filename@1@libjpeg@2@filename@2@ and
- @1@filename@1@zlib@2@filename@2@ dependencies, falling back on old
- behavior if unsuccessful.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Other Notes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- QPDF has been fully integrated into <ulink
- url="https://github.com/google/oss-fuzz">Google's OSS-Fuzz
- project</ulink>. This project exercises code with randomly
- mutated inputs and is great for discovering hidden
- crashes and security issues. Several bugs found by oss-fuzz
- have already been fixed in qpdf.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>8.4.2: May 18, 2019</term>
- <listitem>
- <para>
- This release has just one change: correction of a buffer overrun
- in the Windows code used to open files. Windows users should
- take this update. There are no code changes that affect
- non-Windows releases.
- </para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>8.4.1: April 27, 2019</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- When @1@command@1@qpdf --version@2@command@2@ is run, it will
- detect if the qpdf CLI was built with a different version of
- qpdf than the library, which may indicate a problem with the
- installation.
- </para>
- </listitem>
- <listitem>
- <para>
- New option @1@option@1@--remove-page-labels@2@option@2@ will remove page
- labels before generating output. This used to happen if you
- ran @1@command@1@qpdf --empty --pages .. --@2@command@2@, but the
- behavior changed in qpdf 8.3.0. This option enables people
- who were relying on the old behavior to get it again.
- </para>
- </listitem>
- <listitem>
- <para>
- New option
- @1@option@1@--keep-files-open-threshold=@1@replaceable@1@count@2@replaceable@2@@2@option@2@
- can be used to override the number of files that qpdf will use
- to trigger the behavior of not keeping all files open when
- merging files. This may be necessary if your system allows
- fewer than the default value of 200 files to be open at the
- same time.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Handle Unicode characters in filenames on Windows. The
- changes to support Unicode on the CLI in Windows broke
- Unicode filenames for Windows.
- </para>
- </listitem>
- <listitem>
- <para>
- Slightly tighten logic that determines whether an object is
- a page. This should resolve problems in some rare files
- where some non-page objects were passing qpdf's test for
- whether something was a page, thus causing them to be
- erroneously lost during page splitting operations.
- </para>
- </listitem>
- <listitem>
- <para>
- Revert change that included preservation of outlines
- (bookmarks) in @1@option@1@--split-pages@2@option@2@. The way it
- was implemented in 8.3.0 and 8.4.0 caused a very significant
- degradation of performance for splitting certain files. A
- future release of qpdf may re-introduce the behavior in a
- more performant and also more correct fashion.
- </para>
- </listitem>
- <listitem>
- <para>
- In JSON mode, add missing leading 0 to decimal values
- between -1 and 1 even if not present in the input. The JSON
- specification requires the leading 0. The PDF specification
- does not.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>8.4.0: February 1, 2019</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Command-line Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- <emphasis>Non-compatible CLI change:</emphasis> The qpdf
- command-line tool interprets passwords given at the
- command-line differently from previous releases when the
- passwords contain non-ASCII characters. In some cases, the
- behavior differs from previous releases. For a discussion of
- the current behavior, please see <xref
- linkend="ref.unicode-passwords"/>. The incompatibilities are
- as follows:
- <itemizedlist>
- <listitem>
- <para>
- On Windows, qpdf now receives all command-line options as
- Unicode strings if it can figure out the appropriate
- compile/link options. This is enabled at least for MSVC
- and mingw builds. That means that if non-ASCII strings
- are passed to the qpdf CLI in Windows, qpdf will now
- correctly receive them. In the past, they would have
- either been encoded as Windows code page 1252 (also known
- as "Windows ANSI") or as something
- unintelligible. In almost all cases, qpdf is able to
- properly interpret Unicode arguments now, whereas in the
- past, it would almost never interpret them properly. The
- result is that non-ASCII passwords given to the qpdf CLI
- on Windows now have a much greater chance of creating PDF
- files that can be opened by a variety of readers. In the
- past, usually files encrypted from the Windows CLI using
- non-ASCII passwords would not be readable by most
- viewers. Note that the current version of qpdf is able to
- decrypt files that it previously created using the
- previously supplied password.
- </para>
- </listitem>
- <listitem>
- <para>
- The PDF specification requires passwords to be encoded as
- UTF-8 for 256-bit encryption and with PDF Doc encoding
- for 40-bit or 128-bit encryption. Older versions of qpdf
- left it up to the user to provide passwords with the
- correct encoding. The qpdf CLI now detects when a
- password is given with UTF-8 encoding and automatically
- transcodes it to what the PDF spec requires. While this
- is almost always the correct behavior, it is possible to
- override the behavior if there is some reason to do so.
- This is discussed in more depth in <xref
- linkend="ref.unicode-passwords"/>.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- <listitem>
- <para>
- New options @1@option@1@--externalize-inline-images@2@option@2@,
- @1@option@1@--ii-min-bytes@2@option@2@, and
- @1@option@1@--keep-inline-images@2@option@2@ control qpdf's
- handling of inline images and possible conversion of them to
- regular images. By default,
- @1@option@1@--optimize-images@2@option@2@ now also applies to
- inline images. These options are discussed in <xref
- linkend="ref.advanced-transformation"/>.
- </para>
- </listitem>
- <listitem>
- <para>
- Add options @1@option@1@--overlay@2@option@2@ and
- @1@option@1@--underlay@2@option@2@ for overlaying or underlaying
- pages of other files onto output pages. See <xref
- linkend="ref.overlay-underlay"/> for details.
- </para>
- </listitem>
- <listitem>
- <para>
- When opening an encrypted file with a password, if the
- specified password doesn't work and the password contains
- any non-ASCII characters, qpdf will try a number of
- alternative passwords to try to compensate for possible
- character encoding errors. This behavior can be suppressed
- with the @1@option@1@--suppress-password-recovery@2@option@2@
- option. See <xref linkend="ref.unicode-passwords"/> for a
- full discussion.
- </para>
- </listitem>
- <listitem>
- <para>
- Add the @1@option@1@--password-mode@2@option@2@ option to fine-tune
- how qpdf interprets password arguments, especially when they
- contain non-ASCII characters. See <xref
- linkend="ref.unicode-passwords"/> for more information.
- </para>
- </listitem>
- <listitem>
- <para>
- In the @1@option@1@--pages@2@option@2@ option, it is now possible
- to copy the same page more than once from the same file
- without using the previous workaround of specifying two
- different paths to the same file.
- </para>
- </listitem>
- <listitem>
- <para>
- In the @1@option@1@--pages@2@option@2@ option, allow use of
- "." as a shortcut for the primary input file.
- That way, you can do @1@command@1@qpdf in.pdf --pages . 1-2 --
- out.pdf@2@command@2@ instead of having to repeat
- @1@filename@1@in.pdf@2@filename@2@ in the command.
- </para>
- </listitem>
- <listitem>
- <para>
- When encrypting with 128-bit and 256-bit encryption, new
- encryption options @1@option@1@--assemble@2@option@2@,
- @1@option@1@--annotate@2@option@2@, @1@option@1@--form@2@option@2@, and
- @1@option@1@--modify-other@2@option@2@ allow finer-grained
- control in configuring permissions. Before, the
- @1@option@1@--modify@2@option@2@ option only configured certain
- predefined groups of permissions.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes and Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- <emphasis>Potential data-loss bug:</emphasis> Versions of
- qpdf between 8.1.0 and 8.3.0 had a bug that could cause page
- splitting and merging operations to drop some font or image
- resources if the PDF file's internal structure shared these
- resource lists across pages and if some but not all of the
- pages in the output did not reference all the fonts and
- images. Using the
- @1@option@1@--preserve-unreferenced-resources@2@option@2@ option
- would work around the incorrect behavior. This bug was the
- result of a typo in the code and a deficiency in the test
- suite. The case that triggered the error was known, just not
- handled properly. This case is now exercised in qpdf's test
- suite and properly handled.
- </para>
- </listitem>
- <listitem>
- <para>
- When optimizing images, detect and refuse to optimize
- images that can't be converted to JPEG because of bit depth
- or color space.
- </para>
- </listitem>
- <listitem>
- <para>
- Linearization and page manipulation APIs now detect and
- recover from files that have duplicate Page objects in the
- pages tree.
- </para>
- </listitem>
- <listitem>
- <para>
- When using the older option
- @1@option@1@--stream-data=compress@2@option@2@ with object
- streams, object streams and xref streams were not compressed.
- This has been fixed.
- </para>
- </listitem>
- <listitem>
- <para>
- When the tokenizer returns inline image tokens, delimiters
- following <literal>ID</literal> and <literal>EI</literal>
- operators are no longer excluded. This makes it possible to
- reliably extract the actual image data.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add method
- <function>QPDFPageObjectHelper::externalizeInlineImages</function>
- to convert inline images to regular images.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method
- <function>QUtil::possible_repaired_encodings()</function> to
- generate a list of strings that represent other ways the
- given string could have been encoded. This is the method the
- QPDF CLI uses to generate the strings it tries when
- recovering incorrectly encoded Unicode passwords.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new versions of
- <function>QPDFWriter::setR{3,4,5,6}EncryptionParameters</function>
- that allow more granular setting of permissions bits. See
- @1@filename@1@QPDFWriter.hh@2@filename@2@ for details.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new versions of the transcoders from UTF-8 to
- single-byte coding systems in <classname>QUtil</classname>
- that report success or failure rather than just substituting
- a specified unknown character.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method <function>QUtil::analyze_encoding()</function> to
- determine whether a string has high-bit characters and
- whether it appears to be UTF-16 or valid UTF-8 encoding.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new method
- <function>QPDFPageObjectHelper::shallowCopyPage()</function>
- to copy a new page that is a "shallow copy" of a
- page. The resulting object is an indirect object ready to be
- passed to
- <function>QPDFPageDocumentHelper::addPage()</function> for
- either the original <classname>QPDF</classname> object or a
- different one. This is what the @1@command@1@qpdf@2@command@2@
- command-line tool uses to copy the same page multiple times
- from the same file during splitting and merging operations.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method <function>QPDF::getUniqueId()</function>, which
- returns a unique identifier for the given QPDF object. The
- identifier will be unique across the life of the
- application. The returned value can be safely used as a map
- key.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method <function>QPDF::setImmediateCopyFrom</function>.
- This further enhances qpdf's ability to allow a
- <classname>QPDF</classname> object from which objects are
- being copied to go out of scope before the destination
- object is written. If you call this method on a
- <classname>QPDF</classname> instance, objects copied
- <emphasis>from</emphasis> this instance will be copied
- immediately instead of lazily. This option uses more memory
- but allows the source object to go out of scope before the
- destination object is written in all cases. See comments in
- @1@filename@1@QPDF.hh@2@filename@2@ for details.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method
- <function>QPDFPageObjectHelper::getAttribute</function> for
- retrieving an attribute from the page dictionary taking
- inheritance into consideration, and optionally making a copy
- if your intention is to modify the attribute.
- </para>
- </listitem>
- <listitem>
- <para>
- Fix long-standing limitation of
- <function>QPDFPageObjectHelper::getPageImages</function> so
- that it now properly reports images from inherited resources
- dictionaries, eliminating the need to call
- <function>QPDFPageDocumentHelper::pushInheritedAttributesToPage</function>
- in this case.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method
- <function>QPDFObjectHandle::getUniqueResourceName</function>
- for finding an unused name in a resource dictionary.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method
- <function>QPDFPageObjectHelper::getFormXObjectForPage</function>
- for generating a form XObject equivalent to a page. The
- resulting object can be used in the same file or copied to
- another file with <function>copyForeignObject</function>.
- This can be useful for implementing underlay, overlay, n-up,
- thumbnails, or any other functionality requiring replication
- of pages in other contexts.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method
- <function>QPDFPageObjectHelper::placeFormXObject</function>
- for generating content stream text that places a given form
- XObject on a page, centered and fit within a specified
- rectangle. This method takes care of computing the proper
- transformation matrix and may optionally compensate for
- rotation or scaling of the destination page.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Build Improvements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add new configure option
- @1@option@1@--enable-avoid-windows-handle@2@option@2@, which causes
- the preprocessor symbol
- <literal>AVOID_WINDOWS_HANDLE</literal> to be defined. When
- defined, qpdf will avoid referencing the Windows
- <classname>HANDLE</classname> type, which is disallowed with
- certain versions of the Windows SDK.
- </para>
- </listitem>
- <listitem>
- <para>
- For Windows builds, attempt to determine what options, if
- any, have to be passed to the compiler and linker to enable
- use of <function>wmain</function>. This causes the
- preprocessor symbol <literal>WINDOWS_WMAIN</literal> to be
- defined. If you do your own builds with other compilers, you
- can define this symbol to cause <function>wmain</function>
- to be used. This is needed to allow the Windows
- @1@command@1@qpdf@2@command@2@ command to receive Unicode
- command-line options.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>8.3.0: January 7, 2019</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Command-line Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Shell completion: you can now use eval @1@command@1@$(qpdf
- --completion-bash)@2@command@2@ and eval @1@command@1@$(qpdf
- --completion-zsh)@2@command@2@ to enable shell completion for
- bash and zsh.
- </para>
- </listitem>
- <listitem>
- <para>
- Page numbers (also known as page labels) are now preserved
- when merging and splitting files with the
- @1@option@1@--pages@2@option@2@ and @1@option@1@--split-pages@2@option@2@
- options.
- </para>
- </listitem>
- <listitem>
- <para>
- Bookmarks are partially preserved when splitting pages with
- the @1@option@1@--split-pages@2@option@2@ option. Specifically, the
- outlines dictionary and some supporting metadata are copied
- into the split files. The result is that all bookmarks from
- the original file appear, those that point to pages that are
- preserved work, and those that point to pages that are not
- preserved don't do anything. This is an interim step toward
- proper support for bookmarks in splitting and merging
- operations.
- </para>
- </listitem>
- <listitem>
- <para>
- Page collation: add new option @1@option@1@--collate@2@option@2@.
- When specified, the semantics of @1@option@1@--pages@2@option@2@
- change from concatenation to collation. See <xref
- linkend="ref.page-selection"/> for examples and discussion.
- </para>
- </listitem>
- <listitem>
- <para>
- Generation of information in JSON format, primarily to
- facilitate use of qpdf from languages other than C++. Add
- new options @1@option@1@--json@2@option@2@,
- @1@option@1@--json-key@2@option@2@, and
- @1@option@1@--json-object@2@option@2@ to generate a JSON
- representation of the PDF file. Run @1@command@1@qpdf
- --json-help@2@command@2@ to get a description of the JSON
- format. For more information, see <xref linkend="ref.json"/>.
- </para>
- </listitem>
- <listitem>
- <para>
- The @1@option@1@--generate-appearances@2@option@2@ flag will cause
- qpdf to generate appearances for form fields if the PDF file
- indicates that form field appearances are out of date. This
- can happen when PDF forms are filled in by a program that
- doesn't know how to regenerate the appearances of the
- filled-in fields.
- </para>
- </listitem>
- <listitem>
- <para>
- The @1@option@1@--flatten-annotations@2@option@2@ flag can be used
- to <emphasis>flatten</emphasis> annotations, including form
- fields. Ordinarily, annotations are drawn separately from
- the page. Flattening annotations is the process of combining
- their appearances into the page's contents. You might want
- to do this if you are going to rotate or combine pages using
- a tool that doesn't understand about annotations. You may
- also want to use @1@option@1@--generate-appearances@2@option@2@
- when using this flag since annotations for outdated form
- fields are not flattened as that would cause loss of
- information.
- </para>
- </listitem>
- <listitem>
- <para>
- The @1@option@1@--optimize-images@2@option@2@ flag tells qpdf to
- recompress every image using DCT (JPEG) compression as
- long as the image is not already compressed with lossy
- compression and recompressing the image reduces its size.
- The additional options @1@option@1@--oi-min-width@2@option@2@,
- @1@option@1@--oi-min-height@2@option@2@, and
- @1@option@1@--oi-min-area@2@option@2@ prevent recompression of
- images whose width, height, or pixel area
- (width &#xd7; height) are below a specified
- threshold.
- </para>
- </listitem>
- <listitem>
- <para>
- The @1@option@1@--show-object@2@option@2@ option can now be given
- as @1@option@1@--show-object=trailer@2@option@2@ to show the
- trailer dictionary.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes and Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- QPDF now automatically detects and recovers from dangling
- references. If a PDF file contained an indirect reference to
- a non-existent object, which is valid, when adding a new
- object to the file, it was possible for the new object to
- take the object ID of the dangling reference, thereby
- causing the dangling reference to point to the new object.
- This case is now prevented.
- </para>
- </listitem>
- <listitem>
- <para>
- Fixes to form field setting code: strings are always written
- in UTF-16 format, and checkboxes and radio buttons are
- handled properly with respect to synchronization of values
- and appearance states.
- </para>
- </listitem>
- <listitem>
- <para>
- The <function>QPDF::checkLinearization()</function> method
- no longer causes the program to crash when it detects problems
- with linearization data. Instead, it issues a normal warning
- or error.
- </para>
- </listitem>
- <listitem>
- <para>
- Ordinarily qpdf treats an argument of the form
- @1@option@1@@file@2@option@2@ to mean that command-line options
- should be read from @1@filename@1@file@2@filename@2@. Now, if
- @1@filename@1@file@2@filename@2@ does not exist but
- @1@filename@1@@file@2@filename@2@ does, qpdf will treat
- @1@filename@1@@file@2@filename@2@ as a regular option. This makes
- it possible to work more easily with PDF files whose names
- happen to start with the <literal>@</literal> character.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Library Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Remove the restriction in most cases that the source QPDF
- object used in a
- <function>QPDF::copyForeignObject</function> call has to
- stick around until the destination QPDF is written. The
- exceptional case is when the source stream gets its data
- using a QPDFObjectHandle::StreamDataProvider. For a more
- in-depth discussion, see comments around
- <function>copyForeignObject</function> in
- @1@filename@1@QPDF.hh@2@filename@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new method
- <function>QPDFWriter::getFinalVersion()</function>, which
- returns the PDF version that will ultimately be written to
- the final file. See comments in
- @1@filename@1@QPDFWriter.hh@2@filename@2@ for some restrictions on
- its use.
- </para>
- </listitem>
- <listitem>
- <para>
- Add several methods for transcoding strings to some of the
- character sets used in PDF files:
- <function>QUtil::utf8_to_ascii</function>,
- <function>QUtil::utf8_to_win_ansi</function>,
- <function>QUtil::utf8_to_mac_roman</function>, and
- <function>QUtil::utf8_to_utf16</function>. For the
- single-byte encodings that support only limited character
- sets, these methods replace unsupported characters with a
- specified substitute.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new methods to
- <classname>QPDFAnnotationObjectHelper</classname> and
- <classname>QPDFFormFieldObjectHelper</classname> for
- querying flags and interpretation of different field types.
- Define constants in @1@filename@1@qpdf/Constants.h@2@filename@2@ to
- help with interpretation of flag values.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new methods
- <function>QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded</function>
- and
- <function>QPDFFormFieldObjectHelper::generateAppearance</function>
- for generating appearance streams. See discussion in
- @1@filename@1@QPDFFormFieldObjectHelper.hh@2@filename@2@ for
- limitations.
- </para>
- </listitem>
- <listitem>
- <para>
- Add two new helper functions for dealing with resource
- dictionaries:
- <function>QPDFObjectHandle::getResourceNames()</function>
- returns a list of all second-level keys, which correspond to
- the names of resources, and
- <function>QPDFObjectHandle::mergeResources()</function>
- merges two resources dictionaries as long as they have
- non-conflicting keys. These methods are useful for certain
- types of objects that resolve resources from multiple places,
- such as form fields.
- </para>
- </listitem>
- <listitem>
- <para>
- Add methods
- <function>QPDFPageDocumentHelper::flattenAnnotations()</function>
- and
- <function>QPDFAnnotationObjectHelper::getPageContentForAppearance()</function>
- for handling low-level details of annotation flattening.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new helper classes:
- <classname>QPDFOutlineDocumentHelper</classname>,
- <classname>QPDFOutlineObjectHelper</classname>,
- <classname>QPDFPageLabelDocumentHelper</classname>,
- <classname>QPDFNameTreeObjectHelper</classname>, and
- <classname>QPDFNumberTreeObjectHelper</classname>.
- </para>
- </listitem>
- <listitem>
- <para>
- Add method <function>QPDFObjectHandle::getJSON()</function>
- that returns a JSON representation of the object. Call
- <function>serialize()</function> on the result to convert it
- to a string.
- </para>
- </listitem>
- <listitem>
- <para>
- Add a simple JSON serializer. This is not a complete or
- general-purpose JSON library. It allows assembly and
- serialization of JSON structures with some restrictions,
- which are described in the header file. This is the
- serializer used by qpdf's new JSON representation.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new <classname>QPDFObjectHandle::Matrix</classname>
- class along with a few convenience methods for dealing with
- six-element numerical arrays as matrices.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new method
- <function>QPDFObjectHandle::wrapInArray</function>, which returns
- the object itself if it is an array, or an array containing
- the object otherwise. This is a common construct in PDF.
- This method prevents you from having to explicitly test
- whether something is a single element or an array.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Build Improvements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- It is no longer necessary to run
- @1@command@1@autogen.sh@2@command@2@ to build from a pristine
- checkout. Automatically generated files are now committed so
- that it is possible to build on platforms without autoconf
- directly from a clean checkout of the repository. The
- @1@command@1@configure@2@command@2@ script detects if the files are
- out of date when it also determines that the tools are
- present to regenerate them.
- </para>
- </listitem>
- <listitem>
- <para>
- Pull requests and the master branch are now built
- automatically in <ulink
- url="https://dev.azure.com/qpdf/qpdf/_build">Azure
- Pipelines</ulink>, which is free for open source projects.
- The build includes Linux, mac, Windows 32-bit and 64-bit
- with mingw and MSVC, and an AppImage build. Official qpdf
- releases are now built with Azure Pipelines.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Notes for Packagers
- </para>
- <itemizedlist>
- <listitem>
- <para>
- A new section has been added to the documentation with notes
- for packagers. Please see <xref linkend="ref.packaging"/>.
- </para>
- </listitem>
- <listitem>
- <para>
- The qpdf build detects out-of-date automatically generated files.
- If your packaging system automatically refreshes libtool or
- autoconf files, it could cause this check to fail. To avoid
- this problem, pass
- @1@option@1@--disable-check-autofiles@2@option@2@ to
- @1@command@1@configure@2@command@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- If you would like to have qpdf completion enabled
- automatically, you can install completion files in the
- distribution's default location. You can find sample
- completion files to install in the
- @1@filename@1@completions@2@filename@2@ directory.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>8.2.1: August 18, 2018</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Command-line Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add
- @1@option@1@--keep-files-open=@1@replaceable@1@[yn]@2@replaceable@2@@2@option@2@
- to override default determination of whether to keep files
- open when merging. Please see the discussion of
- @1@option@1@--keep-files-open@2@option@2@ in <xref
- linkend="ref.basic-options"/> for additional details.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>8.2.0: August 16, 2018</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Command-line Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add @1@option@1@--no-warn@2@option@2@ option to suppress issuing
- warning messages. If there are any conditions that would
- have caused warnings to be issued, the exit status is still
- 3.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes and Optimizations
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Performance fix: optimize page merging operation to avoid
- unnecessary open/close calls on files being merged. This
- solves a dramatic slow-down that was observed when merging
- certain types of files.
- </para>
- </listitem>
- <listitem>
- <para>
- Optimize how memory was used for the TIFF predictor,
- drastically improving performance and memory usage for files
- containing high-resolution images compressed with Flate
- using the TIFF predictor.
- </para>
- </listitem>
- <listitem>
- <para>
- Bug fix: end of line characters were not properly handled
- inside strings in some cases.
- </para>
- </listitem>
- <listitem>
- <para>
- Bug fix: using @1@option@1@--progress@2@option@2@ on very small
- files could cause an infinite loop.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- API enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add new class <classname>QPDFSystemError</classname>, derived
- from <classname>std::runtime_error</classname>, which is now
- thrown by <function>QUtil::throw_system_error</function>.
- This enables the triggering <classname>errno</classname>
- value to be retrieved.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>ClosedFileInputSource::stayOpen</function>
- method, enabling a
- <classname>ClosedFileInputSource</classname> to stay open
- during manually indicated periods of high activity, thus
- reducing the overhead of frequent open/close operations.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Build Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- For the mingw builds, change the name of the DLL import
- library from @1@filename@1@libqpdf.a@2@filename@2@ to
- @1@filename@1@libqpdf.dll.a@2@filename@2@ to more accurately
- reflect that it is an import library rather than a static
- library. This potentially clears the way for supporting a
- static library in the future, though presently, the qpdf
- Windows build only builds the DLL and executables.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>8.1.0: June 23, 2018</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Usability Improvements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- When splitting files, qpdf detects fonts and images that the
- document metadata claims are referenced from a page but are
- not actually referenced and omits them from the output file.
- This change can cause a significant reduction in the size of
- split PDF files for files created by some software packages.
- In some cases, it can also make page splitting slower. Prior
- versions of qpdf would believe the document metadata and
- sometimes include all the images from all the other pages
- even though the pages were no longer present. In the
- unlikely event that the old behavior should be desired, or
- if you have a case where page splitting is very slow, the
- old behavior (and speed) can be enabled by specifying
- @1@option@1@--preserve-unreferenced-resources@2@option@2@. For
- additional details, please see <xref
- linkend="ref.advanced-transformation"/>.
- </para>
- </listitem>
- <listitem>
- <para>
- When merging multiple PDF files, qpdf no longer leaves all
- the files open. This makes it possible to merge numbers of
- files that may exceed the operating system's limit for the
- maximum number of open files.
- </para>
- </listitem>
- <listitem>
- <para>
- The @1@option@1@--rotate@2@option@2@ option's syntax has been
- extended to make the page range optional. If you specify
- @1@option@1@--rotate=@1@replaceable@1@angle@2@replaceable@2@@2@option@2@
- without specifying a page range, the rotation will be
- applied to all pages. This can be especially useful for
- adjusting a PDF created from a multi-page document that
- was scanned upside down.
- </para>
- </listitem>
- <listitem>
- <para>
- When merging multiple files, the @1@option@1@--verbose@2@option@2@
- option now prints information about each file as it operates
- on that file.
- </para>
- </listitem>
- <listitem>
- <para>
- When the @1@option@1@--progress@2@option@2@ option is specified,
- qpdf will print a running indicator of its best guess at how
- far through the writing process it is. Note that, as with
- all progress meters, it's an approximation. This option is
- implemented in a way that makes it useful for software that
- uses the qpdf library; see API Enhancements below.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Properly decrypt files that use revision 3 of the standard
- security handler but use 40-bit keys (even though revision 3
- supports 128-bit keys).
- </para>
- </listitem>
- <listitem>
- <para>
- Limit depth of nested data structures to prevent crashes
- from certain types of malformed (malicious) PDFs.
- </para>
- </listitem>
- <listitem>
- <para>
- In "newline before endstream" mode, insert the
- required extra newline before the
- <literal>endstream</literal> at the end of object streams.
- This one case was previously omitted.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- API Enhancements
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The first round of higher level "helper"
- interfaces has been introduced. These are designed to
- provide a more convenient way of interacting with certain
- document features than using
- <classname>QPDFObjectHandle</classname> directly. For
- details on helpers, see <xref
- linkend="ref.helper-classes"/>. Specific additional
- interfaces are described below.
- </para>
- </listitem>
- <listitem>
- <para>
- Add two new document helper classes:
- <classname>QPDFPageDocumentHelper</classname> for working
- with pages, and
- <classname>QPDFAcroFormDocumentHelper</classname> for
- working with interactive forms. No old methods have been
- removed, but <classname>QPDFPageDocumentHelper</classname>
- is now the preferred way to perform operations on pages
- rather than calling the old methods in
- <classname>QPDFObjectHandle</classname> and
- <classname>QPDF</classname> directly. Comments in the header
- files direct you to the new interfaces. Please see the
- header files and @1@filename@1@ChangeLog@2@filename@2@ for
- additional details.
- </para>
- </listitem>
- <listitem>
- <para>
- Add three new object helper classes:
- <classname>QPDFPageObjectHelper</classname> for pages,
- <classname>QPDFFormFieldObjectHelper</classname> for
- interactive form fields, and
- <classname>QPDFAnnotationObjectHelper</classname> for
- annotations. All three classes are fairly sparse at the
- moment, but they have some useful, basic functionality.
- </para>
- </listitem>
- <listitem>
- <para>
- A new example program
- @1@filename@1@examples/pdf-set-form-values.cc@2@filename@2@ has
- been added that illustrates use of the new document and
- object helpers.
- </para>
- </listitem>
- <listitem>
- <para>
- The method
- <function>QPDFWriter::registerProgressReporter</function>
- has been added. This method allows you to register a
- function that is called by <classname>QPDFWriter</classname>
- to update your idea of the percentage it thinks it is
- through writing its output. Client programs can use this to
- implement reasonably accurate progress meters. The
- @1@command@1@qpdf@2@command@2@ command line tool uses this to
- implement its @1@option@1@--progress@2@option@2@ option.
- </para>
- </listitem>
- <listitem>
- <para>
- New methods
- <function>QPDFObjectHandle::newUnicodeString</function> and
- <function>QPDFObject::unparseBinary</function> have been
- added to allow for more convenient creation of strings that
- are explicitly encoded using big-endian UTF-16. This is
- useful for creating strings that appear outside of content
- streams, such as labels, form fields, outlines, document
- metadata, etc.
- </para>
- </listitem>
- <listitem>
- <para>
- A new class
- <classname>QPDFObjectHandle::Rectangle</classname> has been
- added to ease working with PDF rectangles, which are just
- arrays of four numeric values.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>8.0.2: March 6, 2018</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- When a loop is detected while following cross reference
- streams or tables, treat this as damage instead of silently
- ignoring the previous table. This prevents loss of otherwise
- recoverable data in some damaged files.
- </para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- Properly handle pages with no contents.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>8.0.1: March 4, 2018</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Disregard data check errors when uncompressing
- <literal>/FlateDecode</literal> streams. This is consistent
- with most other PDF readers and allows qpdf to recover data
- from another class of malformed PDF files.
- </para>
- </listitem>
- <listitem>
- <para>
- On the command line when specifying page ranges, support
- preceding a page number by "r" to indicate that it
- should be counted from the end. For example, the range
- <literal>r3-r1</literal> would indicate the last three pages
- of a document.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>8.0.0: February 25, 2018</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Packaging and Distribution Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- QPDF is now distributed as an <ulink
- url="https://appimage.org/">AppImage</ulink> in addition to
- all the other ways it is distributed. The AppImage can be
- found in the download area with the other packages. Thanks
- to Kurt Pfeifle and Simon Peter for their contributions.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- <function>QPDFObjectHandle::getUTF8Val</function> now
- properly treats non-Unicode strings as encoded with PDF Doc
- Encoding.
- </para>
- </listitem>
- <listitem>
- <para>
- Improvements to handling of objects in PDF files that are
- not of the expected type. In most cases, qpdf will be able
- to warn for such cases rather than fail with an exception.
- Previous versions of qpdf would sometimes fail with errors
- such as "operation for dictionary object attempted on
- object of wrong type". This situation should be mostly
- or entirely eliminated now.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- Enhancements to the @1@command@1@qpdf@2@command@2@ Command-line Tool.
- All new options listed here are documented in more detail in
- <xref linkend="ref.using"/>.
- </para>
- <itemizedlist>
- <listitem>
- <para>
- The option
- @1@option@1@--linearize-pass1=@1@replaceable@1@file@2@replaceable@2@@2@option@2@
- has been added for debugging qpdf's linearization code.
- </para>
- </listitem>
- <listitem>
- <para>
- The option @1@option@1@--coalesce-contents@2@option@2@ can be used
- to combine content streams of a page whose contents are an
- array of streams into a single stream.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>
- API Enhancements. All new API calls are documented in their
- respective classes' header files. There are no non-compatible
- changes to the API.
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Add function <function>qpdf_check_pdf</function> to the C API.
- This function does basic checking that is a subset of what
- @1@command@1@qpdf --check@2@command@2@ performs.
- </para>
- </listitem>
- <listitem>
- <para>
- Major enhancements to the lexical layer of qpdf. For a
- complete list of enhancements, please refer to the
- @1@filename@1@ChangeLog@2@filename@2@ file. Most of the changes
- result in improvements to qpdf's ability to handle erroneous
- files. It is also possible for programs to handle
- whitespace, comments, and inline images as tokens.
- </para>
- </listitem>
- <listitem>
- <para>
- New API for working with PDF content streams at a lexical
- level. The new class
- <classname>QPDFObjectHandle::TokenFilter</classname> allows
- the developer to provide token handlers. Token filters can be
- used with several different methods in
- <classname>QPDFObjectHandle</classname> as well as with a
- lower-level interface. See comments in
- @1@filename@1@QPDFObjectHandle.hh@2@filename@2@ as well as the new
- examples @1@filename@1@examples/pdf-filter-tokens.cc@2@filename@2@
- and @1@filename@1@examples/pdf-count-strings.cc@2@filename@2@ for
- details.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>7.1.1: February 4, 2018</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug fix: files whose /ID fields were other than 16 bytes long
- can now be properly linearized.
- </para>
- </listitem>
- <listitem>
- <para>
- A few compile and link issues have been corrected for some
- platforms.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>7.1.0: January 14, 2018</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- PDF files contain streams that may be compressed with various
- compression algorithms which, in some cases, may be enhanced
- by various predictor functions. Previously only the PNG up
- predictor was supported. In this version, all the PNG
- predictors as well as the TIFF predictor are supported. This
- increases the range of files that qpdf is able to handle.
- </para>
- </listitem>
- <listitem>
- <para>
- QPDF now allows a raw encryption key to be specified in place
- of a password when opening encrypted files, and will
- optionally display the encryption key used by a file. This is
- a non-standard operation, but it can be useful in certain
- situations. Please see the discussion of
- @1@option@1@--password-is-hex-key@2@option@2@ in <xref
- linkend="ref.basic-options"/> or the comments around
- <function>QPDF::setPasswordIsHexKey</function> in
- @1@filename@1@QPDF.hh@2@filename@2@ for additional details.
- </para>
- </listitem>
- <listitem>
- <para>
- Bug fix: numbers ending with a trailing decimal point are now
- properly recognized as numbers.
- </para>
- </listitem>
- <listitem>
- <para>
- Bug fix: when building qpdf from source on some platforms
- (especially MacOS), the build could get confused by older
- versions of qpdf installed on the system. This has been
- corrected.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>7.0.0: September 15, 2017</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Packaging and Distribution Changes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- QPDF's primary license is now <ulink
- url="http://www.apache.org/licenses/LICENSE-2.0">version 2.0
- of the Apache License</ulink> rather than version 2.0 of the
- Artistic License. You may still, at your option, consider
- qpdf to be licensed with version 2.0 of the Artistic
- license.
- </para>
- </listitem>
- <listitem>
- <para>
- QPDF no longer has a dependency on the PCRE (Perl-Compatible
- Regular Expression) library. QPDF now has an added
- dependency on the JPEG library.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- Bug Fixes
- </para>
- <itemizedlist>
- <listitem>
- <para>
- This release contains many bug fixes for various infinite
- loops, memory leaks, and other memory errors that could be
- encountered with specially crafted or otherwise erroneous
- PDF files.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- New Features
- </para>
- <itemizedlist>
- <listitem>
- <para>
- QPDF now supports reading and writing streams encoded with
- JPEG or RunLength encoding. Library API enhancements and
- command-line options have been added to control this
- behavior. See command-line options
- @1@option@1@--compress-streams@2@option@2@ and
- @1@option@1@--decode-level@2@option@2@ and methods
- <function>QPDFWriter::setCompressStreams</function> and
- <function>QPDFWriter::setDecodeLevel</function>.
- </para>
- </listitem>
- <listitem>
- <para>
- QPDF is much better at recovering from broken files. In most
- cases, qpdf will skip invalid objects and will preserve
- broken stream data by not attempting to filter broken
- streams. QPDF is now able to recover or at least not crash
- on dozens of broken test files I have received over the past
- few years.
- </para>
- </listitem>
- <listitem>
- <para>
- Page rotation is now supported and accessible from both the
- library and the command line.
- </para>
- </listitem>
- <listitem>
- <para>
- <classname>QPDFWriter</classname> supports writing files in
- a way that preserves PCLm compliance in support of
- driverless printing. This is very specialized and is only
- useful to applications that already know how to create PCLm
- files.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- Enhancements to the @1@command@1@qpdf@2@command@2@ Command-line Tool.
- All new options listed here are documented in more detail in
- <xref linkend="ref.using"/>.
- </para>
- <itemizedlist>
- <listitem>
- <para>
- Command-line arguments can now be read from files or
- standard input using <literal>@file</literal> or
- <literal>@-</literal> syntax. Please see <xref
- linkend="ref.invocation"/>.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--rotate@2@option@2@: request page rotation
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--newline-before-endstream@2@option@2@: ensure that a
- newline appears before every <literal>endstream</literal>
- keyword in the file; used to prevent qpdf from breaking
- PDF/A compliance on already compliant files.
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--preserve-unreferenced@2@option@2@: preserve
- unreferenced objects in the input PDF
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--split-pages@2@option@2@: break output into chunks
- with fixed numbers of pages
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--verbose@2@option@2@: print the name of each output
- file that is created
- </para>
- </listitem>
- <listitem>
- <para>
- @1@option@1@--compress-streams@2@option@2@ and
- @1@option@1@--decode-level@2@option@2@ replace
- @1@option@1@--stream-data@2@option@2@ for improving granularity of
- controlling compression and decompression of stream data.
- The @1@option@1@--stream-data@2@option@2@ option will remain
- available.
- </para>
- </listitem>
- <listitem>
- <para>
- When running @1@command@1@qpdf --check@2@command@2@ with other
- options, checks are always run first. This enables qpdf to
- perform its full recovery logic before outputting other
- information. This can be especially useful when manually
- recovering broken files, looking at qpdf's regenerated cross
- reference table, or other similar operations.
- </para>
- </listitem>
- <listitem>
- <para>
- Process @1@option@1@--pages@2@option@2@ earlier so that other
- options like @1@option@1@--show-pages@2@option@2@ or
- @1@option@1@--split-pages@2@option@2@ can operate on the file after
- page splitting/merging has occurred.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>
- API Changes. All new API calls are documented in their
- respective classes' header files.
- </para>
- <itemizedlist>
- <listitem>
- <para>
- <function>QPDFObjectHandle::rotatePage</function>: apply
- rotation to a page object
- </para>
- </listitem>
- <listitem>
- <para>
- <function>QPDFWriter::setNewlineBeforeEndstream</function>:
- force newline to appear before <literal>endstream</literal>
- </para>
- </listitem>
- <listitem>
- <para>
- <function>QPDFWriter::setPreserveUnreferencedObjects</function>:
- preserve unreferenced objects that appear in the input PDF.
- The default behavior is to discard them.
- </para>
- </listitem>
- <listitem>
- <para>
- New <classname>Pipeline</classname> types
- <classname>Pl_RunLength</classname> and
- <classname>Pl_DCT</classname> are available for developers
- who wish to produce or consume RunLength or DCT stream data
- directly. The @1@filename@1@examples/pdf-create.cc@2@filename@2@
- example illustrates their use.
- </para>
- </listitem>
- <listitem>
- <para>
- <function>QPDFWriter::setCompressStreams</function> and
- <function>QPDFWriter::setDecodeLevel</function> methods
- control handling of different types of stream compression.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new C API functions
- <function>qpdf_set_compress_streams</function>,
- <function>qpdf_set_decode_level</function>,
- <function>qpdf_set_preserve_unreferenced_objects</function>,
- and <function>qpdf_set_newline_before_endstream</function>
- corresponding to the new <classname>QPDFWriter</classname>
- methods.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>6.0.0: November 10, 2015</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Implement @1@option@1@--deterministic-id@2@option@2@ command-line
- option and <function>QPDFWriter::setDeterministicID</function>
- as well as C API function
- <function>qpdf_set_deterministic_ID</function> for generating
- a deterministic ID for non-encrypted files. When this option
- is selected, the ID of the file depends on the contents of the
- output file, and not on transient items such as the timestamp
- or output file name.
- </para>
- </listitem>
- <listitem>
- <para>
- Make qpdf more tolerant of files whose xref table entries are
- not the correct length.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>5.1.3: May 24, 2015</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug fix: fix-qdf was not properly handling files that
- contained object streams with more than 255 objects in them.
- </para>
- </listitem>
- <listitem>
- <para>
- Bug fix: qpdf was not properly initializing Microsoft's secure
- crypto provider on fresh Windows installations that had not
- had any keys created yet.
- </para>
- </listitem>
- <listitem>
- <para>
- Fix a few errors found by Gynvael Coldwind and
- Mateusz Jurczyk of the Google Security Team. Please see the
- ChangeLog for details.
- </para>
- </listitem>
- <listitem>
- <para>
- Properly handle pages that have no contents at all. There were
- many cases in which qpdf handled this fine, but a few methods
- blindly obtained page contents without handling the possibility
- that there were no contents.
- </para>
- </listitem>
- <listitem>
- <para>
- Make qpdf more robust for a few more kinds of problems that
- may occur in invalid PDF files.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>5.1.2: June 7, 2014</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug fix: linearizing files could create a corrupted output
- file under extremely unlikely file size circumstances. See
- ChangeLog for details. The odds of getting hit by this are
- very low, though one person did.
- </para>
- </listitem>
- <listitem>
- <para>
- Bug fix: qpdf would fail to write files that had streams with
- decode parameters referencing other streams.
- </para>
- </listitem>
- <listitem>
- <para>
- New example program: @1@command@1@pdf-split-pages@2@command@2@:
- efficiently split PDF files into individual pages. The example
- program does this more efficiently than using @1@command@1@qpdf
- --pages@2@command@2@ to do it.
- </para>
- </listitem>
- <listitem>
- <para>
- Packaging fix: Visual C++ binaries did not support Windows XP.
- This has been rectified by updating the compilers used to
- generate the release binaries.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>5.1.1: January 14, 2014</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Performance fix: copying foreign objects could be very slow
- with certain types of files. This was most likely to be
- visible during page splitting and was due to traversing the
- same objects multiple times in some cases.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>5.1.0: December 17, 2013</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Added runtime option
- (<function>QUtil::setRandomDataProvider</function>) to supply
- your own random data provider. You can use this if you want
- to avoid using the OS-provided secure random number generation
- facility or stdlib's less secure version. See comments in
- include/qpdf/QUtil.hh for details.
- </para>
- </listitem>
- <listitem>
- <para>
- Fixed image comparison tests to not create 12-bit-per-pixel
- images since some versions of tiffcmp have bugs in comparing
- them in some cases. This increases the disk space required by
- the image comparison tests, which are off by default anyway.
- </para>
- </listitem>
- <listitem>
- <para>
- Introduce a number of small fixes for compilation on the
- latest clang in MacOS and the latest Visual C++ in Windows.
- </para>
- </listitem>
- <listitem>
- <para>
- Be able to handle broken files that end the xref table header
- with a space instead of a newline.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>5.0.1: October 18, 2013</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Thanks to a detailed review by Florian Weimer and the Red Hat
- Product Security Team, this release includes a number of
- non-user-visible security hardening changes. Please see the
- ChangeLog file in the source distribution for the complete
- list.
- </para>
- </listitem>
- <listitem>
- <para>
- When available, operating system-specific secure random number
- generation is used for generating initialization vectors and
- other random values used during encryption or file creation.
- For the Windows build, this results in an added dependency on
- Microsoft's cryptography API. To disable the OS-specific
- cryptography and use the old version, pass the
- @1@option@1@--enable-insecure-random@2@option@2@ option to
- @1@command@1@./configure@2@command@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- The @1@command@1@qpdf@2@command@2@ command-line tool now issues a
- warning when @1@option@1@--accessibility=n@2@option@2@ is specified
- for newer encryption versions stating that the option is
- ignored. qpdf, per the spec, has always ignored this flag,
- but it previously did so silently. This warning is issued
- only by the command-line tool, not by the library. The
- library's handling of this flag is unchanged.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>5.0.0: July 10, 2013</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug fix: previous versions of qpdf would lose objects with
- generation != 0 when generating object streams. Fixing this
- required changes to the public API.
- </para>
- </listitem>
- <listitem>
- <para>
- Removed methods from public API that were only supposed to be
- called by QPDFWriter and couldn't realistically be called
- anywhere else. See ChangeLog for details.
- </para>
- </listitem>
- <listitem>
- <para>
- New <type>QPDFObjGen</type> class added to represent an object
- ID/generation pair.
- <function>QPDFObjectHandle::getObjGen()</function> is now
- preferred over
- <function>QPDFObjectHandle::getObjectID()</function> and
- <function>QPDFObjectHandle::getGeneration()</function> as it
- makes it less likely for people to accidentally write code
- that ignores the generation number. See
- @1@filename@1@QPDF.hh@2@filename@2@ and
- @1@filename@1@QPDFObjectHandle.hh@2@filename@2@ for additional notes.
- </para>
- </listitem>
- <listitem>
- <para>
- Add @1@option@1@--show-npages@2@option@2@ command-line option to the
- @1@command@1@qpdf@2@command@2@ command to show the number of pages in
- a file.
- </para>
- </listitem>
- <listitem>
- <para>
- Allow omission of the page range within
- @1@option@1@--pages@2@option@2@ for the @1@command@1@qpdf@2@command@2@
- command. When omitted, the page range is implicitly taken to
- be all the pages in the file.
- </para>
- </listitem>
- <listitem>
- <para>
- Various enhancements were made to support different types of
- broken files or broken readers. Details can be found in
- @1@filename@1@ChangeLog@2@filename@2@.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>4.1.0: April 14, 2013</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Note to people including qpdf in distributions: the
- @1@filename@1@.la@2@filename@2@ files generated by libtool are now
- installed by qpdf's @1@command@1@make install@2@command@2@ target.
- Before, they were not installed. This means that if your
- distribution does not want to include @1@filename@1@.la@2@filename@2@
- files, you must remove them as part of your packaging process.
- </para>
- </listitem>
- <listitem>
- <para>
- Major enhancement: API enhancements have been made to support
- parsing of content streams. This enhancement includes the
- following changes:
- <itemizedlist>
- <listitem>
- <para>
- <function>QPDFObjectHandle::parseContentStream</function>
- method parses objects in a content stream and calls
- handlers in a callback class. The example
- @1@filename@1@examples/pdf-parse-content.cc@2@filename@2@
- illustrates how this may be used.
- </para>
- </listitem>
- <listitem>
- <para>
- <type>QPDFObjectHandle</type> can now represent operators
- and inline images, object types that may only appear in
- content streams.
- </para>
- </listitem>
- <listitem>
- <para>
- Method <function>QPDFObjectHandle::getTypeCode()</function>
- returns an enumerated type value representing the
- underlying object type. Method
- <function>QPDFObjectHandle::getTypeName()</function>
- returns a text string describing the name of the type of a
- <type>QPDFObjectHandle</type> object. These methods can be
- used for more efficient parsing and debugging/diagnostic
- messages.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- <listitem>
- <para>
- @1@command@1@qpdf --check@2@command@2@ now parses all pages' content
- streams in addition to doing other checks. While there are
- still many types of errors that cannot be detected, syntactic
- errors in content streams will now be reported.
- </para>
- </listitem>
- <listitem>
- <para>
- Minor compilation enhancements have been made to facilitate
- easier support for a broader range of compilers and
- compiler versions.
- <itemizedlist>
- <listitem>
- <para>
- Warning flags have been moved into a separate variable in
- @1@filename@1@autoconf.mk@2@filename@2@
- </para>
- </listitem>
- <listitem>
- <para>
- The configure flag @1@option@1@--enable-werror@2@option@2@ works
- for Microsoft compilers
- </para>
- </listitem>
- <listitem>
- <para>
- All MSVC CRT security warnings have been resolved.
- </para>
- </listitem>
- <listitem>
- <para>
- All C-style casts in C++ Code have been replaced by C++
- casts, and many casts that had been included to suppress
- higher warning levels for some compilers have been removed,
- primarily for clarity. Places where integer type coercion
- occurs have been scrutinized. A new casting policy has
- been documented in the manual. This is of concern mainly
- to people porting qpdf to new platforms or compilers. It
- is not visible to programmers writing code that uses the
- library.
- </para>
- </listitem>
- <listitem>
- <para>
- Some internal limits have been removed in code that
- converts numbers to strings. This is largely invisible to
- users, but it does trigger a bug in some older versions of
- mingw-w64's C++ library. See
- @1@filename@1@README-windows.md@2@filename@2@ in the source
- distribution if you think this may affect you. The copy of
- the DLL distributed with qpdf's binary distribution is not
- affected by this problem.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- <listitem>
- <para>
- The RPM spec file previously included with qpdf has been
- removed. This is because virtually all Linux distributions
- include qpdf now that it is a dependency of CUPS filters.
- </para>
- </listitem>
- <listitem>
- <para>
- A few bug fixes are included:
- <itemizedlist>
- <listitem>
- <para>
- Overridden compressed objects are properly handled.
- Before, there were certain constructs that could cause qpdf
- to see old versions of some objects. The most usual
- manifestation of this was loss of filled in form values for
- certain files.
- </para>
- </listitem>
- <listitem>
- <para>
- Installation no longer uses GNU/Linux-specific versions of
- some commands, so @1@command@1@make install@2@command@2@ works on
- Solaris with native tools.
- </para>
- </listitem>
- <listitem>
- <para>
- The 64-bit mingw Windows binary package no longer includes
- a 32-bit DLL.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>4.0.1: January 17, 2013</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Fix detection of binary attachments in test suite to avoid
- false test failures on some platforms.
- </para>
- </listitem>
- <listitem>
- <para>
- Add clarifying comment in @1@filename@1@QPDF.hh@2@filename@2@ to
- methods that return the user password explaining that it is no
- longer possible with newer encryption formats to recover the
- user password knowing the owner password. In earlier
- encryption formats, the user password was encrypted in the
- file using the owner password. In newer encryption formats, a
- separate encryption key is used on the file, and that key is
- independently encrypted using both the user password and the
- owner password.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>4.0.0: December 31, 2012</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Major enhancement: support has been added for newer encryption
- schemes supported by version X of Adobe Acrobat. This
- includes use of 127-character passwords, 256-bit encryption
- keys, and the encryption scheme specified in ISO 32000-2, the
- PDF 2.0 specification. This scheme can be chosen from the
- command line by specifying use of 256-bit keys. qpdf also
- supports the deprecated encryption method used by Acrobat IX.
- This encryption style has known security weaknesses and should
- not be used in practice. However, such files exist "in
- the wild," so support for this scheme is still useful.
- New methods
- <function>QPDFWriter::setR6EncryptionParameters</function>
- (for the PDF 2.0 scheme) and
- <function>QPDFWriter::setR5EncryptionParameters</function>
- (for the deprecated scheme) have been added to enable these
- new encryption schemes. Corresponding functions have been
- added to the C API as well.
- </para>
- </listitem>
- <listitem>
- <para>
- Full support for Adobe extension levels in PDF version
- information. Starting with PDF version 1.7, corresponding to
- ISO 32000, Adobe adds new functionality by increasing the
- extension level rather than increasing the version. This
- support includes addition of the
- <function>QPDF::getExtensionLevel</function> method for
- retrieving the document's extension level, addition of
- versions of
- <function>QPDFWriter::setMinimumPDFVersion</function> and
- <function>QPDFWriter::forcePDFVersion</function> that accept
- an extension level, and extended syntax for specifying forced
- and minimum versions on the command line as described in <xref
- linkend="ref.advanced-transformation"/>. Corresponding
- functions have been added to the C API as well.
- </para>
- </listitem>
- <listitem>
- <para>
- Minor fixes to prevent qpdf from referencing objects in the
- file that are not referenced in the file's overall structure.
- Most files don't have any such objects, but some files
- contain unreferenced objects with errors, so these fixes
- prevent qpdf from needlessly rejecting or complaining about
- such objects.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new generalized methods for reading and writing files
- from/to programmer-defined sources. The method
- <function>QPDF::processInputSource</function> allows the
- programmer to use any input source for the input file, and
- <function>QPDFWriter::setOutputPipeline</function> allows the
- programmer to write the output file through any pipeline.
- These methods would make it possible to perform any number of
- specialized operations, such as accessing external storage
- systems, creating bindings for qpdf in other programming
- languages that have their own I/O systems, etc.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new method <function>QPDF::getEncryptionKey</function> for
- retrieving the underlying encryption key used in the file.
- </para>
- </listitem>
- <listitem>
- <para>
- This release includes a small handful of non-compatible API
- changes. While effort is made to avoid such changes, all the
- non-compatible API changes in this version were to parts of
- the API that would likely never be used outside the library
- itself. In all cases, the altered methods or structures were
- parts of the <classname>QPDF</classname> that were public to
- enable them to be called from either
- <classname>QPDFWriter</classname> or were part of validation
- code that was over-zealous in reporting problems in parts of
- the file that would not ordinarily be referenced. In no case
- did any of the removed methods do anything worse than falsely
- report error conditions in files that were broken in ways that
- didn't matter. The following public parts of the
- <classname>QPDF</classname> class were changed in a
- non-compatible way:
- <itemizedlist>
- <listitem>
- <para>
- Updated nested <classname>QPDF::EncryptionData</classname>
- class to add fields needed by the newer encryption formats,
- member variables changed to private so that future changes
- will not require breaking backward compatibility.
- </para>
- </listitem>
- <listitem>
- <para>
- Added additional parameters to
- <function>compute_data_key</function>, which is used by
- <classname>QPDFWriter</classname> to compute the encryption
- key used to encrypt a specific object.
- </para>
- </listitem>
- <listitem>
- <para>
- Removed the method
- <function>flattenScalarReferences</function>. This method
- was previously used prior to writing a new PDF file, but it
- has the undesired side effect of causing qpdf to read
- objects in the file that were not referenced. Some
- otherwise valid files have unreferenced objects with errors in
- them, so this could cause qpdf to reject files that would
- be accepted by virtually all other PDF readers. In fact,
- qpdf relied on only a very small part of what
- flattenScalarReferences did, so only this part has been
- preserved, and it is now done directly inside
- <classname>QPDFWriter</classname>.
- </para>
- </listitem>
- <listitem>
- <para>
- Removed the method <function>decodeStreams</function>.
- This method was used by the @1@option@1@--check@2@option@2@ option
- of the @1@command@1@qpdf@2@command@2@ command-line tool to force
- all streams in the file to be decoded, but it also suffered
- from the problem of opening otherwise unreferenced streams
- and thus could report false positives. The
- @1@option@1@--check@2@option@2@ option now causes qpdf to go
- through all the motions of writing a new file based on the
- original one, so it will always reference and check exactly
- those parts of a file that any ordinary viewer would check.
- </para>
- </listitem>
- <listitem>
- <para>
- Removed the method
- <function>trimTrailerForWrite</function>. This method was
- used by <classname>QPDFWriter</classname> to modify the
- original QPDF object by removing fields from the trailer
- dictionary that wouldn't apply to the newly written file.
- This functionality, though generally harmless, was a poor
- implementation and has been replaced by having QPDFWriter
- filter these out when copying the trailer rather than
- modifying the original QPDF object. (Note that qpdf never
- modifies the original file itself.)
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </listitem>
- <listitem>
- <para>
- Allow the PDF header to appear anywhere in the first 1024
- bytes of the file. This is consistent with what other readers
- do.
- </para>
- </listitem>
- <listitem>
- <para>
- Fix the @1@command@1@pkg-config@2@command@2@ files to list zlib and
- pcre in <function>Requires.private</function> to better
- support static linking using @1@command@1@pkg-config@2@command@2@.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>3.0.2: September 6, 2012</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug fix: <function>QPDFWriter::setOutputMemory</function> did
- not work when not used with
- <function>QPDFWriter::setStaticID</function>, which made it
- pretty much useless. This has been fixed.
- </para>
- </listitem>
- <listitem>
- <para>
- New API call
- <function>QPDFWriter::setExtraHeaderText</function> inserts
- additional text near the header of the PDF file. The intended
- use case is to insert comments that may be consumed by a
- downstream application, though other use cases may exist.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>3.0.1: August 11, 2012</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Version 3.0.0 included addition of files for
- @1@command@1@pkg-config@2@command@2@, but this was not mentioned in
- the release notes. The release notes for 3.0.0 were updated
- to mention this.
- </para>
- </listitem>
- <listitem>
- <para>
- Bug fix: if an object stream ended with a scalar object not
- followed by space, qpdf would incorrectly report that it
- encountered a premature EOF. This bug has been in qpdf since
- version 2.0.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>3.0.0: August 2, 2012</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Acknowledgment: I would like to express gratitude for the
- contributions of Tobias Hoffmann toward the release of qpdf
- version 3.0. He is responsible for most of the implementation
- and design of the new API for manipulating pages, and
- contributed code and ideas for many of the improvements made
- in version 3.0. Without his work, this release would
- certainly not have happened as soon as it did, if at all.
- </para>
- </listitem>
- <listitem>
- <para>
- <emphasis>Non-compatible API change:</emphasis> The version of
- <function>QPDFObjectHandle::replaceStreamData</function> that
- uses a <classname>StreamDataProvider</classname> no longer
- requires (or accepts) a <varname>length</varname> parameter.
- See <xref linkend="ref.upgrading-to-3.0"/> for an explanation.
- While care is taken to avoid non-compatible API changes in
- general, an exception was made this time because the new
- interface offers an opportunity to significantly simplify
- calling code.
- </para>
- </listitem>
- <listitem>
- <para>
- Support has been added for large files. The test suite
- verifies support for files larger than 4 gigabytes, and manual
- testing has verified support for files larger than 10
- gigabytes. Large file support is available for both 32-bit
- and 64-bit platforms as long as the compiler and underlying
- platforms support it.
- </para>
- </listitem>
- <listitem>
- <para>
- Support for page selection (splitting and merging PDF files)
- has been added to the @1@command@1@qpdf@2@command@2@ command-line
- tool. See <xref linkend="ref.page-selection"/>.
- </para>
- </listitem>
- <listitem>
- <para>
- Options have been added to the @1@command@1@qpdf@2@command@2@
- command-line tool for copying encryption parameters from
- another file. See <xref linkend="ref.basic-options"/>.
- </para>
- </listitem>
- <listitem>
- <para>
- New methods have been added to the <classname>QPDF</classname>
- object for adding and removing pages. See <xref
- linkend="ref.adding-and-remove-pages"/>.
- </para>
- </listitem>
- <listitem>
- <para>
- New methods have been added to the <classname>QPDF</classname>
- object for copying objects from other PDF files. See <xref
- linkend="ref.foreign-objects"/>.
- </para>
- </listitem>
- <listitem>
- <para>
- A new method <function>QPDFObjectHandle::parse</function> has
- been added for constructing
- <classname>QPDFObjectHandle</classname> objects from a string
- description.
- </para>
- </listitem>
- <listitem>
- <para>
- Methods have been added to <classname>QPDFWriter</classname>
- to allow writing to an already open stdio <type>FILE*</type>
- in addition to writing to standard output or a named file.
- Methods have been added to <classname>QPDF</classname> to be
- able to process a file from an already open stdio
- <type>FILE*</type>. This makes it possible to read and write
- PDF from secure temporary files that have been unlinked prior
- to being fully read or written.
- </para>
- </listitem>
- <listitem>
- <para>
- The <function>QPDF::emptyPDF</function> method can be used to allow
- creation of PDF files from scratch. The example
- @1@filename@1@examples/pdf-create.cc@2@filename@2@ illustrates how it
- can be used.
- </para>
- </listitem>
- <listitem>
- <para>
- Several methods that take
- <classname>PointerHolder&lt;Buffer&gt;</classname> can now
- also accept <type>std::string</type> arguments.
- </para>
- </listitem>
- <listitem>
- <para>
- Many new convenience methods have been added to the library,
- most in <classname>QPDFObjectHandle</classname>. See
- @1@filename@1@ChangeLog@2@filename@2@ for a full list.
- </para>
- </listitem>
- <listitem>
- <para>
- When building on a platform that supports ELF shared libraries
- (such as Linux), symbol versions are enabled by default. They
- can be disabled by passing
- @1@option@1@--disable-ld-version-script@2@option@2@ to
- @1@command@1@./configure@2@command@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- The file @1@filename@1@libqpdf.pc@2@filename@2@ is now installed to
- support @1@command@1@pkg-config@2@command@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- Image comparison tests are off by default now since they are
- not needed to verify a correct build or port of qpdf. They
- are needed only when changing the actual PDF output generated
- by qpdf. You should enable them if you are making deep
- changes to qpdf itself. See @1@filename@1@README.md@2@filename@2@ for
- details.
- </para>
- </listitem>
- <listitem>
- <para>
- Large file tests are off by default but can be turned on with
- @1@command@1@./configure@2@command@2@ or by setting an environment
- variable before running the test suite. See
- @1@filename@1@README.md@2@filename@2@ for details.
- </para>
- </listitem>
- <listitem>
- <para>
- When qpdf's test suite fails, failures are not printed to the
- terminal anymore by default. Instead, find them in
- @1@filename@1@build/qtest.log@2@filename@2@. For packagers who are
- building with an autobuilder, you can add the
- @1@option@1@--enable-show-failed-test-output@2@option@2@ option to
- @1@command@1@./configure@2@command@2@ to restore the old behavior.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.3.1: December 28, 2011</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Fix thread-safety problem resulting from non-thread-safe use
- of the PCRE library.
- </para>
- </listitem>
- <listitem>
- <para>
- Made a few minor documentation fixes.
- </para>
- </listitem>
- <listitem>
- <para>
- Add workaround for a bug that appears in some versions of
- ghostscript to the test suite
- </para>
- </listitem>
- <listitem>
- <para>
- Fix minor build issue for Visual C++ 2010.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.3.0: August 11, 2011</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Bug fix: when preserving existing encryption on encrypted
- files with cleartext metadata, older qpdf versions would
- generate password-protected files with no valid password.
- This operation now works. This bug only affected files
- created by copying existing encryption parameters; explicit
- encryption with specification of cleartext metadata worked
- before and continues to work.
- </para>
- </listitem>
- <listitem>
- <para>
- Enhance <classname>QPDFWriter</classname> with a new
- constructor that allows you to delay the specification of the
- output file. When using this constructor, you may now call
- <function>QPDFWriter::setOutputFilename</function> to specify
- the output file, or you may use
- <function>QPDFWriter::setOutputMemory</function> to cause
- <classname>QPDFWriter</classname> to write the resulting PDF
- file to a memory buffer. You may then use
- <function>QPDFWriter::getBuffer</function> to retrieve the
- memory buffer.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new API call <function>QPDF::replaceObject</function> for
- replacing objects by object ID.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new API call <function>QPDF::swapObjects</function> for
- swapping two objects by object ID.
- </para>
- </listitem>
- <listitem>
- <para>
- Add <function>QPDFObjectHandle::getDictAsMap</function> and
- <function>QPDFObjectHandle::getArrayAsVector</function> to
- allow retrieval of dictionary objects as maps and array
- objects as vectors.
- </para>
- </listitem>
- <listitem>
- <para>
- Add functions <function>qpdf_get_info_key</function> and
- <function>qpdf_set_info_key</function> to the C API for
- manipulating string fields of the document's
- <literal>/Info</literal> dictionary.
- </para>
- </listitem>
- <listitem>
- <para>
- Add functions <function>qpdf_init_write_memory</function>,
- <function>qpdf_get_buffer_length</function>, and
- <function>qpdf_get_buffer</function> to the C API for writing
- PDF files to a memory buffer instead of a file.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.2.4: June 25, 2011</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Fix installation and compilation issues; no functionality
- changes.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.2.3: April 30, 2011</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Handle some damaged streams with incorrect characters
- following the stream keyword.
- </para>
- </listitem>
- <listitem>
- <para>
- Improve handling of inline images when normalizing content
- streams.
- </para>
- </listitem>
- <listitem>
- <para>
- Enhance error recovery to properly handle files that use
- object 0 as a regular object, which is specifically disallowed
- by the spec.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.2.2: October 4, 2010</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Add new function <function>qpdf_read_memory</function>
- to the C API to call
- <function>QPDF::processMemoryFile</function>. This was an
- omission in qpdf 2.2.1.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.2.1: October 1, 2010</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Add new method <function>QPDF::setOutputStreams</function>
- to replace <varname>std::cout</varname> and
- <varname>std::cerr</varname> with other streams for generation
- of diagnostic messages and error messages. This can be useful
- for GUIs or other applications that want to capture any output
- generated by the library to present to the user in some other
- way. Note that QPDF does not write to
- <varname>std::cout</varname> (or the specified output stream)
- except where explicitly mentioned in
- @1@filename@1@QPDF.hh@2@filename@2@, and that the only use of the
- error stream is for warnings. Note also that output of
- warnings is suppressed when
- <literal>setSuppressWarnings(true)</literal> is called.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new method <function>QPDF::processMemoryFile</function>
- for operating on PDF files that are loaded into memory rather
- than in a file on disk.
- </para>
- </listitem>
- <listitem>
- <para>
- Give a warning but otherwise ignore empty PDF objects by
- treating them as null. Empty objects are not permitted by the
- PDF specification but have been known to appear in some actual
- PDF files.
- </para>
- </listitem>
- <listitem>
- <para>
- Handle inline image filter abbreviations when they appear as
- stream filter abbreviations. The PDF specification does not
- allow use of stream filter abbreviations in this way, but
- Adobe Reader and some other PDF readers accept them since they
- sometimes appear incorrectly in actual PDF files.
- </para>
- </listitem>
- <listitem>
- <para>
- Implement miscellaneous enhancements to
- <classname>PointerHolder</classname> and
- <classname>Buffer</classname> to support other changes.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.2.0: August 14, 2010</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Add new methods to <classname>QPDFObjectHandle</classname>
- (<function>newStream</function> and
- <function>replaceStreamData</function>) for creating new
- streams and replacing stream data. This makes it possible to
- perform a wide range of operations that were not previously
- possible.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new helper method in
- <classname>QPDFObjectHandle</classname>
- (<function>addPageContents</function>) for appending or
- prepending new content streams to a page. This method makes
- it possible to manipulate content streams without having to be
- concerned whether a page's contents are a single stream or an
- array of streams.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new method in <classname>QPDFObjectHandle</classname>:
- <function>replaceOrRemoveKey</function>, which replaces a
- dictionary key
- with a given value unless the value is null, in which case it
- removes the key instead.
- </para>
- </listitem>
- <listitem>
- <para>
- Add new method in <classname>QPDFObjectHandle</classname>:
- <function>getRawStreamData</function>, which returns the raw
- (unfiltered) stream data into a buffer. This complements the
- <function>getStreamData</function> method, which returns the
- filtered (uncompressed) stream data and can only be used when
- the stream's data is filterable.
- </para>
- </listitem>
- <listitem>
- <para>
- Provide two new examples:
- @1@command@1@pdf-double-page-size@2@command@2@ and
- @1@command@1@pdf-invert-images@2@command@2@ that illustrate the newly
- added interfaces.
- </para>
- </listitem>
- <listitem>
- <para>
- Fix a memory leak that would cause loss of a few bytes for
- every object involved in a cycle of object references. Thanks
- to Jian Ma for calling my attention to the leak.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.1.5: April 25, 2010</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Remove restriction of file identifier strings to 16 bytes.
- This unnecessary restriction was preventing qpdf from being
- able to encrypt or decrypt files with identifier strings that
- were not exactly 16 bytes long. The specification imposes no
- such restriction.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.1.4: April 18, 2010</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Apply the same padding calculation fix from version 2.1.2 to
- the main cross reference stream as well.
- </para>
- </listitem>
- <listitem>
- <para>
- Since @1@command@1@qpdf --check@2@command@2@ only performs limited
- checks, clarify the output to make it clear that there still
- may be errors that qpdf can't check. This should make it less
- surprising to people when another PDF reader is unable to read
- a file that qpdf thinks is okay.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.1.3: March 27, 2010</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Fix bug that could cause a failure when rewriting PDF files
- that contain object streams with unreferenced objects that in
- turn reference indirect scalars.
- </para>
- </listitem>
- <listitem>
- <para>
- Don't complain about (invalid) AES streams that aren't a
- multiple of 16 bytes. Instead, pad them before decrypting.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.1.2: January 24, 2010</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Fix bug in padding around first half cross reference stream in
- linearized files. The bug could cause an assertion failure
- when linearizing certain unlucky files.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.1.1: December 14, 2009</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- No changes in functionality; insert missing include in an
- internal library header file to support gcc 4.4, and update
- test suite to ignore broken Adobe Reader installations.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.1: October 30, 2009</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- This is the first version of qpdf to include Windows support.
- On Windows, it is possible to build a DLL. Additionally, a
- partial C-language API has been introduced, which makes it
- possible to call qpdf functions from non-C++ environments. I
- am very grateful to Žarko Gajić (<ulink
- url="http://zarko-gajic.iz.hr/">http://zarko-gajic.iz.hr/</ulink>)
- for tirelessly testing numerous pre-release versions of this
- DLL and providing many excellent suggestions on improving the
- interface.
- </para>
- <para>
- For programming to the C interface, please see the header file
- @1@filename@1@qpdf/qpdf-c.h@2@filename@2@ and the example
- @1@filename@1@examples/pdf-linearize.c@2@filename@2@.
- </para>
- </listitem>
- <listitem>
- <para>
- Žarko Gajić has written a Delphi wrapper for qpdf, which can
- be downloaded from qpdf's download site. Žarko's Delphi
- wrapper is released with the same licensing terms as qpdf
- itself and comes with this disclaimer: "Delphi wrapper
- unit @1@filename@1@qpdf.pas@2@filename@2@ created by Žarko Gajić
- (<ulink
- url="http://zarko-gajic.iz.hr/">http://zarko-gajic.iz.hr/</ulink>).
- Use at your own risk and for whatever purpose you want. No
- support is provided. Sample code is provided."
- </para>
- </listitem>
- <listitem>
- <para>
- Support has been added for AES encryption and crypt filters.
- Although qpdf does not presently support files that use
- PKI-based encryption, with the addition of AES and crypt
- filters, qpdf is now able to open most encrypted files
- created with newer versions of Acrobat or other PDF creation
- software. Note that I have not been able to get very many
- files encrypted in this way, so it's possible there could
- still be some cases that qpdf can't handle. Please report
- them if you find them.
- </para>
- </listitem>
- <listitem>
- <para>
- Many error messages have been improved to include more
- information in hopes of making qpdf a more useful tool for PDF
- experts to use in manually recovering damaged PDF files.
- </para>
- </listitem>
- <listitem>
- <para>
- Attempt to avoid compressing metadata streams if possible.
- This is consistent with other PDF creation applications.
- </para>
- </listitem>
- <listitem>
- <para>
- Provide new command-line options for AES encryption, cleartext
- metadata, and setting the minimum and forced PDF versions of
- output files.
- </para>
- </listitem>
- <listitem>
- <para>
- Add additional methods to the <classname>QPDF</classname>
- object for querying the document's permissions. Although qpdf
- does not enforce these permissions, it does make them
- available so that applications that use qpdf can enforce
- permissions.
- </para>
- </listitem>
- <listitem>
- <para>
- The @1@option@1@--check@2@option@2@ option to @1@command@1@qpdf@2@command@2@
- has been extended to include some additional information.
- </para>
- </listitem>
- <listitem>
- <para>
- There have been a handful of non-compatible API changes. For
- details, see <xref linkend="ref.upgrading-to-2.1"/>.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.0.6: May 3, 2009</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Do not attempt to uncompress streams that have decode
- parameters we don't recognize. Earlier versions of qpdf would
- have rejected files with such streams.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.0.5: March 10, 2009</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Improve error handling in the LZW decoder, and fix a small
- error introduced in the previous version with regard to
- handling full tables. The LZW decoder has been more strongly
- verified in this release.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.0.4: February 21, 2009</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Include proper support for LZW streams encoded without the
- "early code change" flag. Special thanks to Atom
- Smasher who reported the problem and provided an input file
- compressed in this way, which I did not previously have.
- </para>
- </listitem>
- <listitem>
- <para>
- Implement some improvements to file recovery logic.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.0.3: February 15, 2009</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Compile cleanly with gcc 4.4.
- </para>
- </listitem>
- <listitem>
- <para>
- Handle strings encoded as UTF-16BE properly.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.0.2: June 30, 2008</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- Update test suite to work properly with a
- non-@1@command@1@bash@2@command@2@ @1@filename@1@/bin/sh@2@filename@2@ and
- with Perl 5.10. No changes were made to the actual qpdf
- source code itself for this release.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.0.1: May 6, 2008</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- No changes in functionality or interface. This release
- includes fixes to the source code so that qpdf compiles
- properly and passes its test suite on a broader range of
- platforms. See @1@filename@1@ChangeLog@2@filename@2@ in the source
- distribution for details.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term>2.0: April 29, 2008</term>
- <listitem>
- <itemizedlist>
- <listitem>
- <para>
- First public release.
- </para>
- </listitem>
- </itemizedlist>
- </listitem>
- </varlistentry>
- </variablelist>
- </appendix>
- <appendix id="ref.upgrading-to-2.1">
- <title>Upgrading from 2.0 to 2.1</title>
- <para>
- Although, as a general rule, we like to avoid introducing
- source-level incompatibilities in qpdf's interface, there were a
- few non-compatible changes made in this version. A considerable
- amount of source code that uses qpdf will probably compile without
- any changes, but in some cases, you may have to update your code.
- The changes are enumerated here. There are also some new
- interfaces; for those, please refer to the header files.
- </para>
- <itemizedlist>
- <listitem>
- <para>
- QPDF's exception handling mechanism now uses
- <classname>std::logic_error</classname> for internal errors and
- <classname>std::runtime_error</classname> for runtime errors in
- favor of the now removed <classname>QEXC</classname> classes used
- in previous versions. The <classname>QEXC</classname> exception
- classes predated the addition of the
- @1@filename@1@&lt;stdexcept&gt;@2@filename@2@ header file to the C++
- standard library. Most of the exceptions thrown by the qpdf
- library itself are still of type <classname>QPDFExc</classname>
- which is now derived from
- <classname>std::runtime_error</classname>. Programs that caught
- an instance of <classname>std::exception</classname> and
- displayed it by calling the <function>what()</function> method
- will not need to be changed.
- </para>
- </listitem>
- <listitem>
- <para>
- The <classname>QPDFExc</classname> class now internally
- represents various fields of the error condition and provides
- interfaces for querying them. Among the fields is a numeric
- error code that can help applications act differently on (a small
- number of) different error conditions. See
- @1@filename@1@QPDFExc.hh@2@filename@2@ for details.
- </para>
- </listitem>
- <listitem>
- <para>
- Warnings can be retrieved from qpdf as instances of
- <classname>QPDFExc</classname> instead of strings.
- </para>
- </listitem>
- <listitem>
- <para>
- The nested <classname>QPDF::EncryptionData</classname> class's
- constructor takes an additional argument. This class is
- primarily intended to be used by
- <classname>QPDFWriter</classname>. There's not really anything
- useful an end-user application could do with it. It probably
- shouldn't really be part of the public interface to begin with.
- Likewise, some of the methods for computing internal encryption
- dictionary parameters have changed to support
- <literal>/R=4</literal> encryption.
- </para>
- </listitem>
- <listitem>
- <para>
- The method <function>QPDF::getUserPassword</function> has been
- removed since it didn't do what people would think it did. There
- are now two new methods:
- <function>QPDF::getPaddedUserPassword</function> and
- <function>QPDF::getTrimmedUserPassword</function>. The first one
- does what the old <function>QPDF::getUserPassword</function>
- method used to do, which is to return the password with possible
- binary padding as specified by the PDF specification. The second
- one returns a human-readable password string.
- </para>
- </listitem>
- <listitem>
- <para>
- The enumerated types that used to be nested in
- <classname>QPDFWriter</classname> have moved to top-level
- enumerated types and are now defined in the file
- @1@filename@1@qpdf/Constants.h@2@filename@2@. This enables them to be
- shared by both the C and C++ interfaces.
- </para>
- </listitem>
- </itemizedlist>
- </appendix>
- <appendix id="ref.upgrading-to-3.0">
- <title>Upgrading to 3.0</title>
- <para>
- For the most part, the API for qpdf version 3.0 is backward
- compatible with versions 2.1 and later. There are two exceptions:
- <itemizedlist>
- <listitem>
- <para>
- The method
- <function>QPDFObjectHandle::replaceStreamData</function> that
- uses a <classname>StreamDataProvider</classname> to provide the
- stream data no longer takes a <varname>length</varname>
- parameter. While it would have been easy enough to keep the
- parameter for backward compatibility, in this case, the
- parameter was removed since this provides the user an
- opportunity to simplify the calling code. This method was
- introduced in version 2.2. At the time, the
- <varname>length</varname> parameter was required in order to
- ensure that calls to the stream data provider returned the same
- length for a specific stream every time they were invoked. In
- particular, the linearization code depends on this. Instead,
- qpdf 3.0 and newer check for that constraint explicitly. The
- first time the stream data provider is called for a specific
- stream, the actual length is saved, and subsequent calls are
- required to return the same number of bytes. This means the
- calling code no longer has to compute the length in advance,
- which can be a significant simplification. If your code fails
- to compile because of the extra argument and you don't want to
- make other changes to your code, just omit the argument.
- </para>
- </listitem>
- <listitem>
- <para>
- Many methods take <type>long long</type> instead of other
- integer types. Most if not all existing code should compile
- fine with this change since such parameters had always
- previously been smaller types. This change was required to
- support files larger than two gigabytes in size.
- </para>
- </listitem>
- </itemizedlist>
- </para>
- </appendix>
- <appendix id="ref.upgrading-to-4.0">
- <title>Upgrading to 4.0</title>
- <para>
- While version 4.0 includes a few non-compatible API changes, it is
- very unlikely that anyone's code would have used any of those parts
- of the API since they generally required information that would
- only be available inside the library. In the unlikely event that
- you should run into trouble, please see the ChangeLog. See also
- <xref linkend="ref.release-notes"/> for a complete list of the
- non-compatible API changes made in this version.
- </para>
- </appendix>
-</book>