aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--README-maintainer4
-rw-r--r--cSpell.json3
-rw-r--r--include/qpdf/Constants.h1
-rw-r--r--include/qpdf/QPDFObjectHandle.hh58
-rw-r--r--libqpdf/QPDFObjectHandle.cc23
-rw-r--r--manual/qpdf-manual.xml104
-rw-r--r--qpdf/qtest/qpdf.test7
-rw-r--r--qpdf/test_driver.cc15
8 files changed, 196 insertions, 19 deletions
diff --git a/README-maintainer b/README-maintainer
index ef0c0000..62da91e6 100644
--- a/README-maintainer
+++ b/README-maintainer
@@ -132,7 +132,9 @@ RELEASE PREPARATION
* Check all open issues and pull requests in github and the
sourceforge trackers. See ~/scripts/github-issues. Don't forget pull
- requests.
+ requests. Note: If the location for reporting issues changes, do a
+ careful check of documentation and code to make sure any comments
+ that include the issue creation URL are updated.
* Check `TODO` file to make sure all planned items for the release are
done or retargeted.
diff --git a/cSpell.json b/cSpell.json
index dc2880f9..32026f36 100644
--- a/cSpell.json
+++ b/cSpell.json
@@ -143,6 +143,7 @@
"hcryptprov",
"hdict",
"hoffmann",
+ "holger",
"hosoda",
"htcondor",
"htdocs",
@@ -205,6 +206,7 @@
"linp",
"listitem",
"ljpeg",
+ "longjmp",
"lpstr",
"lqpdf",
"lssl",
@@ -367,6 +369,7 @@
"scarff",
"seekable",
"segfaulting",
+ "setjmp",
"sharedresources",
"smatch",
"softlink",
diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h
index 45b87d4d..4e4c9e01 100644
--- a/include/qpdf/Constants.h
+++ b/include/qpdf/Constants.h
@@ -38,6 +38,7 @@ enum qpdf_error_code_e
qpdf_e_password, /* incorrect password for encrypted file */
qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */
qpdf_e_pages, /* erroneous or unsupported pages structure */
+ qpdf_e_object, /* type/bounds errors accessing objects */
};
/* Write Parameters. See QPDFWriter.hh for details. */
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index 65531fa6..31afd01b 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -609,15 +609,65 @@ class QPDFObjectHandle
QPDF_DLL
bool hasObjectDescription();
- // Accessor methods. If an accessor method that is valid for only
- // a particular object type is called on an object of the wrong
- // type, an exception is thrown.
+ // Accessor methods
+ //
+ // (Note: this comment is referenced in qpdf-c.h and the manual.)
+ //
+ // In PDF files, objects have specific types, but there is nothing
+ // that prevents PDF files from containing objects of types that
+ // aren't expected by the specification. Many of the accessors
+ // here expect objects of a particular type. Prior to qpdf 8,
+ // calling an accessor on a method of the wrong type, such as
+ // trying to get a dictionary key from an array, trying to get the
+ // string value of a number, etc., would throw an exception, but
+ // since qpdf 8, qpdf issues a warning and recovers using the
+ // following behavior:
+ //
+ // * Requesting a value of the wrong type (int value from string,
+ // array item from a scalar or dictionary, etc.) will return a
+ // zero-like value for that type: false for boolean, 0 for
+ // number, the empty string for string, or the null object for
+ // an object handle.
+ //
+ // * Accessing an array item that is out of bounds will return a
+ // null object.
+ //
+ // * Attempts to mutate an object of the wrong type (e.g.,
+ // attempting to add a dictionary key to a scalar or array) will
+ // be ignored.
+ //
+ // When any of these fallback behaviors are used, qpdf issues a
+ // warning. Starting in qpdf 10.5, these warnings have the error
+ // code qpdf_e_object. Prior to 10.5, they had the error code
+ // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with
+ // a QPDF object (as is the case for all objects whose origin was
+ // a PDF file), the warning is issued using the normal warning
+ // mechanism (as described in QPDF.hh), making it possible to
+ // suppress or otherwise detect them. If the QPDFObjectHandle is
+ // not associated with a QPDF object (meaning it was created
+ // programmatically), an exception will be thrown.
+ //
+ // The way to avoid getting any type warnings or exceptions, even
+ // when working with malformed PDF files, is to always check the
+ // type of a QPDFObjectHandle before accessing it (for example,
+ // make sure that isString() returns true before calling
+ // getStringValue()) and to always be sure that any array indices
+ // are in bounds.
+ //
+ // For additional discussion and rationale for this behavior, see
+ // the section in the QPDF manual entitled "Object Accessor
+ // Methods".
// Methods for bool objects
QPDF_DLL
bool getBoolValue();
- // Methods for integer objects
+ // Methods for integer objects. Note: if an integer value is too
+ // big (too far away from zero in either direction) to fit in the
+ // requested return type, the maximum or minimum value for that
+ // return type may be returned. For example, on a system with
+ // 32-bit int, a numeric object with a value of 2^40 (or anything
+ // too big for 32 bits) will be returned as INT_MAX.
QPDF_DLL
long long getIntValue();
QPDF_DLL
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index af862bd4..1a8e3223 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -3048,23 +3048,17 @@ void
QPDFObjectHandle::typeWarning(char const* expected_type,
std::string const& warning)
{
- QPDF* context = 0;
+ QPDF* context = nullptr;
std::string description;
dereference();
- if (this->obj->getDescription(context, description))
- {
- warn(context,
- QPDFExc(
- qpdf_e_damaged_pdf,
+ this->obj->getDescription(context, description);
+ // Null context handled by warn
+ warn(context,
+ QPDFExc(qpdf_e_object,
"", description, 0,
std::string("operation for ") + expected_type +
" attempted on object of type " +
getTypeName() + ": " + warning));
- }
- else
- {
- assertType(expected_type, false);
- }
}
void
@@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning,
void
QPDFObjectHandle::objectWarning(std::string const& warning)
{
- warnIfPossible(warning, true);
+ QPDF* context = nullptr;
+ std::string description;
+ dereference();
+ this->obj->getDescription(context, description);
+ // Null context handled by warn
+ warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning));
}
void
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
index 8a2f5233..fd3bd6fb 100644
--- a/manual/qpdf-manual.xml
+++ b/manual/qpdf-manual.xml
@@ -4560,6 +4560,96 @@ outfile.pdf</option>
filtered stream contents to a given pipeline.
</para>
</sect1>
+ <sect1 id="ref.object-accessors">
+ <!-- This section is referenced in QPDFObjectHandle.hh -->
+ <title>Object Accessor Methods</title>
+ <para>
+ For general information about how to access instances of
+ <classname>QPDFObjectHandle</classname>, please see the comments
+ in <filename>QPDFObjectHandle.hh</filename>. Search for
+ &ldquo;Accessor methods&rdquo;. This section provides a more
+ in-depth discussion of the behavior and the rationale for the
+ behavior.
+ </para>
+ <para>
+ <emphasis>Why were type errors made into warnings?</emphasis> When
+ type checks were introduced into qpdf in the early days, it was
+ expected that type errors would only occur as a result of
+ programmer error. However, in practice, type errors would occur
+ with malformed PDF files because of assumptions made in code,
+ including code within the qpdf library and code written by library
+ users. The most common case would be chaining calls to
+ <function>getKey()</function> to access keys deep within a
+ dictionary. In many cases, qpdf would be able to recover from
+ these situations, but the old behavior often resulted in crashes
+ rather than graceful recovery. For this reason, the errors were
+ changed to warnings.
+ </para>
+ <para>
+ <emphasis>Why even warn about type errors when the user can't
+ usually do anything about them?</emphasis> Type warnings are
+ extremely valuable during development. Since it's impossible to
+ catch at compile time things like typos in dictionary key names or
+ logic errors around what the structure of a PDF file might be, the
+ presence of type warnings can save lots of developer time. They
+ have also proven useful in exposing issues in qpdf itself that
+ would have otherwise gone undetected.
+ </para>
+ <para>
+ <emphasis>Can there be a type-safe
+ <classname>QPDFObjectHandle</classname>?</emphasis> It would be
+ great if <classname>QPDFObjectHandle</classname> could be more
+ strongly typed so that you'd have to have check that something was
+ of a particular type before calling type-specific accessor
+ methods. However, implementing this at this stage of the library's
+ history would be quite difficult, and it would make a the common
+ pattern of drilling into an object no longer work. While it would
+ be possible to have a parallel interface, it would create a lot of
+ extra code. If qpdf were written in a language like rust, an
+ interface like this would make a lot of sense, but, for a variety
+ of reasons, the qpdf API is consistent with other APIs of its
+ time, relying on exception handling to catch errors. The
+ underlying PDF objects are inherently not type-safe. Forcing
+ stronger type safety in <classname>QPDFObjectHandle</classname>
+ would ultimately cause a lot more code to have to be written and
+ would like make software that uses qpdf more brittle, and even so,
+ checks would have to occur at runtime.
+ </para>
+ <para>
+ <emphasis>Why do type errors sometimes raise
+ exceptions?</emphasis> The way warnings work in qpdf requires a
+ <classname>QPDF</classname> object to be associated with an object
+ handle for a warning to be issued. It would be nice if this could
+ be fixed, but it would require major changes to the API. Rather
+ than throwing away these conditions, we convert them to
+ exceptions. It's not that bad though. Since any object handle that
+ was read from a file has an associated <classname>QPDF</classname>
+ object, it would only be type errors on objects that were created
+ explicitly that would cause exceptions, and in that case, type
+ errors are much more likely to be the result of a coding error
+ than invalid input.
+ </para>
+ <para>
+ <emphasis>Why does the behavior of a type exception differ between
+ the C and C++ API?</emphasis> There is no way to throw and catch
+ exceptions in C short of something like
+ <function>setjmp</function> and <function>longjmp</function>, and
+ that approach is not portable across language barriers. Since the
+ C API is often used from other languages, it's important to keep
+ things as simple as possible. Starting in qpdf 10.5, exceptions
+ that used to crash code using the C API will be written to stderr
+ by default, and it is possible to register an error handler.
+ There's no reason that the error handler can't simulate exception
+ handling in some way, such as by using <function>setjmp</function>
+ and <function>longjmp</function> or by setting some variable that
+ can be checked after library calls are made. In retrospect, it
+ might have been better if the C API object handle methods returned
+ error codes like the other methods and set return values in
+ passed-in pointers, but this would complicate both the
+ implementation and the use of the library for a case that is
+ actually quite rare and largely avoidable.
+ </para>
+ </sect1>
</chapter>
<chapter id="ref.linearization">
<title>Linearization</title>
@@ -5127,6 +5217,20 @@ print "\n";
<itemizedlist>
<listitem>
<para>
+ Since qpdf version 8, using object accessor methods on an
+ instance of <classname>QPDFObjectHandle</classname> may
+ create warnings if the object is not of the expected type.
+ These warnings now have an error code of
+ <literal>qpdf_e_object</literal> instead of
+ <literal>qpdf_e_damaged_pdf</literal>. Also, comments have
+ been added to <filename>QPDFObjectHandle.hh</filename> to
+ explain in more detail what the behavior is. See <xref
+ linkend="ref.object-accessors"/> for a more in-depth
+ discussion.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
Add <function>qpdf_get_last_string_length</function> to the
C API to get the length of the last string that was
returned. This is needed to handle strings that contain
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 894b3a01..21f82a22 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -273,12 +273,16 @@ $td->runtest("check final version",
show_ntests();
# ----------
$td->notify("--- Exceptions ---");
-$n_tests += 1;
+$n_tests += 2;
$td->runtest("check exception handling",
{$td->COMMAND => "test_driver 61 -"},
{$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
+$td->runtest("check certain exception types",
+ {$td->COMMAND => "test_driver 81 -"},
+ {$td->STRING => "test 81 done\n", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
@@ -5303,6 +5307,7 @@ for (my $large = 0; $large < $nlarge; ++$large)
$td->NORMALIZE_NEWLINES);
unlink $file;
}
+show_ntests();
# ----------
cleanup();
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index 7d5b2ece..613dc5bd 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2)
pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(),
p, size);
}
- else if (n == 61)
+ else if ((n == 61) || (n == 81))
{
// Ignore filename argument entirely
}
@@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2)
w2.setQDFMode(true);
w2.write();
}
+ else if (n == 81)
+ {
+ // Exercise that type errors get their own special type
+ try
+ {
+ QPDFObjectHandle::newNull().getIntValue();
+ assert(false);
+ }
+ catch (QPDFExc& e)
+ {
+ assert(e.getErrorCode() == qpdf_e_object);
+ }
+ }
else
{
throw std::runtime_error(std::string("invalid test ") +