diff options
author | Jay Berkenbilt <ejb@ql.org> | 2021-12-10 15:34:42 +0100 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2021-12-10 17:15:49 +0100 |
commit | 3340dbe9761ef35d580d77a73e17d204579624f1 (patch) | |
tree | 7c317cf30a7ce29019658d5b7bbf5172c036dfa4 | |
parent | b2b2a175c49c0a68018e329ee7637424d1ba3218 (diff) | |
download | qpdf-3340dbe9761ef35d580d77a73e17d204579624f1.tar.zst |
Use a specific error code for type warnings and clarify docs
-rw-r--r-- | README-maintainer | 4 | ||||
-rw-r--r-- | cSpell.json | 3 | ||||
-rw-r--r-- | include/qpdf/Constants.h | 1 | ||||
-rw-r--r-- | include/qpdf/QPDFObjectHandle.hh | 58 | ||||
-rw-r--r-- | libqpdf/QPDFObjectHandle.cc | 23 | ||||
-rw-r--r-- | manual/qpdf-manual.xml | 104 | ||||
-rw-r--r-- | qpdf/qtest/qpdf.test | 7 | ||||
-rw-r--r-- | qpdf/test_driver.cc | 15 |
8 files changed, 196 insertions, 19 deletions
diff --git a/README-maintainer b/README-maintainer index ef0c0000..62da91e6 100644 --- a/README-maintainer +++ b/README-maintainer @@ -132,7 +132,9 @@ RELEASE PREPARATION * Check all open issues and pull requests in github and the sourceforge trackers. See ~/scripts/github-issues. Don't forget pull - requests. + requests. Note: If the location for reporting issues changes, do a + careful check of documentation and code to make sure any comments + that include the issue creation URL are updated. * Check `TODO` file to make sure all planned items for the release are done or retargeted. diff --git a/cSpell.json b/cSpell.json index dc2880f9..32026f36 100644 --- a/cSpell.json +++ b/cSpell.json @@ -143,6 +143,7 @@ "hcryptprov", "hdict", "hoffmann", + "holger", "hosoda", "htcondor", "htdocs", @@ -205,6 +206,7 @@ "linp", "listitem", "ljpeg", + "longjmp", "lpstr", "lqpdf", "lssl", @@ -367,6 +369,7 @@ "scarff", "seekable", "segfaulting", + "setjmp", "sharedresources", "smatch", "softlink", diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h index 45b87d4d..4e4c9e01 100644 --- a/include/qpdf/Constants.h +++ b/include/qpdf/Constants.h @@ -38,6 +38,7 @@ enum qpdf_error_code_e qpdf_e_password, /* incorrect password for encrypted file */ qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */ qpdf_e_pages, /* erroneous or unsupported pages structure */ + qpdf_e_object, /* type/bounds errors accessing objects */ }; /* Write Parameters. See QPDFWriter.hh for details. */ diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 65531fa6..31afd01b 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -609,15 +609,65 @@ class QPDFObjectHandle QPDF_DLL bool hasObjectDescription(); - // Accessor methods. If an accessor method that is valid for only - // a particular object type is called on an object of the wrong - // type, an exception is thrown. + // Accessor methods + // + // (Note: this comment is referenced in qpdf-c.h and the manual.) + // + // In PDF files, objects have specific types, but there is nothing + // that prevents PDF files from containing objects of types that + // aren't expected by the specification. Many of the accessors + // here expect objects of a particular type. Prior to qpdf 8, + // calling an accessor on a method of the wrong type, such as + // trying to get a dictionary key from an array, trying to get the + // string value of a number, etc., would throw an exception, but + // since qpdf 8, qpdf issues a warning and recovers using the + // following behavior: + // + // * Requesting a value of the wrong type (int value from string, + // array item from a scalar or dictionary, etc.) will return a + // zero-like value for that type: false for boolean, 0 for + // number, the empty string for string, or the null object for + // an object handle. + // + // * Accessing an array item that is out of bounds will return a + // null object. + // + // * Attempts to mutate an object of the wrong type (e.g., + // attempting to add a dictionary key to a scalar or array) will + // be ignored. + // + // When any of these fallback behaviors are used, qpdf issues a + // warning. Starting in qpdf 10.5, these warnings have the error + // code qpdf_e_object. Prior to 10.5, they had the error code + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with + // a QPDF object (as is the case for all objects whose origin was + // a PDF file), the warning is issued using the normal warning + // mechanism (as described in QPDF.hh), making it possible to + // suppress or otherwise detect them. If the QPDFObjectHandle is + // not associated with a QPDF object (meaning it was created + // programmatically), an exception will be thrown. + // + // The way to avoid getting any type warnings or exceptions, even + // when working with malformed PDF files, is to always check the + // type of a QPDFObjectHandle before accessing it (for example, + // make sure that isString() returns true before calling + // getStringValue()) and to always be sure that any array indices + // are in bounds. + // + // For additional discussion and rationale for this behavior, see + // the section in the QPDF manual entitled "Object Accessor + // Methods". // Methods for bool objects QPDF_DLL bool getBoolValue(); - // Methods for integer objects + // Methods for integer objects. Note: if an integer value is too + // big (too far away from zero in either direction) to fit in the + // requested return type, the maximum or minimum value for that + // return type may be returned. For example, on a system with + // 32-bit int, a numeric object with a value of 2^40 (or anything + // too big for 32 bits) will be returned as INT_MAX. QPDF_DLL long long getIntValue(); QPDF_DLL diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index af862bd4..1a8e3223 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -3048,23 +3048,17 @@ void QPDFObjectHandle::typeWarning(char const* expected_type, std::string const& warning) { - QPDF* context = 0; + QPDF* context = nullptr; std::string description; dereference(); - if (this->obj->getDescription(context, description)) - { - warn(context, - QPDFExc( - qpdf_e_damaged_pdf, + this->obj->getDescription(context, description); + // Null context handled by warn + warn(context, + QPDFExc(qpdf_e_object, "", description, 0, std::string("operation for ") + expected_type + " attempted on object of type " + getTypeName() + ": " + warning)); - } - else - { - assertType(expected_type, false); - } } void @@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning, void QPDFObjectHandle::objectWarning(std::string const& warning) { - warnIfPossible(warning, true); + QPDF* context = nullptr; + std::string description; + dereference(); + this->obj->getDescription(context, description); + // Null context handled by warn + warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning)); } void diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index 8a2f5233..fd3bd6fb 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -4560,6 +4560,96 @@ outfile.pdf</option> filtered stream contents to a given pipeline. </para> </sect1> + <sect1 id="ref.object-accessors"> + <!-- This section is referenced in QPDFObjectHandle.hh --> + <title>Object Accessor Methods</title> + <para> + For general information about how to access instances of + <classname>QPDFObjectHandle</classname>, please see the comments + in <filename>QPDFObjectHandle.hh</filename>. Search for + “Accessor methods”. This section provides a more + in-depth discussion of the behavior and the rationale for the + behavior. + </para> + <para> + <emphasis>Why were type errors made into warnings?</emphasis> When + type checks were introduced into qpdf in the early days, it was + expected that type errors would only occur as a result of + programmer error. However, in practice, type errors would occur + with malformed PDF files because of assumptions made in code, + including code within the qpdf library and code written by library + users. The most common case would be chaining calls to + <function>getKey()</function> to access keys deep within a + dictionary. In many cases, qpdf would be able to recover from + these situations, but the old behavior often resulted in crashes + rather than graceful recovery. For this reason, the errors were + changed to warnings. + </para> + <para> + <emphasis>Why even warn about type errors when the user can't + usually do anything about them?</emphasis> Type warnings are + extremely valuable during development. Since it's impossible to + catch at compile time things like typos in dictionary key names or + logic errors around what the structure of a PDF file might be, the + presence of type warnings can save lots of developer time. They + have also proven useful in exposing issues in qpdf itself that + would have otherwise gone undetected. + </para> + <para> + <emphasis>Can there be a type-safe + <classname>QPDFObjectHandle</classname>?</emphasis> It would be + great if <classname>QPDFObjectHandle</classname> could be more + strongly typed so that you'd have to have check that something was + of a particular type before calling type-specific accessor + methods. However, implementing this at this stage of the library's + history would be quite difficult, and it would make a the common + pattern of drilling into an object no longer work. While it would + be possible to have a parallel interface, it would create a lot of + extra code. If qpdf were written in a language like rust, an + interface like this would make a lot of sense, but, for a variety + of reasons, the qpdf API is consistent with other APIs of its + time, relying on exception handling to catch errors. The + underlying PDF objects are inherently not type-safe. Forcing + stronger type safety in <classname>QPDFObjectHandle</classname> + would ultimately cause a lot more code to have to be written and + would like make software that uses qpdf more brittle, and even so, + checks would have to occur at runtime. + </para> + <para> + <emphasis>Why do type errors sometimes raise + exceptions?</emphasis> The way warnings work in qpdf requires a + <classname>QPDF</classname> object to be associated with an object + handle for a warning to be issued. It would be nice if this could + be fixed, but it would require major changes to the API. Rather + than throwing away these conditions, we convert them to + exceptions. It's not that bad though. Since any object handle that + was read from a file has an associated <classname>QPDF</classname> + object, it would only be type errors on objects that were created + explicitly that would cause exceptions, and in that case, type + errors are much more likely to be the result of a coding error + than invalid input. + </para> + <para> + <emphasis>Why does the behavior of a type exception differ between + the C and C++ API?</emphasis> There is no way to throw and catch + exceptions in C short of something like + <function>setjmp</function> and <function>longjmp</function>, and + that approach is not portable across language barriers. Since the + C API is often used from other languages, it's important to keep + things as simple as possible. Starting in qpdf 10.5, exceptions + that used to crash code using the C API will be written to stderr + by default, and it is possible to register an error handler. + There's no reason that the error handler can't simulate exception + handling in some way, such as by using <function>setjmp</function> + and <function>longjmp</function> or by setting some variable that + can be checked after library calls are made. In retrospect, it + might have been better if the C API object handle methods returned + error codes like the other methods and set return values in + passed-in pointers, but this would complicate both the + implementation and the use of the library for a case that is + actually quite rare and largely avoidable. + </para> + </sect1> </chapter> <chapter id="ref.linearization"> <title>Linearization</title> @@ -5127,6 +5217,20 @@ print "\n"; <itemizedlist> <listitem> <para> + Since qpdf version 8, using object accessor methods on an + instance of <classname>QPDFObjectHandle</classname> may + create warnings if the object is not of the expected type. + These warnings now have an error code of + <literal>qpdf_e_object</literal> instead of + <literal>qpdf_e_damaged_pdf</literal>. Also, comments have + been added to <filename>QPDFObjectHandle.hh</filename> to + explain in more detail what the behavior is. See <xref + linkend="ref.object-accessors"/> for a more in-depth + discussion. + </para> + </listitem> + <listitem> + <para> Add <function>qpdf_get_last_string_length</function> to the C API to get the length of the last string that was returned. This is needed to handle strings that contain diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 894b3a01..21f82a22 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -273,12 +273,16 @@ $td->runtest("check final version", show_ntests(); # ---------- $td->notify("--- Exceptions ---"); -$n_tests += 1; +$n_tests += 2; $td->runtest("check exception handling", {$td->COMMAND => "test_driver 61 -"}, {$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("check certain exception types", + {$td->COMMAND => "test_driver 81 -"}, + {$td->STRING => "test 81 done\n", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- @@ -5303,6 +5307,7 @@ for (my $large = 0; $large < $nlarge; ++$large) $td->NORMALIZE_NEWLINES); unlink $file; } +show_ntests(); # ---------- cleanup(); diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 7d5b2ece..613dc5bd 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2) pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(), p, size); } - else if (n == 61) + else if ((n == 61) || (n == 81)) { // Ignore filename argument entirely } @@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2) w2.setQDFMode(true); w2.write(); } + else if (n == 81) + { + // Exercise that type errors get their own special type + try + { + QPDFObjectHandle::newNull().getIntValue(); + assert(false); + } + catch (QPDFExc& e) + { + assert(e.getErrorCode() == qpdf_e_object); + } + } else { throw std::runtime_error(std::string("invalid test ") + |