From 3340dbe9761ef35d580d77a73e17d204579624f1 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Fri, 10 Dec 2021 09:34:42 -0500 Subject: Use a specific error code for type warnings and clarify docs --- README-maintainer | 4 +- cSpell.json | 3 ++ include/qpdf/Constants.h | 1 + include/qpdf/QPDFObjectHandle.hh | 58 ++++++++++++++++++++-- libqpdf/QPDFObjectHandle.cc | 23 +++++---- manual/qpdf-manual.xml | 104 +++++++++++++++++++++++++++++++++++++++ qpdf/qtest/qpdf.test | 7 ++- qpdf/test_driver.cc | 15 +++++- 8 files changed, 196 insertions(+), 19 deletions(-) diff --git a/README-maintainer b/README-maintainer index ef0c0000..62da91e6 100644 --- a/README-maintainer +++ b/README-maintainer @@ -132,7 +132,9 @@ RELEASE PREPARATION * Check all open issues and pull requests in github and the sourceforge trackers. See ~/scripts/github-issues. Don't forget pull - requests. + requests. Note: If the location for reporting issues changes, do a + careful check of documentation and code to make sure any comments + that include the issue creation URL are updated. * Check `TODO` file to make sure all planned items for the release are done or retargeted. diff --git a/cSpell.json b/cSpell.json index dc2880f9..32026f36 100644 --- a/cSpell.json +++ b/cSpell.json @@ -143,6 +143,7 @@ "hcryptprov", "hdict", "hoffmann", + "holger", "hosoda", "htcondor", "htdocs", @@ -205,6 +206,7 @@ "linp", "listitem", "ljpeg", + "longjmp", "lpstr", "lqpdf", "lssl", @@ -367,6 +369,7 @@ "scarff", "seekable", "segfaulting", + "setjmp", "sharedresources", "smatch", "softlink", diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h index 45b87d4d..4e4c9e01 100644 --- a/include/qpdf/Constants.h +++ b/include/qpdf/Constants.h @@ -38,6 +38,7 @@ enum qpdf_error_code_e qpdf_e_password, /* incorrect password for encrypted file */ qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */ qpdf_e_pages, /* erroneous or unsupported pages structure */ + qpdf_e_object, /* type/bounds errors accessing objects */ }; /* Write Parameters. See QPDFWriter.hh for details. */ diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 65531fa6..31afd01b 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -609,15 +609,65 @@ class QPDFObjectHandle QPDF_DLL bool hasObjectDescription(); - // Accessor methods. If an accessor method that is valid for only - // a particular object type is called on an object of the wrong - // type, an exception is thrown. + // Accessor methods + // + // (Note: this comment is referenced in qpdf-c.h and the manual.) + // + // In PDF files, objects have specific types, but there is nothing + // that prevents PDF files from containing objects of types that + // aren't expected by the specification. Many of the accessors + // here expect objects of a particular type. Prior to qpdf 8, + // calling an accessor on a method of the wrong type, such as + // trying to get a dictionary key from an array, trying to get the + // string value of a number, etc., would throw an exception, but + // since qpdf 8, qpdf issues a warning and recovers using the + // following behavior: + // + // * Requesting a value of the wrong type (int value from string, + // array item from a scalar or dictionary, etc.) will return a + // zero-like value for that type: false for boolean, 0 for + // number, the empty string for string, or the null object for + // an object handle. + // + // * Accessing an array item that is out of bounds will return a + // null object. + // + // * Attempts to mutate an object of the wrong type (e.g., + // attempting to add a dictionary key to a scalar or array) will + // be ignored. + // + // When any of these fallback behaviors are used, qpdf issues a + // warning. Starting in qpdf 10.5, these warnings have the error + // code qpdf_e_object. Prior to 10.5, they had the error code + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with + // a QPDF object (as is the case for all objects whose origin was + // a PDF file), the warning is issued using the normal warning + // mechanism (as described in QPDF.hh), making it possible to + // suppress or otherwise detect them. If the QPDFObjectHandle is + // not associated with a QPDF object (meaning it was created + // programmatically), an exception will be thrown. + // + // The way to avoid getting any type warnings or exceptions, even + // when working with malformed PDF files, is to always check the + // type of a QPDFObjectHandle before accessing it (for example, + // make sure that isString() returns true before calling + // getStringValue()) and to always be sure that any array indices + // are in bounds. + // + // For additional discussion and rationale for this behavior, see + // the section in the QPDF manual entitled "Object Accessor + // Methods". // Methods for bool objects QPDF_DLL bool getBoolValue(); - // Methods for integer objects + // Methods for integer objects. Note: if an integer value is too + // big (too far away from zero in either direction) to fit in the + // requested return type, the maximum or minimum value for that + // return type may be returned. For example, on a system with + // 32-bit int, a numeric object with a value of 2^40 (or anything + // too big for 32 bits) will be returned as INT_MAX. QPDF_DLL long long getIntValue(); QPDF_DLL diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index af862bd4..1a8e3223 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -3048,23 +3048,17 @@ void QPDFObjectHandle::typeWarning(char const* expected_type, std::string const& warning) { - QPDF* context = 0; + QPDF* context = nullptr; std::string description; dereference(); - if (this->obj->getDescription(context, description)) - { - warn(context, - QPDFExc( - qpdf_e_damaged_pdf, + this->obj->getDescription(context, description); + // Null context handled by warn + warn(context, + QPDFExc(qpdf_e_object, "", description, 0, std::string("operation for ") + expected_type + " attempted on object of type " + getTypeName() + ": " + warning)); - } - else - { - assertType(expected_type, false); - } } void @@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning, void QPDFObjectHandle::objectWarning(std::string const& warning) { - warnIfPossible(warning, true); + QPDF* context = nullptr; + std::string description; + dereference(); + this->obj->getDescription(context, description); + // Null context handled by warn + warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning)); } void diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index 8a2f5233..fd3bd6fb 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -4560,6 +4560,96 @@ outfile.pdf filtered stream contents to a given pipeline. + + + Object Accessor Methods + + For general information about how to access instances of + QPDFObjectHandle, please see the comments + in QPDFObjectHandle.hh. Search for + “Accessor methods”. This section provides a more + in-depth discussion of the behavior and the rationale for the + behavior. + + + Why were type errors made into warnings? When + type checks were introduced into qpdf in the early days, it was + expected that type errors would only occur as a result of + programmer error. However, in practice, type errors would occur + with malformed PDF files because of assumptions made in code, + including code within the qpdf library and code written by library + users. The most common case would be chaining calls to + getKey() to access keys deep within a + dictionary. In many cases, qpdf would be able to recover from + these situations, but the old behavior often resulted in crashes + rather than graceful recovery. For this reason, the errors were + changed to warnings. + + + Why even warn about type errors when the user can't + usually do anything about them? Type warnings are + extremely valuable during development. Since it's impossible to + catch at compile time things like typos in dictionary key names or + logic errors around what the structure of a PDF file might be, the + presence of type warnings can save lots of developer time. They + have also proven useful in exposing issues in qpdf itself that + would have otherwise gone undetected. + + + Can there be a type-safe + QPDFObjectHandle? It would be + great if QPDFObjectHandle could be more + strongly typed so that you'd have to have check that something was + of a particular type before calling type-specific accessor + methods. However, implementing this at this stage of the library's + history would be quite difficult, and it would make a the common + pattern of drilling into an object no longer work. While it would + be possible to have a parallel interface, it would create a lot of + extra code. If qpdf were written in a language like rust, an + interface like this would make a lot of sense, but, for a variety + of reasons, the qpdf API is consistent with other APIs of its + time, relying on exception handling to catch errors. The + underlying PDF objects are inherently not type-safe. Forcing + stronger type safety in QPDFObjectHandle + would ultimately cause a lot more code to have to be written and + would like make software that uses qpdf more brittle, and even so, + checks would have to occur at runtime. + + + Why do type errors sometimes raise + exceptions? The way warnings work in qpdf requires a + QPDF object to be associated with an object + handle for a warning to be issued. It would be nice if this could + be fixed, but it would require major changes to the API. Rather + than throwing away these conditions, we convert them to + exceptions. It's not that bad though. Since any object handle that + was read from a file has an associated QPDF + object, it would only be type errors on objects that were created + explicitly that would cause exceptions, and in that case, type + errors are much more likely to be the result of a coding error + than invalid input. + + + Why does the behavior of a type exception differ between + the C and C++ API? There is no way to throw and catch + exceptions in C short of something like + setjmp and longjmp, and + that approach is not portable across language barriers. Since the + C API is often used from other languages, it's important to keep + things as simple as possible. Starting in qpdf 10.5, exceptions + that used to crash code using the C API will be written to stderr + by default, and it is possible to register an error handler. + There's no reason that the error handler can't simulate exception + handling in some way, such as by using setjmp + and longjmp or by setting some variable that + can be checked after library calls are made. In retrospect, it + might have been better if the C API object handle methods returned + error codes like the other methods and set return values in + passed-in pointers, but this would complicate both the + implementation and the use of the library for a case that is + actually quite rare and largely avoidable. + + Linearization @@ -5125,6 +5215,20 @@ print "\n"; Library Enhancements + + + Since qpdf version 8, using object accessor methods on an + instance of QPDFObjectHandle may + create warnings if the object is not of the expected type. + These warnings now have an error code of + qpdf_e_object instead of + qpdf_e_damaged_pdf. Also, comments have + been added to QPDFObjectHandle.hh to + explain in more detail what the behavior is. See for a more in-depth + discussion. + + Add qpdf_get_last_string_length to the diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 894b3a01..21f82a22 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -273,12 +273,16 @@ $td->runtest("check final version", show_ntests(); # ---------- $td->notify("--- Exceptions ---"); -$n_tests += 1; +$n_tests += 2; $td->runtest("check exception handling", {$td->COMMAND => "test_driver 61 -"}, {$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("check certain exception types", + {$td->COMMAND => "test_driver 81 -"}, + {$td->STRING => "test 81 done\n", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- @@ -5303,6 +5307,7 @@ for (my $large = 0; $large < $nlarge; ++$large) $td->NORMALIZE_NEWLINES); unlink $file; } +show_ntests(); # ---------- cleanup(); diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 7d5b2ece..613dc5bd 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2) pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(), p, size); } - else if (n == 61) + else if ((n == 61) || (n == 81)) { // Ignore filename argument entirely } @@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2) w2.setQDFMode(true); w2.write(); } + else if (n == 81) + { + // Exercise that type errors get their own special type + try + { + QPDFObjectHandle::newNull().getIntValue(); + assert(false); + } + catch (QPDFExc& e) + { + assert(e.getErrorCode() == qpdf_e_object); + } + } else { throw std::runtime_error(std::string("invalid test ") + -- cgit v1.2.3-54-g00ecf