diff options
96 files changed, 3003 insertions, 2168 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 3e50f0b5..2581fd7f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,9 @@ CMAKE_DEPENDENT_OPTION( GENERATE_AUTO_JOB "Automatically regenerate job files" OFF "NOT MAINTAINER_MODE" ON) CMAKE_DEPENDENT_OPTION( + ENABLE_QTC "Enable QTC test coverage" OFF + "NOT MAINTAINER_MODE" ON) +CMAKE_DEPENDENT_OPTION( SHOW_FAILED_TEST_OUTPUT "Show qtest output on failure" OFF "NOT CI_MODE" ON) @@ -110,8 +113,15 @@ endif() add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:POINTERHOLDER_TRANSITION=4>) +if(ENABLE_QTC) + set(ENABLE_QTC_ARG) +else() + add_compile_definitions(QPDF_DISABLE_QTC=1) + set(ENABLE_QTC_ARG --disable-tc) +endif() + enable_testing() -set(RUN_QTEST perl ${qpdf_SOURCE_DIR}/run-qtest) +set(RUN_QTEST perl ${qpdf_SOURCE_DIR}/run-qtest ${ENABLE_QTC_ARG}) if(WIN32) find_program(COPY_COMMAND NAMES cp copy) @@ -335,6 +345,7 @@ message(STATUS " build shared libraries: ${BUILD_SHARED_LIBS}") message(STATUS " build static libraries: ${BUILD_STATIC_LIBS}") message(STATUS " build manual: ${BUILD_DOC}") message(STATUS " compiler warnings are errors: ${WERROR}") +message(STATUS " QTC test coverage: ${ENABLE_QTC}") message(STATUS " system: ${CPACK_SYSTEM_NAME}") message(STATUS "") message(STATUS "*** Options Summary ***") @@ -1,3 +1,30 @@ +2022-08-31 Jay Berkenbilt <ejb@ql.org> + + * From m-holger (#729): refactor QPDF's parser into a new + QPDFParser class, cleaning the code, significantly improving + performance. + +2022-08-27 Jay Berkenbilt <ejb@ql.org> + + * From m-holger: major refactoring of QPDFTokenizer to improve + readability and to optimize performance. This also included some + optimizations to some InputSource classes. Thanks for this + excellent contribution. Fixes #749, #442. + +2022-08-07 Jay Berkenbilt <ejb@ql.org> + + * Add new build configuration option ENABLE_QTC, which is off by + default when not running in MAINTAINER_MODE. 
When this is off, + QTC coverage calls sprinkled throughout the qpdf source code are + compiled out for increased performance. See "Build Options" in the + manual for a discussion. Fixes #714. + +2022-08-06 Jay Berkenbilt <ejb@ql.org> + + * Added by m-holger: QPDF::getObject() method as a simpler form of + getObjectByID or getObjectByObjGen. The older methods are being + retained for compatibility and are not deprecated. + 2022-07-24 Jay Berkenbilt <ejb@ql.org> * include/qpdf/JSON.hh: Schema validation: allow a single item to @@ -4,9 +4,14 @@ Next Before Release: -* At next release, hide release-qpdf-10.6.3.0cmake* versions at readthedocs +* Review in order #726 +* Make ./performance_check usable by other people by having published + files to use for testing. + * https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf +* Incorporate --report-mem-usage into performance testing. Make sure + there is some test somewhere that exercises the millions of nulls case. +* Evaluate issues tagged with `next` * Stay on top of https://github.com/pikepdf/pikepdf/pull/315 -* Release qtest with updates to qtest-driver and copy back into qpdf Next: * Support json v2 in the C API. At a minimum, write_json, @@ -21,31 +26,23 @@ Pending changes: appimage build specifically is setting the runpath, which is actually desirable in this case. Make sure to understand and document this. Maybe add a check for it in the build. -* Decide what to do about #664 (get*Box) -* Add an option --ignore-encryption to ignore encryption information - and treat encrypted files as if they weren't encrypted. This should - make it possible to solve #598 (--show-encryption without a - password). We'll need to make sure we don't try to filter any - streams in this mode. 
Ideally we should be able to combine this with - --json so we can look at the raw encrypted strings and streams if we - want to, though be sure to document that the resulting JSON won't be - convertible back to a valid PDF. Since providing the password may - reveal additional details, --show-encryption could potentially retry - with this option if the first time doesn't work. Then, with the file - open, we can read the encryption dictionary normally. -* In libtests, separate executables that need the object library - from those that strictly use public API. Move as many of the test - drivers from the qpdf directory into the latter category as long - as doing so isn't too troublesome from a coverage standpoint. -* Consider adding fuzzer code for JSON -* Consider generating a non-flat pages tree before creating output to - better handle files with lots of pages. If there are more than 256 - pages, add a second layer with the second layer nodes having no more - than 256 nodes and being as evenly sizes as possible. Don't worry - about the case of more than 65,536 pages. If the top node has more - than 256 children, we'll live with it. -Parent pointer idea: +Soon: Break ground on "Document-level work" + +Remove raw pointers from the API +================================ + +(For qpdf >= 12) + +See if we can remove raw pointers from the QPDF API. There's a +discussion in https://github.com/qpdf/qpdf/pull/747. + +Fix Multiple Direct Object Owner Issue +====================================== + +These are some ideas I've had, but I'm parking them until I fully +understand m-holger's proposal to split QPDFObject into QPDFObject and +QPDFValue. * Add std::weak_ptr<QPDFObject> parent to QPDFObject. When adding a direct object to an array or dictionary, set its parent. When @@ -65,8 +62,6 @@ Note that arrays and dictionaries still need to contain QPDFObjectHandle because of indirect objects. This only pertains to direct objects, which are always "resolved" in QPDFObjectHandle. 
-Soon: Break ground on "Document-level work" - Possible future JSON enhancements ================================= @@ -376,169 +371,196 @@ directory or that are otherwise not publicly accessible. This includes things sent to me by email that are specifically not public. Even so, I find it useful to make reference to them in this list. - * Look at https://bestpractices.coreinfrastructure.org/en - - * Rework tests so that nothing is written into the source directory. - Ideally then the entire build could be done with a read-only - source tree. - - * Large file tests fail with linux32 before and after cmake. This was - first noticed after 10.6.3. I don't think it's worth fixing. - - * Consider updating the fuzzer with code that exercises - copyAnnotations, file attachments, and name and number trees. Check - fuzzer coverage. - - * Add code for creation of a file attachment annotation. It should - also be possible to create a widget annotation and a form field. - Update the pdf-attach-file.cc example with new APIs when ready. - - * Flattening of form XObjects seems like something that would be - useful in the library. We are seeing more cases of completely valid - PDF files with form XObjects that cause problems in other software. - Flattening of form XObjects could be a useful way to work around - those issues or to prepare files for additional processing, making - it possible for users of the qpdf library to not be concerned about - form XObjects. This could be done recursively; i.e., we could have a - method to embed a form XObject into whatever contains it, whether - that is a form XObject or a page. This would require more - significant interpretation of the content stream. We would need a - test file in which the placement of the form XObject has to be in - the right place, e.g., the form XObject partially obscures earlier - code and is partially obscured by later code. 
Keys in the resource - dictionary may need to be changed -- create test cases with lots of - duplicated/overlapping keys. - - * Part of closed_file_input_source.cc is disabled on Windows because - of odd failures. It might be worth investigating so we can fully - exercise this in the test suite. That said, ClosedFileInputSource - is exercised elsewhere in qpdf's test suite, so this is not that - pressing. - - * If possible, consider adding CCITT3, CCITT4, or any other easy - filters. For some reference code that we probably can't use but may - be handy anyway, see - http://partners.adobe.com/public/developer/ps/sdk/index_archive.html - - * If possible, support the following types of broken files: - - - Files that have no whitespace token after "endobj" such that - endobj collides with the start of the next object - - - See ../misc/broken-files - - - See ../misc/bad-files-issue-476. This directory contains a - snapshot of the google doc and linked PDF files from issue #476. - Please see the issue for details. - - * Additional form features - * set value from CLI? Specify title, and provide way to - disambiguate, probably by giving objgen of field - - * Pl_TIFFPredictor is pretty slow. - - * Support for handling file names with Unicode characters in Windows - is incomplete. qpdf seems to support them okay from a functionality - standpoint, and the right thing happens if you pass in UTF-8 - encoded filenames to QPDF library routines in Windows (they are - converted internally to wchar_t*), but file names are encoded in - UTF-8 on output, which doesn't produce nice error messages or - output on Windows in some cases. - - * If we ever wanted to do anything more with character encoding, see - ../misc/character-encoding/, which includes machine-readable dump - of table D.2 in the ISO-32000 PDF spec. This shows the mapping - between Unicode, StandardEncoding, WinAnsiEncoding, - MacRomanEncoding, and PDFDocEncoding. 
- - * Some test cases on bad files fail because qpdf is unable to find - the root dictionary when it fails to read the trailer. Recovery - could find the root dictionary and even the info dictionary in - other ways. In particular, issue-202.pdf can be opened by evince, - and there's no real reason that qpdf couldn't be made to be able to - recover that file as well. - - * Audit every place where qpdf allocates memory to see whether there - are cases where malicious inputs could cause qpdf to attempt to - grab very large amounts of memory. Certainly there are cases like - this, such as if a very highly compressed, very large image stream - is requested in a buffer. Hopefully normal input to output - filtering doesn't ever try to do this. QPDFWriter should be checked - carefully too. See also bugs/private/from-email-663916/ - - * Interactive form modification: - https://github.com/qpdf/qpdf/issues/213 contains a good discussion - of some ideas for adding methods to modify annotations and form - fields if we want to make it easier to support modifications to - interactive forms. Some of the ideas have been implemented, and - some of the probably never will be implemented, but it's worth a - read if there is an intention to work on this. In the issue, search - for "Regarding write functionality", and read that comment and the - responses to it. - - * Look at ~/Q/pdf-collection/forms-from-appian/ - - * When decrypting files with /R=6, hash_V5 is called more than once - with the same inputs. Caching the results or refactoring to reduce - the number of identical calls could improve performance for - workloads that involve processing large numbers of small files. - - * Consider adding a method to balance the pages tree. It would call - pushInheritedAttributesToPage, construct a pages tree from scratch, - and replace the /Pages key of the root dictionary with the new - tree. - - * Study what's required to support savable forms that can be saved by - Adobe Reader. 
Does this require actually signing the document with - an Adobe private key? Search for "Digital signatures" in the PDF - spec, and look at ~/Q/pdf-collection/form-with-full-save.pdf, which - came from Adobe's example site. See also - ../misc/digital-sign-from-trueroad/. If digital signatures are - implemented, update the docs on crypto providers, which mention - that this may happen in the future. - - * Qpdf does not honor /EFF when adding new file attachments. When it - encrypts, it never generates streams with explicit crypt filters. - Prior to 10.2, there was an incorrect attempt to treat /EFF as a - default value for decrypting file attachment streams, but it is not - supposed to mean that. Instead, it is intended for conforming - writers to obey this when adding new attachments. Qpdf is not a - conforming writer in that respect. - - * The whole xref handling code in the QPDF object allows the same - object with more than one generation to coexist, but a lot of logic - assumes this isn't the case. Anything that creates mappings only - with the object number and not the generation is this way, - including most of the interaction between QPDFWriter and QPDF. If - we wanted to allow the same object with more than one generation to - coexist, which I'm not sure is allowed, we could fix this by - changing xref_table. Alternatively, we could detect and disallow - that case. In fact, it appears that Adobe reader and other PDF - viewing software silently ignores objects of this type, so this is - probably not a big deal. - - * From a suggestion in bug 3152169, consider having an option to - re-encode inline images with an ASCII encoding. - - * From github issue 2, provide more in-depth output for examining - hint stream contents. Consider adding on option to provide a - human-readable dump of linearization hint tables. This should - include improving the 'overflow reading bit stream' message as - reported in issue #2. 
There are multiple calls to stopOnError in - the linearization checking code. Ideally, these should not - terminate checking. It would require re-acquiring an understanding - of all that code to make the checks more robust. In particular, - it's hard to look at the code and quickly determine what is a true - logic error and what could happen because of malformed user input. - See also ../misc/linearization-errors. - - * If I ever decide to make appearance stream-generation aware of - fonts or font metrics, see email from Tobias with Message-ID - <5C3C9C6C.8000102@thax.hardliners.org> dated 2019-01-14. - - * Look at places in the code where object traversal is being done and, - where possible, try to avoid it entirely or at least avoid ever - traversing the same objects multiple times. +* Add an option --ignore-encryption to ignore encryption information + and treat encrypted files as if they weren't encrypted. This should + make it possible to solve #598 (--show-encryption without a + password). We'll need to make sure we don't try to filter any + streams in this mode. Ideally we should be able to combine this with + --json so we can look at the raw encrypted strings and streams if we + want to, though be sure to document that the resulting JSON won't be + convertible back to a valid PDF. Since providing the password may + reveal additional details, --show-encryption could potentially retry + with this option if the first time doesn't work. Then, with the file + open, we can read the encryption dictionary normally. + +* In libtests, separate executables that need the object library + from those that strictly use public API. Move as many of the test + drivers from the qpdf directory into the latter category as long + as doing so isn't too troublesome from a coverage standpoint. + +* Consider generating a non-flat pages tree before creating output to + better handle files with lots of pages. 
If there are more than 256 + pages, add a second layer with the second layer nodes having no more + than 256 nodes and being as evenly sized as possible. Don't worry + about the case of more than 65,536 pages. If the top node has more + than 256 children, we'll live with it. This is only safe if all + intermediate page nodes have only /Kids, /Parent, /Type, and /Count. + +* Look at https://bestpractices.coreinfrastructure.org/en + +* Consider adding fuzzer code for JSON + +* Rework tests so that nothing is written into the source directory. + Ideally then the entire build could be done with a read-only + source tree. + +* Large file tests fail with linux32 before and after cmake. This was + first noticed after 10.6.3. I don't think it's worth fixing. + +* Consider updating the fuzzer with code that exercises + copyAnnotations, file attachments, and name and number trees. Check + fuzzer coverage. + +* Add code for creation of a file attachment annotation. It should + also be possible to create a widget annotation and a form field. + Update the pdf-attach-file.cc example with new APIs when ready. + +* Flattening of form XObjects seems like something that would be + useful in the library. We are seeing more cases of completely valid + PDF files with form XObjects that cause problems in other software. + Flattening of form XObjects could be a useful way to work around + those issues or to prepare files for additional processing, making + it possible for users of the qpdf library to not be concerned about + form XObjects. This could be done recursively; i.e., we could have a + method to embed a form XObject into whatever contains it, whether + that is a form XObject or a page. This would require more + significant interpretation of the content stream. We would need a + test file in which the placement of the form XObject has to be in + the right place, e.g., the form XObject partially obscures earlier + code and is partially obscured by later code. 
Keys in the resource + dictionary may need to be changed -- create test cases with lots of + duplicated/overlapping keys. + +* Part of closed_file_input_source.cc is disabled on Windows because + of odd failures. It might be worth investigating so we can fully + exercise this in the test suite. That said, ClosedFileInputSource + is exercised elsewhere in qpdf's test suite, so this is not that + pressing. + +* If possible, consider adding CCITT3, CCITT4, or any other easy + filters. For some reference code that we probably can't use but may + be handy anyway, see + http://partners.adobe.com/public/developer/ps/sdk/index_archive.html + +* If possible, support the following types of broken files: + + - Files that have no whitespace token after "endobj" such that + endobj collides with the start of the next object + + - See ../misc/broken-files + + - See ../misc/bad-files-issue-476. This directory contains a + snapshot of the google doc and linked PDF files from issue #476. + Please see the issue for details. + +* Additional form features + * set value from CLI? Specify title, and provide way to + disambiguate, probably by giving objgen of field + +* Pl_TIFFPredictor is pretty slow. + +* Support for handling file names with Unicode characters in Windows + is incomplete. qpdf seems to support them okay from a functionality + standpoint, and the right thing happens if you pass in UTF-8 + encoded filenames to QPDF library routines in Windows (they are + converted internally to wchar_t*), but file names are encoded in + UTF-8 on output, which doesn't produce nice error messages or + output on Windows in some cases. + +* If we ever wanted to do anything more with character encoding, see + ../misc/character-encoding/, which includes machine-readable dump + of table D.2 in the ISO-32000 PDF spec. This shows the mapping + between Unicode, StandardEncoding, WinAnsiEncoding, + MacRomanEncoding, and PDFDocEncoding. 
+ +* Some test cases on bad files fail because qpdf is unable to find + the root dictionary when it fails to read the trailer. Recovery + could find the root dictionary and even the info dictionary in + other ways. In particular, issue-202.pdf can be opened by evince, + and there's no real reason that qpdf couldn't be made to be able to + recover that file as well. + +* Audit every place where qpdf allocates memory to see whether there + are cases where malicious inputs could cause qpdf to attempt to + grab very large amounts of memory. Certainly there are cases like + this, such as if a very highly compressed, very large image stream + is requested in a buffer. Hopefully normal input to output + filtering doesn't ever try to do this. QPDFWriter should be checked + carefully too. See also bugs/private/from-email-663916/ + +* Interactive form modification: + https://github.com/qpdf/qpdf/issues/213 contains a good discussion + of some ideas for adding methods to modify annotations and form + fields if we want to make it easier to support modifications to + interactive forms. Some of the ideas have been implemented, and + some of them probably never will be implemented, but it's worth a + read if there is an intention to work on this. In the issue, search + for "Regarding write functionality", and read that comment and the + responses to it. + +* Look at ~/Q/pdf-collection/forms-from-appian/ + +* When decrypting files with /R=6, hash_V5 is called more than once + with the same inputs. Caching the results or refactoring to reduce + the number of identical calls could improve performance for + workloads that involve processing large numbers of small files. + +* Consider adding a method to balance the pages tree. It would call + pushInheritedAttributesToPage, construct a pages tree from scratch, + and replace the /Pages key of the root dictionary with the new + tree. + +* Study what's required to support savable forms that can be saved by + Adobe Reader. 
Does this require actually signing the document with + an Adobe private key? Search for "Digital signatures" in the PDF + spec, and look at ~/Q/pdf-collection/form-with-full-save.pdf, which + came from Adobe's example site. See also + ../misc/digital-sign-from-trueroad/. If digital signatures are + implemented, update the docs on crypto providers, which mention + that this may happen in the future. + +* Qpdf does not honor /EFF when adding new file attachments. When it + encrypts, it never generates streams with explicit crypt filters. + Prior to 10.2, there was an incorrect attempt to treat /EFF as a + default value for decrypting file attachment streams, but it is not + supposed to mean that. Instead, it is intended for conforming + writers to obey this when adding new attachments. Qpdf is not a + conforming writer in that respect. + +* The whole xref handling code in the QPDF object allows the same + object with more than one generation to coexist, but a lot of logic + assumes this isn't the case. Anything that creates mappings only + with the object number and not the generation is this way, + including most of the interaction between QPDFWriter and QPDF. If + we wanted to allow the same object with more than one generation to + coexist, which I'm not sure is allowed, we could fix this by + changing xref_table. Alternatively, we could detect and disallow + that case. In fact, it appears that Adobe reader and other PDF + viewing software silently ignores objects of this type, so this is + probably not a big deal. + +* From a suggestion in bug 3152169, consider having an option to + re-encode inline images with an ASCII encoding. + +* From github issue 2, provide more in-depth output for examining + hint stream contents. Consider adding an option to provide a + human-readable dump of linearization hint tables. This should + include improving the 'overflow reading bit stream' message as + reported in issue #2. 
There are multiple calls to stopOnError in + the linearization checking code. Ideally, these should not + terminate checking. It would require re-acquiring an understanding + of all that code to make the checks more robust. In particular, + it's hard to look at the code and quickly determine what is a true + logic error and what could happen because of malformed user input. + See also ../misc/linearization-errors. + +* If I ever decide to make appearance stream-generation aware of + fonts or font metrics, see email from Tobias with Message-ID + <5C3C9C6C.8000102@thax.hardliners.org> dated 2019-01-14. + +* Look at places in the code where object traversal is being done and, + where possible, try to avoid it entirely or at least avoid ever + traversing the same objects multiple times. ---------------------------------------------------------------------- diff --git a/build-scripts/test-sanitizers b/build-scripts/test-sanitizers index c3c314f9..75ac8af0 100755 --- a/build-scripts/test-sanitizers +++ b/build-scripts/test-sanitizers @@ -10,7 +10,8 @@ env CFLAGS="-fsanitize=address -fsanitize=undefined" \ CC=clang CXX=clang++ \ cmake -S . -B build \ -DCI_MODE=1 -DBUILD_SHARED_LIBS=0 -DCMAKE_BUILD_TYPE=Debug \ - -DREQUIRE_CRYPTO_OPENSSL=1 -DREQUIRE_CRYPTO_GNUTLS=1 + -DREQUIRE_CRYPTO_OPENSSL=1 -DREQUIRE_CRYPTO_GNUTLS=1 \ + -DENABLE_QTC=1 cmake --build build -j$(nproc) -- -k cd build # libtests automatically runs with all crypto providers. 
diff --git a/cSpell.json b/cSpell.json index 6251d984..88f7e22d 100644 --- a/cSpell.json +++ b/cSpell.json @@ -131,6 +131,7 @@ "esize", "eval", "extlibdir", + "fclose", "fdict", "ffield", "fghij", @@ -268,6 +269,7 @@ "maxdepth", "maxobjectid", "mdash", + "memstream", "mindepth", "mkdir", "mkinstalldirs", diff --git a/include/qpdf/BufferInputSource.hh b/include/qpdf/BufferInputSource.hh index b965704f..1a93815b 100644 --- a/include/qpdf/BufferInputSource.hh +++ b/include/qpdf/BufferInputSource.hh @@ -54,26 +54,11 @@ class QPDF_DLL_CLASS BufferInputSource: public InputSource virtual void unreadCh(char ch); private: - class QPDF_DLL_PRIVATE Members - { - friend class BufferInputSource; - - public: - QPDF_DLL - ~Members() = default; - - private: - Members(bool own_memory, std::string const& description, Buffer* buf); - Members(Members const&) = delete; - - bool own_memory; - std::string description; - Buffer* buf; - qpdf_offset_t cur_offset; - qpdf_offset_t max_offset; - }; - - std::shared_ptr<Members> m; + bool own_memory; + std::string description; + Buffer* buf; + qpdf_offset_t cur_offset; + qpdf_offset_t max_offset; }; #endif // QPDF_BUFFERINPUTSOURCE_HH diff --git a/include/qpdf/ClosedFileInputSource.hh b/include/qpdf/ClosedFileInputSource.hh index c72a1df8..b23c2767 100644 --- a/include/qpdf/ClosedFileInputSource.hh +++ b/include/qpdf/ClosedFileInputSource.hh @@ -73,23 +73,10 @@ class QPDF_DLL_CLASS ClosedFileInputSource: public InputSource QPDF_DLL_PRIVATE void after(); - class QPDF_DLL_PRIVATE Members - { - friend class ClosedFileInputSource; - - public: - QPDF_DLL - ~Members() = default; - - private: - Members(char const* filename); - - std::string filename; - qpdf_offset_t offset; - std::shared_ptr<FileInputSource> fis; - bool stay_open; - }; - std::shared_ptr<Members> m; + std::string filename; + qpdf_offset_t offset; + std::shared_ptr<FileInputSource> fis; + bool stay_open; }; #endif // QPDF_CLOSEDFILEINPUTSOURCE_HH diff --git a/include/qpdf/Constants.h 
b/include/qpdf/Constants.h index 5d2113bd..cf6bdaef 100644 --- a/include/qpdf/Constants.h +++ b/include/qpdf/Constants.h @@ -82,6 +82,8 @@ enum qpdf_object_type_e { /* Additional object types that can occur in content streams */ ot_operator, ot_inlineimage, + /* Object types internal to qpdf */ + ot_unresolved, /* NOTE: if adding to this list, update QPDFObject.hh */ }; diff --git a/include/qpdf/FileInputSource.hh b/include/qpdf/FileInputSource.hh index f1e7edf4..9e0d57fb 100644 --- a/include/qpdf/FileInputSource.hh +++ b/include/qpdf/FileInputSource.hh @@ -58,24 +58,9 @@ class QPDF_DLL_CLASS FileInputSource: public InputSource FileInputSource(FileInputSource const&) = delete; FileInputSource& operator=(FileInputSource const&) = delete; - class QPDF_DLL_PRIVATE Members - { - friend class FileInputSource; - - public: - QPDF_DLL - ~Members(); - - private: - Members(bool close_file); - Members(Members const&) = delete; - - bool close_file; - std::string filename; - FILE* file; - }; - - std::shared_ptr<Members> m; + bool close_file; + std::string filename; + FILE* file; }; #endif // QPDF_FILEINPUTSOURCE_HH diff --git a/include/qpdf/InputSource.hh b/include/qpdf/InputSource.hh index 9feb8ec3..e9d99cdb 100644 --- a/include/qpdf/InputSource.hh +++ b/include/qpdf/InputSource.hh @@ -93,6 +93,12 @@ class QPDF_DLL_CLASS InputSource // efficient. virtual void unreadCh(char ch) = 0; + // The following methods are for use by QPDFTokenizer + inline qpdf_offset_t fastTell(); + inline bool fastRead(char&); + inline void fastUnread(bool); + inline void loadBuffer(); + protected: qpdf_offset_t last_offset; @@ -111,6 +117,68 @@ class QPDF_DLL_CLASS InputSource }; std::shared_ptr<Members> m; + + // State for fast... 
methods + static const qpdf_offset_t buf_size = 128; + char buffer[buf_size]; + qpdf_offset_t buf_len = 0; + qpdf_offset_t buf_idx = 0; + qpdf_offset_t buf_start = 0; }; +inline void +InputSource::loadBuffer() +{ + this->buf_idx = 0; + this->buf_len = qpdf_offset_t(read(this->buffer, this->buf_size)); + // NB read sets last_offset + this->buf_start = this->last_offset; +} + +inline qpdf_offset_t +InputSource::fastTell() +{ + if (this->buf_len == 0) { + loadBuffer(); + } else { + auto curr = tell(); + if (curr < this->buf_start || + curr >= (this->buf_start + this->buf_len)) { + loadBuffer(); + } else { + this->last_offset = curr; + this->buf_idx = curr - this->buf_start; + } + } + return this->last_offset; +} + +inline bool +InputSource::fastRead(char& ch) +{ + // Before calling fastRead, fastTell must be called to prepare the buffer. + // Once reading is complete, fastUnread must be called to set the correct + // file position. + if (this->buf_idx < this->buf_len) { + ch = this->buffer[this->buf_idx]; + ++(this->buf_idx); + ++(this->last_offset); + return true; + + } else if (this->buf_len == 0) { + return false; + } else { + seek(this->buf_start + this->buf_len, SEEK_SET); + fastTell(); + return fastRead(ch); + } +} + +inline void +InputSource::fastUnread(bool back) +{ + this->last_offset -= back ? 1 : 0; + seek(this->last_offset, SEEK_SET); +} + #endif // QPDF_INPUTSOURCE_HH diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 64f31edd..81169fbd 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -49,6 +49,7 @@ class QPDF_Stream; class BitStream; class BitWriter; class QPDFLogger; +class QPDFParser; class QPDF { @@ -382,8 +383,15 @@ class QPDF QPDF_DLL QPDFObjectHandle makeIndirectObject(QPDFObjectHandle); - // Retrieve an object by object ID and generation. Returns an - // indirect reference to it. + // Retrieve an object by object ID and generation. Returns an + // indirect reference to it. 
The getObject() methods were added + // for qpdf 11. + QPDF_DLL + QPDFObjectHandle getObject(QPDFObjGen const&); + QPDF_DLL + QPDFObjectHandle getObject(int objid, int generation); + // These are older methods, but there is no intention to deprecate + // them. QPDF_DLL QPDFObjectHandle getObjectByObjGen(QPDFObjGen const&); QPDF_DLL @@ -835,19 +843,13 @@ class QPDF // it can resolve indirect references. class Resolver { - friend class QPDFObjectHandle; + friend class QPDFObject; private: - static std::shared_ptr<QPDFObject> + static void resolve(QPDF* qpdf, QPDFObjGen const& og) { - return qpdf->resolve(og); - } - static bool - objectChanged( - QPDF* qpdf, QPDFObjGen const& og, std::shared_ptr<QPDFObject>& oph) - { - return qpdf->objectChanged(og, oph); + qpdf->resolve(og); } }; friend class Resolver; @@ -874,7 +876,7 @@ class QPDF // resolution class ParseGuard { - friend class QPDFObjectHandle; + friend class QPDFParser; private: ParseGuard(QPDF* qpdf) : @@ -1166,12 +1168,20 @@ class QPDF std::string const& description, QPDFObjGen const& exp_og, QPDFObjGen& og); - bool objectChanged(QPDFObjGen const& og, std::shared_ptr<QPDFObject>& oph); - std::shared_ptr<QPDFObject> resolve(QPDFObjGen const& og); + void resolve(QPDFObjGen const& og); void resolveObjectsInStream(int obj_stream_number); void stopOnError(std::string const& message); QPDFObjectHandle reserveObjectIfNotExists(QPDFObjGen const& og); QPDFObjectHandle reserveStream(QPDFObjGen const& og); + QPDFObjectHandle + newIndirect(QPDFObjGen const&, std::shared_ptr<QPDFObject> const&); + bool isCached(QPDFObjGen const& og); + bool isUnresolved(QPDFObjGen const& og); + void updateCache( + QPDFObjGen const& og, + std::shared_ptr<QPDFObject> const& object, + qpdf_offset_t end_before_space, + qpdf_offset_t end_after_space); // Calls finish() on the pipeline when done but does not delete it bool pipeStreamData( @@ -1716,7 +1726,6 @@ class QPDF bool in_parse; bool parsed; std::set<int> resolved_object_streams; - bool 
ever_replaced_objects; // Linearization data qpdf_offset_t first_xref_item_offset; // actual value from file diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index 0e4d8a2e..2d4ab0d2 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -711,6 +711,7 @@ class QPDFJob bool json_input; bool json_output; std::string update_from_json; + bool report_mem_usage; }; std::shared_ptr<Members> m; }; diff --git a/include/qpdf/QPDFNameTreeObjectHelper.hh b/include/qpdf/QPDFNameTreeObjectHelper.hh index 7093ca2e..006ab158 100644 --- a/include/qpdf/QPDFNameTreeObjectHelper.hh +++ b/include/qpdf/QPDFNameTreeObjectHelper.hh @@ -42,7 +42,7 @@ class NNTreeImpl; class NNTreeIterator; class NNTreeDetails; -class QPDFNameTreeObjectHelper: public QPDFObjectHelper +class QPDF_DLL_CLASS QPDFNameTreeObjectHelper: public QPDFObjectHelper { public: // The qpdf object is required so that this class can issue @@ -55,7 +55,7 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper static QPDFNameTreeObjectHelper newEmpty(QPDF&, bool auto_repair = true); QPDF_DLL - virtual ~QPDFNameTreeObjectHelper() = default; + virtual ~QPDFNameTreeObjectHelper(); // Return whether the number tree has an explicit entry for this // number. 
@@ -67,7 +67,7 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper QPDF_DLL bool findObject(std::string const& utf8, QPDFObjectHandle& oh); - class iterator + class QPDF_DLL_PRIVATE iterator { friend class QPDFNameTreeObjectHelper; @@ -181,7 +181,7 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper void setSplitThreshold(int); private: - class Members + class QPDF_DLL_PRIVATE Members { friend class QPDFNameTreeObjectHelper; diff --git a/include/qpdf/QPDFNumberTreeObjectHelper.hh b/include/qpdf/QPDFNumberTreeObjectHelper.hh index b053b5f4..c1ffc251 100644 --- a/include/qpdf/QPDFNumberTreeObjectHelper.hh +++ b/include/qpdf/QPDFNumberTreeObjectHelper.hh @@ -39,7 +39,7 @@ class NNTreeImpl; class NNTreeIterator; class NNTreeDetails; -class QPDFNumberTreeObjectHelper: public QPDFObjectHelper +class QPDF_DLL_CLASS QPDFNumberTreeObjectHelper: public QPDFObjectHelper { public: // The qpdf object is required so that this class can issue @@ -49,7 +49,7 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper QPDFObjectHandle, QPDF&, bool auto_repair = true); QPDF_DLL - virtual ~QPDFNumberTreeObjectHelper() = default; + virtual ~QPDFNumberTreeObjectHelper(); // Create an empty number tree QPDF_DLL @@ -85,7 +85,7 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper bool findObjectAtOrBelow( numtree_number idx, QPDFObjectHandle& oh, numtree_number& offset); - class iterator + class QPDF_DLL_PRIVATE iterator { friend class QPDFNumberTreeObjectHelper; @@ -200,7 +200,7 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper void setSplitThreshold(int); private: - class Members + class QPDF_DLL_PRIVATE Members { friend class QPDFNumberTreeObjectHelper; typedef QPDFNumberTreeObjectHelper::numtree_number numtree_number; diff --git a/include/qpdf/QPDFObject.hh b/include/qpdf/QPDFObject.hh index eb7c4b90..e6d1d18b 100644 --- a/include/qpdf/QPDFObject.hh +++ b/include/qpdf/QPDFObject.hh @@ -25,6 +25,7 @@ #include <qpdf/Constants.h> #include <qpdf/DLL.h> 
#include <qpdf/JSON.hh> +#include <qpdf/QPDFValue.hh> #include <qpdf/Types.h> #include <string> @@ -34,9 +35,9 @@ class QPDFObjectHandle; class QPDFObject { - public: - QPDFObject(); + friend class QPDFValue; + public: // Objects derived from QPDFObject are accessible through // QPDFObjectHandle. Each object returns a unique type code that // has one of the valid qpdf_object_type_e values. As new object @@ -61,18 +62,128 @@ class QPDFObject static constexpr object_type_e ot_stream = ::ot_stream; static constexpr object_type_e ot_operator = ::ot_operator; static constexpr object_type_e ot_inlineimage = ::ot_inlineimage; + static constexpr object_type_e ot_unresolved = ::ot_unresolved; + QPDFObject() = default; virtual ~QPDFObject() = default; - virtual std::shared_ptr<QPDFObject> shallowCopy() = 0; - virtual std::string unparse() = 0; - virtual JSON getJSON(int json_version) = 0; + + std::shared_ptr<QPDFObject> + shallowCopy() + { + return value->shallowCopy(); + } + std::string + unparse() + { + return value->unparse(); + } + JSON + getJSON(int json_version) + { + return value->getJSON(json_version); + } // Return a unique type code for the object - virtual object_type_e getTypeCode() const = 0; + object_type_e + getTypeCode() const + { + return value->type_code; + } // Return a string literal that describes the type, useful for // debugging and testing - virtual char const* getTypeName() const = 0; + char const* + getTypeName() const + { + return value->type_name; + } + // Returns nullptr for direct objects + QPDF* + getQPDF() const + { + return value->qpdf; + } + QPDFObjGen + getObjGen() const + { + return value->og; + } + + void + setDescription(QPDF* qpdf, std::string const& description) + { + return value->setDescription(qpdf, description); + } + bool + getDescription(QPDF*& qpdf, std::string& description) + { + return value->getDescription(qpdf, description); + } + bool + hasDescription() + { + return value->hasDescription(); + } + void + 
setParsedOffset(qpdf_offset_t offset) + { + value->setParsedOffset(offset); + } + qpdf_offset_t + getParsedOffset() + { + return value->getParsedOffset(); + } + void + assign(std::shared_ptr<QPDFObject> o) + { + value = o->value; + } + void + swapWith(std::shared_ptr<QPDFObject> o) + { + auto v = value; + value = o->value; + o->value = v; + auto og = value->og; + value->og = o->value->og; + o->value->og = og; + } + + // The following two methods are for use by class QPDF only + void + setObjGen(QPDF* qpdf, QPDFObjGen const& og) + { + value->qpdf = qpdf; + value->og = og; + } + void + resetObjGen() + { + value->qpdf = nullptr; + value->og = QPDFObjGen(); + } + + bool + isUnresolved() const + { + return value->type_code == ::ot_unresolved; + } + void + resolve() + { + if (isUnresolved()) { + doResolve(); + } + } + void doResolve(); + + template <typename T> + T* + as() + { + return dynamic_cast<T*>(value.get()); + } // Accessor to give specific access to non-public methods class ObjAccessor @@ -89,29 +200,20 @@ class QPDFObject } } }; - friend class ObjAccessor; - virtual void setDescription(QPDF*, std::string const&); - bool getDescription(QPDF*&, std::string&); - bool hasDescription(); - - void setParsedOffset(qpdf_offset_t offset); - qpdf_offset_t getParsedOffset(); + friend class ObjAccessor; protected: virtual void releaseResolved() { + value->releaseResolved(); } - static std::shared_ptr<QPDFObject> do_create(QPDFObject*); private: QPDFObject(QPDFObject const&) = delete; QPDFObject& operator=(QPDFObject const&) = delete; - - QPDF* owning_qpdf; - std::string object_description; - qpdf_offset_t parsed_offset; + std::shared_ptr<QPDFValue> value; }; #endif // QPDFOBJECT_HH diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 7ea6b062..16e8dc8b 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -43,15 +43,28 @@ class Pipeline; class QPDF; -class QPDF_Dictionary; class QPDF_Array; +class 
QPDF_Bool; +class QPDF_Dictionary; +class QPDF_InlineImage; +class QPDF_Integer; +class QPDF_Name; +class QPDF_Null; +class QPDF_Operator; +class QPDF_Real; +class QPDF_Reserved; +class QPDF_Stream; +class QPDF_String; class QPDFTokenizer; class QPDFExc; class Pl_QPDFTokenizer; class QPDFMatrix; +class QPDFParser; class QPDFObjectHandle { + friend class QPDFParser; + public: // This class is used by replaceStreamData. It provides an // alternative way of associating stream data with a stream. See @@ -313,13 +326,13 @@ class QPDFObjectHandle }; QPDF_DLL - QPDFObjectHandle(); + QPDFObjectHandle() = default; QPDF_DLL QPDFObjectHandle(QPDFObjectHandle const&) = default; QPDF_DLL QPDFObjectHandle& operator=(QPDFObjectHandle const&) = default; QPDF_DLL - bool isInitialized() const; + inline bool isInitialized() const; // Return type code and type name of underlying object. These are // useful for doing rapid type tests (like switch statements) or @@ -367,7 +380,7 @@ class QPDFObjectHandle // This returns true in addition to the query for the specific // type for indirect objects. QPDF_DLL - bool isIndirect(); + inline bool isIndirect() const; // True for everything except array, dictionary, stream, word, and // inline image. @@ -957,9 +970,11 @@ class QPDFObjectHandle std::set<std::string>* resource_names = nullptr); // Return the QPDF object that owns an indirect object. Returns - // null for a direct object. + // null for a direct object if allow_nullptr is set to true or + // throws a runtime error otherwise. QPDF_DLL - QPDF* getOwningQPDF(); + inline QPDF* getOwningQPDF( + bool allow_nullptr = true, std::string const& error_msg = "") const; // Create a shallow copy of an object as a direct object, but do not // traverse across indirect object boundaries. That means that, @@ -1300,11 +1315,11 @@ class QPDFObjectHandle // QPDFObjGen instead. 
QPDF_DLL - QPDFObjGen getObjGen() const; + inline QPDFObjGen getObjGen() const; QPDF_DLL - int getObjectID() const; + inline int getObjectID() const; QPDF_DLL - int getGeneration() const; + inline int getGeneration() const; QPDF_DLL std::string unparse(); @@ -1438,9 +1453,9 @@ class QPDFObjectHandle private: static QPDFObjectHandle - newIndirect(QPDF* qpdf, QPDFObjGen const& og) + newIndirect(std::shared_ptr<QPDFObject> const& obj) { - return QPDFObjectHandle::newIndirect(qpdf, og); + return QPDFObjectHandle(obj); } static QPDFObjectHandle newStream( @@ -1453,12 +1468,6 @@ class QPDFObjectHandle return QPDFObjectHandle::newStream( qpdf, og, stream_dict, offset, length); } - // Reserve an object with a specific ID - static QPDFObjectHandle - makeReserved() - { - return QPDFObjectHandle::makeReserved(); - } }; friend class Factory; @@ -1478,6 +1487,16 @@ class QPDFObjectHandle }; return o.obj; } + static QPDF_Array* + asArray(QPDFObjectHandle& oh) + { + return oh.asArray(); + } + static QPDF_Stream* + asStream(QPDFObjectHandle& oh) + { + return oh.asStream(); + } }; friend class ObjAccessor; @@ -1558,27 +1577,32 @@ class QPDFObjectHandle bool isImage(bool exclude_imagemask = true); private: - QPDFObjectHandle(QPDF*, QPDFObjGen const& og); - QPDFObjectHandle(std::shared_ptr<QPDFObject> const&); - - enum parser_state_e { - st_top, - st_start, - st_stop, - st_eof, - st_dictionary, - st_array - }; + QPDFObjectHandle(std::shared_ptr<QPDFObject> const& obj) : + obj(obj) + { + } // Private object factory methods - static QPDFObjectHandle newIndirect(QPDF*, QPDFObjGen const& og); static QPDFObjectHandle newStream( QPDF* qpdf, QPDFObjGen const& og, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length); - static QPDFObjectHandle makeReserved(); + + QPDF_Array* asArray(); + QPDF_Bool* asBool(); + QPDF_Dictionary* asDictionary(); + QPDF_InlineImage* asInlineImage(); + QPDF_Integer* asInteger(); + QPDF_Name* asName(); + QPDF_Null* asNull(); + QPDF_Operator* 
asOperator(); + QPDF_Real* asReal(); + QPDF_Reserved* asReserved(); + QPDF_Stream* asStream(); + QPDF_Stream* asStreamWithAssert(); + QPDF_String* asString(); void typeWarning(char const* expected_type, std::string const& warning); void objectWarning(std::string const& warning); @@ -1591,21 +1615,8 @@ class QPDFObjectHandle bool stop_at_streams); void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only); void releaseResolved(); - static void setObjectDescriptionFromInput( - QPDFObjectHandle, - QPDF*, - std::string const&, - std::shared_ptr<InputSource>, - qpdf_offset_t); - static QPDFObjectHandle parseInternal( - std::shared_ptr<InputSource> input, - std::string const& object_description, - QPDFTokenizer& tokenizer, - bool& empty, - StringDecrypter* decrypter, - QPDF* context, - bool content_stream); - void setParsedOffset(qpdf_offset_t offset); + + inline void setParsedOffset(qpdf_offset_t offset); void parseContentStream_internal( std::string const& description, ParserCallbacks* callbacks); static void parseContentStream_data( @@ -1618,15 +1629,10 @@ class QPDFObjectHandle static void warn(QPDF*, QPDFExc const&); void checkOwnership(QPDFObjectHandle const&) const; - bool initialized; - // Moving members of QPDFObjectHandle into a smart pointer incurs // a substantial performance penalty since QPDFObjectHandle // objects are copied around so frequently. - QPDF* qpdf; - QPDFObjGen og; std::shared_ptr<QPDFObject> obj; - bool reserved; }; #ifndef QPDF_NO_QPDF_STRING @@ -1846,4 +1852,58 @@ class QPDFObjectHandle::QPDFArrayItems QPDFObjectHandle oh; }; +inline QPDFObjGen +QPDFObjectHandle::getObjGen() const +{ + return isInitialized() ? 
obj->getObjGen() : QPDFObjGen(); +} + +inline int +QPDFObjectHandle::getObjectID() const +{ + return getObjGen().getObj(); +} + +inline int +QPDFObjectHandle::getGeneration() const +{ + return getObjGen().getGen(); +} + +inline bool +QPDFObjectHandle::isIndirect() const +{ + return (obj != nullptr) && (getObjectID() != 0); +} + +inline bool +QPDFObjectHandle::isInitialized() const +{ + return obj != nullptr; +} + +// Indirect object accessors +inline QPDF* +QPDFObjectHandle::getOwningQPDF( + bool allow_nullptr, std::string const& error_msg) const +{ + // Will be null for direct objects + auto result = isInitialized() ? this->obj->getQPDF() : nullptr; + if (!allow_nullptr && (result == nullptr)) { + throw std::runtime_error( + error_msg == "" ? "attempt to use a null qpdf object" : error_msg); + } + return result; +} + +inline void +QPDFObjectHandle::setParsedOffset(qpdf_offset_t offset) +{ + // This is called during parsing on newly created direct objects, + // so we can't call dereference() here. 
+ if (isInitialized()) { + this->obj->setParsedOffset(offset); + } +} + #endif // QPDFOBJECTHANDLE_HH diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index 2187f21e..33b2e710 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -193,60 +193,82 @@ class QPDFTokenizer QPDFTokenizer(QPDFTokenizer const&) = delete; QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; - void resolveLiteral(); bool isSpace(char); bool isDelimiter(char); void findEI(std::shared_ptr<InputSource> input); enum state_e { st_top, + st_in_hexstring, + st_in_string, + st_in_hexstring_2nd, + st_name, + st_literal, st_in_space, st_in_comment, - st_in_string, + st_string_escape, + st_char_code, + st_string_after_cr, st_lt, st_gt, - st_literal, - st_in_hexstring, st_inline_image, + st_sign, + st_number, + st_real, + st_decimal, + st_name_hex1, + st_name_hex2, + st_before_token, st_token_ready }; - class Members - { - friend class QPDFTokenizer; - - public: - QPDF_DLL - ~Members() = default; + void handleCharacter(char); + void inBeforeToken(char); + void inTop(char); + void inSpace(char); + void inComment(char); + void inString(char); + void inName(char); + void inLt(char); + void inGt(char); + void inStringAfterCR(char); + void inStringEscape(char); + void inLiteral(char); + void inCharCode(char); + void inHexstring(char); + void inHexstring2nd(char); + void inInlineImage(char); + void inTokenReady(char); + void inNameHex1(char); + void inNameHex2(char); + void inSign(char); + void inDecimal(char); + void inNumber(char); + void inReal(char); + void reset(); - private: - Members(); - Members(Members const&) = delete; - void reset(); + // Lexer state + state_e state; - // Lexer state - state_e state; + bool allow_eof; + bool include_ignorable; - bool allow_eof; - bool include_ignorable; + // Current token accumulation + token_type_e type; + std::string val; + std::string raw_val; + std::string error_message; + bool before_token; + bool 
in_token; + char char_to_unread; + size_t inline_image_bytes; + bool bad; - // Current token accumulation - token_type_e type; - std::string val; - std::string raw_val; - std::string error_message; - bool unread_char; - char char_to_unread; - size_t inline_image_bytes; - - // State for strings - int string_depth; - bool string_ignoring_newline; - char bs_num_register[4]; - bool last_char_was_bs; - bool last_char_was_cr; - }; - std::shared_ptr<Members> m; + // State for strings + int string_depth; + int char_code; + char hex_char; + int digit_count; }; #endif // QPDFTOKENIZER_HH diff --git a/include/qpdf/QPDFValue.hh b/include/qpdf/QPDFValue.hh new file mode 100644 index 00000000..8b4f53b5 --- /dev/null +++ b/include/qpdf/QPDFValue.hh @@ -0,0 +1,130 @@ +// Copyright (c) 2005-2022 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. 
+ +#ifndef QPDFVALUE_HH +#define QPDFVALUE_HH + +#include <qpdf/Constants.h> +#include <qpdf/DLL.h> +#include <qpdf/JSON.hh> +#include <qpdf/QPDFObjGen.hh> +#include <qpdf/Types.h> + +#include <string> + +class QPDF; +class QPDFObjectHandle; +class QPDFObject; + +class QPDFValue +{ + friend class QPDFObject; + + public: + virtual ~QPDFValue() = default; + + virtual std::shared_ptr<QPDFObject> shallowCopy() = 0; + virtual std::string unparse() = 0; + virtual JSON getJSON(int json_version) = 0; + virtual void + setDescription(QPDF* qpdf, std::string const& description) + { + owning_qpdf = qpdf; + object_description = description; + } + bool + getDescription(QPDF*& qpdf, std::string& description) + { + qpdf = owning_qpdf; + description = object_description; + return owning_qpdf != nullptr; + } + bool + hasDescription() + { + return owning_qpdf != nullptr; + } + void + setParsedOffset(qpdf_offset_t offset) + { + if (parsed_offset < 0) { + parsed_offset = offset; + } + } + qpdf_offset_t + getParsedOffset() + { + return parsed_offset; + } + QPDF* + getQPDF() + { + return qpdf; + } + QPDFObjGen + getObjGen() + { + return og; + } + + protected: + QPDFValue() : + type_code(::ot_uninitialized), + type_name("uninitialized") + { + } + QPDFValue(qpdf_object_type_e type_code, char const* type_name) : + type_code(type_code), + type_name(type_name) + { + } + QPDFValue( + qpdf_object_type_e type_code, + char const* type_name, + QPDF* qpdf, + QPDFObjGen const& og) : + type_code(type_code), + type_name(type_name), + qpdf(qpdf), + og(og) + { + } + virtual void + releaseResolved() + { + } + static std::shared_ptr<QPDFObject> do_create(QPDFValue*); + + private: + QPDFValue(QPDFValue const&) = delete; + QPDFValue& operator=(QPDFValue const&) = delete; + QPDF* owning_qpdf{nullptr}; + std::string object_description; + qpdf_offset_t parsed_offset{-1}; + const qpdf_object_type_e type_code; + char const* type_name; + + protected: + QPDF* qpdf{nullptr}; + QPDFObjGen og; +}; + +#endif // 
QPDFVALUE_HH diff --git a/include/qpdf/QTC.hh b/include/qpdf/QTC.hh index 1fa55901..70115981 100644 --- a/include/qpdf/QTC.hh +++ b/include/qpdf/QTC.hh @@ -24,10 +24,24 @@ #include <qpdf/DLL.h> +// Defining QPDF_DISABLE_QTC will effectively compile out any QTC::TC +// calls in any code that includes this file, but QTC will still be +// built into the library. That way, it is possible to build and +// package qpdf with QPDF_DISABLE_QTC while still making QTC::TC +// available to end users. + namespace QTC { QPDF_DLL - void TC(char const* const scope, char const* const ccase, int n = 0); + void TC_real(char const* const scope, char const* const ccase, int n = 0); + + inline void + TC(char const* const scope, char const* const ccase, int n = 0) + { +#ifndef QPDF_DISABLE_QTC + TC_real(scope, ccase, n); +#endif // QPDF_DISABLE_QTC + } }; // namespace QTC #endif // QTC_HH diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh index 32aeae1f..96f4f7ed 100644 --- a/include/qpdf/QUtil.hh +++ b/include/qpdf/QUtil.hh @@ -25,6 +25,7 @@ #include <qpdf/DLL.h> #include <qpdf/PointerHolder.hh> #include <qpdf/Types.h> +#include <cstring> #include <functional> #include <list> #include <memory> @@ -489,16 +490,16 @@ namespace QUtil // classes without using ctype, which we avoid because of locale // considerations. QPDF_DLL - bool is_hex_digit(char); + inline bool is_hex_digit(char); QPDF_DLL - bool is_space(char); + inline bool is_space(char); QPDF_DLL - bool is_digit(char); + inline bool is_digit(char); QPDF_DLL - bool is_number(char const*); + inline bool is_number(char const*); // This method parses the numeric range syntax used by the qpdf // command-line tool. May throw std::runtime_error. @@ -524,6 +525,62 @@ namespace QUtil wchar_t const* const argv[], std::function<int(int, char const* const[])> realmain); #endif // QPDF_NO_WCHAR_T -}; // namespace QUtil + + // Try to return the maximum amount of memory allocated by the + // current process and its threads. 
Return 0 if unable to + // determine. This is Linux-specific and not implemented to be + // completely reliable. It is used during development for + // performance testing to detect changes that may significantly + // change memory usage. It is not recommended for use for other + // purposes. + QPDF_DLL + size_t get_max_memory_usage(); +}; // namespace QUtil + +inline bool +QUtil::is_hex_digit(char ch) +{ + return (ch && (strchr("0123456789abcdefABCDEF", ch) != nullptr)); +} + +inline bool +QUtil::is_space(char ch) +{ + return (ch && (strchr(" \f\n\r\t\v", ch) != nullptr)); +} + +inline bool +QUtil::is_digit(char ch) +{ + return ((ch >= '0') && (ch <= '9')); +} + +inline bool +QUtil::is_number(char const* p) +{ + // ^[\+\-]?(\.\d*|\d+(\.\d*)?)$ + if (!*p) { + return false; + } + if ((*p == '-') || (*p == '+')) { + ++p; + } + bool found_dot = false; + bool found_digit = false; + for (; *p; ++p) { + if (*p == '.') { + if (found_dot) { + // only one dot + return false; + } + found_dot = true; + } else if (QUtil::is_digit(*p)) { + found_digit = true; + } else { + return false; + } + } + return found_digit; +} #endif // QUTIL_HH diff --git a/include/qpdf/auto_job_c_main.hh b/include/qpdf/auto_job_c_main.hh index 90927ded..cc655c23 100644 --- a/include/qpdf/auto_job_c_main.hh +++ b/include/qpdf/auto_job_c_main.hh @@ -33,6 +33,7 @@ QPDF_DLL Config* qdf(); QPDF_DLL Config* rawStreamData(); QPDF_DLL Config* recompressFlate(); QPDF_DLL Config* removePageLabels(); +QPDF_DLL Config* reportMemUsage(); QPDF_DLL Config* requiresPassword(); QPDF_DLL Config* showEncryption(); QPDF_DLL Config* showEncryptionKey(); @@ -3,15 +3,15 @@ generate_auto_job 9abe2ec994fb98526f5e3c0c199ce2e61a868463cb522a5bc6e9730b655341 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 include/qpdf/auto_job_c_enc.hh 
28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5 -include/qpdf/auto_job_c_main.hh cdba1ae6ea5525a585d10a3dd95b7996d62b17de4211fe658b78d9d463b0f313 +include/qpdf/auto_job_c_main.hh 493b9798f5ff8bbcb07c0238693554d77eefa4ae71ce1a0d466de94e3a7a3966 include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c18911614fe8e568ec include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 -job.yml f9564f18b08a45d17328af43652645771d3498471820c858b8c9013a193e1412 +job.yml a6f22d425980ed960c77c0a4197f46924c14e7943358cd9f0b75811bb1c480ad libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da -libqpdf/qpdf/auto_job_help.hh 53306e4aef8aaca641c0087bc9e064ada1c44a94b826c0bcac7b4eb0c8c41fd5 -libqpdf/qpdf/auto_job_init.hh fd1635a5ad6ba16b7ae008467145560a59a5ecfd10d29c5ef7cd0d8347747cd2 +libqpdf/qpdf/auto_job_help.hh 3e9385a7e0dae993467647466fa30f30baa5968f9270c73ff4e664f5aa415dbe +libqpdf/qpdf/auto_job_init.hh ccb881733849dff5c05721f1aa5c35447cedd415e881c3fef6573901e45be056 libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 -libqpdf/qpdf/auto_job_json_init.hh 59545578a2e47c660ff98516ed53f06638be75eb4658e2a09d32cc08e0cb7268 -libqpdf/qpdf/auto_job_schema.hh 5352ef1be1ad7cc6f4f36dab88f2937d278e6bd3a0e2d46259794dc226c8ba6b +libqpdf/qpdf/auto_job_json_init.hh 7ac8f42fb39eda56144ab62b30152a56e9bb2224d0596eb826b7bc421a78d26b +libqpdf/qpdf/auto_job_schema.hh 17352791b09c3b8a8db766375cce31d70c98b67b44ecc398e2ac78984e34fe90 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 -manual/cli.rst 41ee93f23f46160fe9eaf7c99fd2ab3bd2e0f6792a341a35bdac1a41cb853ed5 +manual/cli.rst e3fa48bb30c981df1f74d474887155cd6a46f9010b91cd1b7b57e582bf3bf877 @@ -127,6 +127,7 @@ options: - recompress-flate - remove-page-labels - replace-input + - report-mem-usage - requires-password - show-encryption - show-encryption-key @@ 
-413,6 +414,7 @@ json: Pages.password: _range: "page range" remove-page-labels: + report-mem-usage: rotate: overlay: _file: "source file for overlay" diff --git a/libqpdf/BufferInputSource.cc b/libqpdf/BufferInputSource.cc index 5b59c801..6402f639 100644 --- a/libqpdf/BufferInputSource.cc +++ b/libqpdf/BufferInputSource.cc @@ -7,8 +7,8 @@ #include <stdexcept> #include <string.h> -BufferInputSource::Members::Members( - bool own_memory, std::string const& description, Buffer* buf) : +BufferInputSource::BufferInputSource( + std::string const& description, Buffer* buf, bool own_memory) : own_memory(own_memory), description(description), buf(buf), @@ -18,60 +18,54 @@ BufferInputSource::Members::Members( } BufferInputSource::BufferInputSource( - std::string const& description, Buffer* buf, bool own_memory) : - m(new Members(own_memory, description, buf)) -{ -} - -BufferInputSource::BufferInputSource( std::string const& description, std::string const& contents) : - m(new Members(true, description, nullptr)) + own_memory(true), + description(description), + buf(new Buffer(contents.length())), + cur_offset(0), + max_offset(QIntC::to_offset(buf->getSize())) { - this->m->buf = new Buffer(contents.length()); - this->m->max_offset = QIntC::to_offset(this->m->buf->getSize()); - unsigned char* bp = this->m->buf->getBuffer(); - memcpy(bp, contents.c_str(), contents.length()); + memcpy(buf->getBuffer(), contents.c_str(), contents.length()); } BufferInputSource::~BufferInputSource() { - if (this->m->own_memory) { - delete this->m->buf; + if (this->own_memory) { + delete this->buf; } } qpdf_offset_t BufferInputSource::findAndSkipNextEOL() { - if (this->m->cur_offset < 0) { + if (this->cur_offset < 0) { throw std::logic_error("INTERNAL ERROR: BufferInputSource offset < 0"); } - qpdf_offset_t end_pos = this->m->max_offset; - if (this->m->cur_offset >= end_pos) { + qpdf_offset_t end_pos = this->max_offset; + if (this->cur_offset >= end_pos) { this->last_offset = end_pos; - 
this->m->cur_offset = end_pos; + this->cur_offset = end_pos; return end_pos; } qpdf_offset_t result = 0; - unsigned char const* buffer = this->m->buf->getBuffer(); + unsigned char const* buffer = this->buf->getBuffer(); unsigned char const* end = buffer + end_pos; - unsigned char const* p = buffer + this->m->cur_offset; + unsigned char const* p = buffer + this->cur_offset; while ((p < end) && !((*p == '\r') || (*p == '\n'))) { ++p; } if (p < end) { result = p - buffer; - this->m->cur_offset = result + 1; + this->cur_offset = result + 1; ++p; - while ((this->m->cur_offset < end_pos) && - ((*p == '\r') || (*p == '\n'))) { + while ((this->cur_offset < end_pos) && ((*p == '\r') || (*p == '\n'))) { ++p; - ++this->m->cur_offset; + ++this->cur_offset; } } else { - this->m->cur_offset = end_pos; + this->cur_offset = end_pos; result = end_pos; } return result; @@ -80,13 +74,13 @@ BufferInputSource::findAndSkipNextEOL() std::string const& BufferInputSource::getName() const { - return this->m->description; + return this->description; } qpdf_offset_t BufferInputSource::tell() { - return this->m->cur_offset; + return this->cur_offset; } void @@ -94,17 +88,17 @@ BufferInputSource::seek(qpdf_offset_t offset, int whence) { switch (whence) { case SEEK_SET: - this->m->cur_offset = offset; + this->cur_offset = offset; break; case SEEK_END: - QIntC::range_check(this->m->max_offset, offset); - this->m->cur_offset = this->m->max_offset + offset; + QIntC::range_check(this->max_offset, offset); + this->cur_offset = this->max_offset + offset; break; case SEEK_CUR: - QIntC::range_check(this->m->cur_offset, offset); - this->m->cur_offset += offset; + QIntC::range_check(this->cur_offset, offset); + this->cur_offset += offset; break; default: @@ -113,42 +107,41 @@ BufferInputSource::seek(qpdf_offset_t offset, int whence) break; } - if (this->m->cur_offset < 0) { + if (this->cur_offset < 0) { throw std::runtime_error( - this->m->description + ": seek before beginning of buffer"); + 
this->description + ": seek before beginning of buffer"); } } void BufferInputSource::rewind() { - this->m->cur_offset = 0; + this->cur_offset = 0; } size_t BufferInputSource::read(char* buffer, size_t length) { - if (this->m->cur_offset < 0) { + if (this->cur_offset < 0) { throw std::logic_error("INTERNAL ERROR: BufferInputSource offset < 0"); } - qpdf_offset_t end_pos = this->m->max_offset; - if (this->m->cur_offset >= end_pos) { + qpdf_offset_t end_pos = this->max_offset; + if (this->cur_offset >= end_pos) { this->last_offset = end_pos; return 0; } - this->last_offset = this->m->cur_offset; - size_t len = - std::min(QIntC::to_size(end_pos - this->m->cur_offset), length); - memcpy(buffer, this->m->buf->getBuffer() + this->m->cur_offset, len); - this->m->cur_offset += QIntC::to_offset(len); + this->last_offset = this->cur_offset; + size_t len = std::min(QIntC::to_size(end_pos - this->cur_offset), length); + memcpy(buffer, this->buf->getBuffer() + this->cur_offset, len); + this->cur_offset += QIntC::to_offset(len); return len; } void BufferInputSource::unreadCh(char ch) { - if (this->m->cur_offset > 0) { - --this->m->cur_offset; + if (this->cur_offset > 0) { + --this->cur_offset; } } diff --git a/libqpdf/CMakeLists.txt b/libqpdf/CMakeLists.txt index cf807f6d..686ea04a 100644 --- a/libqpdf/CMakeLists.txt +++ b/libqpdf/CMakeLists.txt @@ -80,10 +80,12 @@ set(libqpdf_SOURCES QPDFPageDocumentHelper.cc QPDFPageLabelDocumentHelper.cc QPDFPageObjectHelper.cc + QPDFParser.cc QPDFStreamFilter.cc QPDFSystemError.cc QPDFTokenizer.cc QPDFUsage.cc + QPDFValue.cc QPDFWriter.cc QPDFXRefEntry.cc QPDF_Array.cc @@ -98,6 +100,7 @@ set(libqpdf_SOURCES QPDF_Reserved.cc QPDF_Stream.cc QPDF_String.cc + QPDF_Unresolved.cc QPDF_encryption.cc QPDF_json.cc QPDF_linearization.cc @@ -375,6 +378,29 @@ int main(int argc, char* argv[]) { endif() endfunction() +check_c_source_compiles( +"#include <malloc.h> +#include <stdio.h> +int main(int argc, char* argv[]) { + malloc_info(0, stdout); + return 
0; +}" + HAVE_MALLOC_INFO) + +check_c_source_compiles( +"#include <stdio.h> +#include <stdlib.h> +int main(int argc, char* argv[]) { + char* buf; + size_t size; + FILE* f; + f = open_memstream(&buf, &size); + fclose(f); + free(buf); + return 0; +}" + HAVE_OPEN_MEMSTREAM) + qpdf_check_ll_fmt("%lld" fmt_lld) qpdf_check_ll_fmt("%I64d" fmt_i64d) qpdf_check_ll_fmt("%I64lld" fmt_i64lld) diff --git a/libqpdf/ClosedFileInputSource.cc b/libqpdf/ClosedFileInputSource.cc index dfb98c6a..06ebb156 100644 --- a/libqpdf/ClosedFileInputSource.cc +++ b/libqpdf/ClosedFileInputSource.cc @@ -2,18 +2,13 @@ #include <qpdf/FileInputSource.hh> -ClosedFileInputSource::Members::Members(char const* filename) : +ClosedFileInputSource::ClosedFileInputSource(char const* filename) : filename(filename), offset(0), stay_open(false) { } -ClosedFileInputSource::ClosedFileInputSource(char const* filename) : - m(new Members(filename)) -{ -} - ClosedFileInputSource::~ClosedFileInputSource() { // Must be explicit and not inline -- see QPDF_DLL_CLASS in @@ -23,30 +18,29 @@ ClosedFileInputSource::~ClosedFileInputSource() void ClosedFileInputSource::before() { - if (nullptr == this->m->fis.get()) { - this->m->fis = - std::make_shared<FileInputSource>(this->m->filename.c_str()); - this->m->fis->seek(this->m->offset, SEEK_SET); - this->m->fis->setLastOffset(this->last_offset); + if (nullptr == this->fis) { + this->fis = std::make_shared<FileInputSource>(this->filename.c_str()); + this->fis->seek(this->offset, SEEK_SET); + this->fis->setLastOffset(this->last_offset); } } void ClosedFileInputSource::after() { - this->last_offset = this->m->fis->getLastOffset(); - this->m->offset = this->m->fis->tell(); - if (this->m->stay_open) { + this->last_offset = this->fis->getLastOffset(); + this->offset = this->fis->tell(); + if (this->stay_open) { return; } - this->m->fis = nullptr; + this->fis = nullptr; } qpdf_offset_t ClosedFileInputSource::findAndSkipNextEOL() { before(); - qpdf_offset_t r = 
this->m->fis->findAndSkipNextEOL(); + qpdf_offset_t r = this->fis->findAndSkipNextEOL(); after(); return r; } @@ -54,14 +48,14 @@ ClosedFileInputSource::findAndSkipNextEOL() std::string const& ClosedFileInputSource::getName() const { - return this->m->filename; + return this->filename; } qpdf_offset_t ClosedFileInputSource::tell() { before(); - qpdf_offset_t r = this->m->fis->tell(); + qpdf_offset_t r = this->fis->tell(); after(); return r; } @@ -70,16 +64,16 @@ void ClosedFileInputSource::seek(qpdf_offset_t offset, int whence) { before(); - this->m->fis->seek(offset, whence); + this->fis->seek(offset, whence); after(); } void ClosedFileInputSource::rewind() { - this->m->offset = 0; - if (this->m->fis.get()) { - this->m->fis->rewind(); + this->offset = 0; + if (this->fis.get()) { + this->fis->rewind(); } } @@ -87,7 +81,7 @@ size_t ClosedFileInputSource::read(char* buffer, size_t length) { before(); - size_t r = this->m->fis->read(buffer, length); + size_t r = this->fis->read(buffer, length); after(); return r; } @@ -96,7 +90,7 @@ void ClosedFileInputSource::unreadCh(char ch) { before(); - this->m->fis->unreadCh(ch); + this->fis->unreadCh(ch); // Don't call after -- the file has to stay open after this // operation. 
} @@ -104,8 +98,8 @@ ClosedFileInputSource::unreadCh(char ch) void ClosedFileInputSource::stayOpen(bool val) { - this->m->stay_open = val; - if ((!val) && this->m->fis.get()) { + this->stay_open = val; + if ((!val) && this->fis.get()) { after(); } } diff --git a/libqpdf/FileInputSource.cc b/libqpdf/FileInputSource.cc index ab88d302..2b1ee1ab 100644 --- a/libqpdf/FileInputSource.cc +++ b/libqpdf/FileInputSource.cc @@ -5,60 +5,52 @@ #include <algorithm> #include <string.h> -FileInputSource::Members::Members(bool close_file) : - close_file(close_file), - file(nullptr) -{ -} - -FileInputSource::Members::~Members() -{ - if (this->file && this->close_file) { - fclose(this->file); - } -} - FileInputSource::FileInputSource() : - m(new Members(false)) + close_file(false), + file(nullptr) { } FileInputSource::FileInputSource(char const* filename) : - m(new Members(false)) + close_file(true), + filename(filename), + file(QUtil::safe_fopen(filename, "rb")) { - setFilename(filename); } FileInputSource::FileInputSource( char const* description, FILE* filep, bool close_file) : - m(new Members(false)) + close_file(close_file), + filename(description), + file(filep) +{ +} + +FileInputSource::~FileInputSource() { - setFile(description, filep, close_file); + // Must be explicit and not inline -- see QPDF_DLL_CLASS in + // README-maintainer + if (this->file && this->close_file) { + fclose(this->file); + } } void FileInputSource::setFilename(char const* filename) { - this->m = std::shared_ptr<Members>(new Members(true)); - this->m->filename = filename; - this->m->file = QUtil::safe_fopen(filename, "rb"); + this->close_file = true; + this->filename = filename; + this->file = QUtil::safe_fopen(filename, "rb"); } void FileInputSource::setFile(char const* description, FILE* filep, bool close_file) { - this->m = std::shared_ptr<Members>(new Members(close_file)); - this->m->filename = description; - this->m->file = filep; + this->filename = description; + this->file = filep; this->seek(0, 
SEEK_SET); } -FileInputSource::~FileInputSource() -{ - // Must be explicit and not inline -- see QPDF_DLL_CLASS in - // README-maintainer -} - qpdf_offset_t FileInputSource::findAndSkipNextEOL() { @@ -66,7 +58,7 @@ FileInputSource::findAndSkipNextEOL() bool done = false; char buf[10240]; while (!done) { - qpdf_offset_t cur_offset = QUtil::tell(this->m->file); + qpdf_offset_t cur_offset = QUtil::tell(this->file); size_t len = this->read(buf, sizeof(buf)); if (len == 0) { done = true; @@ -98,41 +90,42 @@ FileInputSource::findAndSkipNextEOL() std::string const& FileInputSource::getName() const { - return this->m->filename; + return this->filename; } qpdf_offset_t FileInputSource::tell() { - return QUtil::tell(this->m->file); + return QUtil::tell(this->file); } void FileInputSource::seek(qpdf_offset_t offset, int whence) { - QUtil::os_wrapper( - (std::string("seek to ") + this->m->filename + ", offset " + - QUtil::int_to_string(offset) + " (" + QUtil::int_to_string(whence) + - ")"), - QUtil::seek(this->m->file, offset, whence)); + if (QUtil::seek(this->file, offset, whence) == -1) { + QUtil::throw_system_error( + std::string("seek to ") + this->filename + ", offset " + + QUtil::int_to_string(offset) + " (" + QUtil::int_to_string(whence) + + ")"); + } } void FileInputSource::rewind() { - ::rewind(this->m->file); + ::rewind(this->file); } size_t FileInputSource::read(char* buffer, size_t length) { - this->last_offset = this->tell(); - size_t len = fread(buffer, 1, length, this->m->file); + this->last_offset = QUtil::tell(this->file); + size_t len = fread(buffer, 1, length, this->file); if (len == 0) { - if (ferror(this->m->file)) { + if (ferror(this->file)) { throw QPDFExc( qpdf_e_system, - this->m->filename, + this->filename, "", this->last_offset, (std::string("read ") + QUtil::uint_to_string(length) + @@ -148,7 +141,7 @@ FileInputSource::read(char* buffer, size_t length) void FileInputSource::unreadCh(char ch) { - QUtil::os_wrapper( - this->m->filename + ": unread 
character", - ungetc(static_cast<unsigned char>(ch), this->m->file)); + if (ungetc(static_cast<unsigned char>(ch), this->file) == -1) { + QUtil::throw_system_error(this->filename + ": unread character"); + } } diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index c825413e..07180cf5 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -204,7 +204,7 @@ JSON::JSON_blob::write(Pipeline* p, size_t) const void JSON::write(Pipeline* p, size_t depth) const { - if (nullptr == this->m->value.get()) { + if (nullptr == this->m->value) { *p << "null"; } else { this->m->value->write(p, depth); @@ -1122,7 +1122,7 @@ JSONParser::handleToken() break; } - if ((item.get() == nullptr) == (delimiter == '\0')) { + if ((item == nullptr) == (delimiter == '\0')) { throw std::logic_error( "JSONParser::handleToken: logic error: exactly one of item" " or delimiter must be set"); diff --git a/libqpdf/Pl_Buffer.cc b/libqpdf/Pl_Buffer.cc index c7e3f923..791656d8 100644 --- a/libqpdf/Pl_Buffer.cc +++ b/libqpdf/Pl_Buffer.cc @@ -26,7 +26,7 @@ Pl_Buffer::~Pl_Buffer() void Pl_Buffer::write(unsigned char const* buf, size_t len) { - if (this->m->data.get() == nullptr) { + if (this->m->data == nullptr) { this->m->data = std::make_shared<Buffer>(len); } size_t cur_size = this->m->data->getSize(); diff --git a/libqpdf/Pl_Flate.cc b/libqpdf/Pl_Flate.cc index 4183b7e4..7f2fa4a6 100644 --- a/libqpdf/Pl_Flate.cc +++ b/libqpdf/Pl_Flate.cc @@ -86,7 +86,7 @@ Pl_Flate::warn(char const* msg, int code) void Pl_Flate::write(unsigned char const* data, size_t len) { - if (this->m->outbuf.get() == nullptr) { + if (this->m->outbuf == nullptr) { throw std::logic_error( this->identifier + ": Pl_Flate: write() called after finish() called"); diff --git a/libqpdf/Pl_RC4.cc b/libqpdf/Pl_RC4.cc index 823b22ea..ba0cb201 100644 --- a/libqpdf/Pl_RC4.cc +++ b/libqpdf/Pl_RC4.cc @@ -18,7 +18,7 @@ Pl_RC4::Pl_RC4( void Pl_RC4::write(unsigned char const* data, size_t len) { - if (this->outbuf.get() == nullptr) { + if (this->outbuf == 
nullptr) { throw std::logic_error( this->identifier + ": Pl_RC4: write() called after finish() called"); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index c2c764db..9593c44f 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -24,7 +24,9 @@ #include <qpdf/QPDF_Array.hh> #include <qpdf/QPDF_Dictionary.hh> #include <qpdf/QPDF_Null.hh> +#include <qpdf/QPDF_Reserved.hh> #include <qpdf/QPDF_Stream.hh> +#include <qpdf/QPDF_Unresolved.hh> #include <qpdf/QTC.hh> #include <qpdf/QUtil.hh> @@ -222,7 +224,6 @@ QPDF::Members::Members() : immediate_copy_from(false), in_parse(false), parsed(false), - ever_replaced_objects(false), first_xref_item_offset(0), uncompressed_after_compressed(false) { @@ -258,6 +259,7 @@ QPDF::~QPDF() this->m->xref_table.clear(); for (auto const& iter: this->m->obj_cache) { QPDFObject::ObjAccessor::releaseResolved(iter.second.object.get()); + iter.second.object->resetObjGen(); } } @@ -1397,7 +1399,7 @@ QPDF::fixDanglingReferences(bool force) std::list<QPDFObjectHandle> queue; queue.push_back(this->m->trailer); for (auto const& og: to_process) { - QPDFObjectHandle obj = QPDFObjectHandle::Factory::newIndirect(this, og); + auto obj = getObject(og); if (obj.isDictionary() || obj.isArray()) { queue.push_back(obj); } else if (obj.isStream()) { @@ -1419,18 +1421,15 @@ QPDF::fixDanglingReferences(bool force) to_check.push_back(iter.second); } } else if (obj.isArray()) { - QPDF_Array* arr = dynamic_cast<QPDF_Array*>( - QPDFObjectHandle::ObjAccessor::getObject(obj).get()); + auto arr = QPDFObjectHandle::ObjAccessor::asArray(obj); arr->addExplicitElementsToList(to_check); } for (auto sub: to_check) { if (sub.isIndirect()) { - if (sub.getOwningQPDF() == this) { - QPDFObjGen og(sub.getObjGen()); - if (this->m->obj_cache.count(og) == 0) { - QTC::TC("qpdf", "QPDF detected dangling ref"); - queue.push_back(sub); - } + if ((sub.getOwningQPDF() == this) && + isUnresolved(sub.getObjGen())) { + QTC::TC("qpdf", "QPDF detected dangling ref"); + queue.push_back(sub); } } 
else { queue.push_back(sub); @@ -1462,8 +1461,7 @@ QPDF::getAllObjects() fixDanglingReferences(true); std::vector<QPDFObjectHandle> result; for (auto const& iter: this->m->obj_cache) { - QPDFObjGen const& og = iter.first; - result.push_back(QPDFObjectHandle::Factory::newIndirect(this, og)); + result.push_back(newIndirect(iter.first, iter.second.object)); } return result; } @@ -1888,7 +1886,7 @@ QPDF::readObjectAtOffset( "expected endobj"); } - if (!this->m->obj_cache.count(og)) { + if (isUnresolved(og)) { // Store the object in the cache here so it gets cached // whether we first know the offset or whether we first know // the object ID and generation (in which we case we would get @@ -1919,8 +1917,8 @@ QPDF::readObjectAtOffset( } } qpdf_offset_t end_after_space = this->m->file->tell(); - - this->m->obj_cache[og] = ObjCache( + updateCache( + og, QPDFObjectHandle::ObjAccessor::getObject(oh), end_before_space, end_after_space); @@ -1929,31 +1927,14 @@ QPDF::readObjectAtOffset( return oh; } -bool -QPDF::objectChanged(QPDFObjGen const& og, std::shared_ptr<QPDFObject>& oph) -{ - // See if the object cached at og, if any, is the one passed in. - // QPDFObjectHandle uses this to detect outdated handles to - // replaced or swapped objects. This is a somewhat expensive check - // because it happens with every dereference of a - // QPDFObjectHandle. To reduce the hit somewhat, short-circuit the - // check if we never called a function that replaces an object - // already in cache. It is important for functions that do this to - // set ever_replaced_objects = true. 
- - if (!this->m->ever_replaced_objects) { - return false; - } - auto c = this->m->obj_cache.find(og); - if (c == this->m->obj_cache.end()) { - return true; - } - return (c->second.object.get() != oph.get()); -} - -std::shared_ptr<QPDFObject> +void QPDF::resolve(QPDFObjGen const& og) { + if (isCached(og) && !isUnresolved(og)) { + // We only need to resolve unresolved objects + return; + } + // Check object cache before checking xref table. This allows us // to insert things into the object cache that don't actually // exist in the file. @@ -1967,11 +1948,12 @@ QPDF::resolve(QPDFObjGen const& og) "", this->m->file->getLastOffset(), ("loop detected resolving object " + og.unparse(' '))); - return QPDF_Null::create(); + updateCache(og, QPDF_Null::create(), -1, -1); + return; } ResolveRecorder rr(this, og); - if ((!this->m->obj_cache.count(og)) && this->m->xref_table.count(og)) { + if (m->xref_table.count(og) != 0) { QPDFXRefEntry const& entry = this->m->xref_table[og]; try { switch (entry.getType()) { @@ -2009,19 +1991,17 @@ QPDF::resolve(QPDFObjGen const& og) ": error reading object: " + e.what())); } } - if (this->m->obj_cache.count(og) == 0) { + + if (isUnresolved(og)) { // PDF spec says unknown objects resolve to the null object. QTC::TC("qpdf", "QPDF resolve failure to null"); - QPDFObjectHandle oh = QPDFObjectHandle::newNull(); - this->m->obj_cache[og] = - ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); + updateCache(og, QPDF_Null::create(), -1, -1); } - std::shared_ptr<QPDFObject> result(this->m->obj_cache[og].object); + auto result(this->m->obj_cache[og].object); if (!result->hasDescription()) { result->setDescription(this, ("object " + og.unparse(' '))); } - return result; } void @@ -2109,15 +2089,15 @@ QPDF::resolveObjectsInStream(int obj_stream_number) // objects appended to the file, so it is necessary to recheck the // xref table and only cache what would actually be resolved here. 
for (auto const& iter: offsets) { - int obj = iter.first; - QPDFObjGen og(obj, 0); + QPDFObjGen og(iter.first, 0); QPDFXRefEntry const& entry = this->m->xref_table[og]; if ((entry.getType() == 2) && (entry.getObjStreamNumber() == obj_stream_number)) { int offset = iter.second; input->seek(offset, SEEK_SET); QPDFObjectHandle oh = readObject(input, "", og, true); - this->m->obj_cache[og] = ObjCache( + updateCache( + og, QPDFObjectHandle::ObjAccessor::getObject(oh), end_before_space, end_after_space); @@ -2128,6 +2108,47 @@ QPDF::resolveObjectsInStream(int obj_stream_number) } QPDFObjectHandle +QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj) +{ + obj->setObjGen(this, og); + if (!obj->hasDescription()) { + obj->setDescription(this, "object " + og.unparse(' ')); + } + return QPDFObjectHandle::Factory::newIndirect(obj); +} + +void +QPDF::updateCache( + QPDFObjGen const& og, + std::shared_ptr<QPDFObject> const& object, + qpdf_offset_t end_before_space, + qpdf_offset_t end_after_space) +{ + object->setObjGen(this, og); + if (isCached(og)) { + auto& cache = m->obj_cache[og]; + cache.object->resetObjGen(); + cache.object->assign(object); + cache.end_before_space = end_before_space; + cache.end_after_space = end_after_space; + } else { + m->obj_cache[og] = ObjCache(object, end_before_space, end_after_space); + } +} + +bool +QPDF::isCached(QPDFObjGen const& og) +{ + return m->obj_cache.count(og) != 0; +} + +bool +QPDF::isUnresolved(QPDFObjGen const& og) +{ + return !isCached(og) || m->obj_cache[og].object->isUnresolved(); +} + +QPDFObjectHandle QPDF::makeIndirectObject(QPDFObjectHandle oh) { int max_objid = toI(getObjectCount()); @@ -2136,19 +2157,21 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh) "max object id is too high to create new objects"); } QPDFObjGen next(max_objid + 1, 0); - this->m->obj_cache[next] = + m->obj_cache[next] = ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); - return 
QPDFObjectHandle::Factory::newIndirect(this, next); + return newIndirect(next, m->obj_cache[next].object); } QPDFObjectHandle QPDF::reserveObjectIfNotExists(QPDFObjGen const& og) { - if ((!this->m->obj_cache.count(og)) && (!this->m->xref_table.count(og))) { + if (!isCached(og) && !m->xref_table.count(og)) { resolve(og); - replaceObject(og, QPDFObjectHandle::Factory::makeReserved()); + m->obj_cache[og].object = QPDF_Reserved::create(); + return newIndirect(og, m->obj_cache[og].object); + } else { + return getObject(og); } - return getObjectByObjGen(og); } QPDFObjectHandle @@ -2159,15 +2182,33 @@ QPDF::reserveStream(QPDFObjGen const& og) } QPDFObjectHandle +QPDF::getObject(QPDFObjGen const& og) +{ + if (!og.isIndirect()) { + return QPDFObjectHandle::newNull(); + } + if (!isCached(og)) { + m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); + } + return newIndirect(og, m->obj_cache[og].object); +} + +QPDFObjectHandle +QPDF::getObject(int objid, int generation) +{ + return getObject(QPDFObjGen(objid, generation)); +} + +QPDFObjectHandle QPDF::getObjectByObjGen(QPDFObjGen const& og) { - return QPDFObjectHandle::Factory::newIndirect(this, og); + return getObject(og); } QPDFObjectHandle QPDF::getObjectByID(int objid, int generation) { - return getObjectByObjGen(QPDFObjGen(objid, generation)); + return getObject(QPDFObjGen(objid, generation)); } void @@ -2184,14 +2225,11 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh) throw std::logic_error( "QPDF::replaceObject called with indirect object handle"); } - // Force new object to appear in the cache resolve(og); // Replace the object in the object cache - this->m->ever_replaced_objects = true; - this->m->obj_cache[og] = - ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); + updateCache(og, QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); } void @@ -2254,7 +2292,7 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) throw std::logic_error( "QPDF::copyForeign called with 
direct object handle"); } - QPDF* other = foreign.getOwningQPDF(); + QPDF* other = foreign.getOwningQPDF(false); if (other == this) { QTC::TC("qpdf", "QPDF copyForeign not foreign"); throw std::logic_error( @@ -2444,20 +2482,18 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) QPDFObjGen local_og(result.getObjGen()); // Copy information from the foreign stream so we can pipe its // data later without keeping the original QPDF object around. - QPDF* foreign_stream_qpdf = foreign.getOwningQPDF(); - if (!foreign_stream_qpdf) { - throw std::logic_error("unable to retrieve owning qpdf" - " from foreign stream"); - } - QPDF_Stream* stream = dynamic_cast<QPDF_Stream*>( - QPDFObjectHandle::ObjAccessor::getObject(foreign).get()); - if (!stream) { + + QPDF* foreign_stream_qpdf = foreign.getOwningQPDF( + false, "unable to retrieve owning qpdf from foreign stream"); + + auto stream = QPDFObjectHandle::ObjAccessor::asStream(foreign); + if (stream == nullptr) { throw std::logic_error("unable to retrieve underlying" " stream object from foreign stream"); } std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer(); if ((foreign_stream_qpdf->m->immediate_copy_from) && - (stream_buffer.get() == nullptr)) { + (stream_buffer == nullptr)) { // Pull the stream data into a buffer before attempting // the copy operation. Do it on the source stream so that // if the source stream is copied multiple times, we don't @@ -2515,10 +2551,7 @@ QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) // cache. 
resolve(og1); resolve(og2); - ObjCache t = this->m->obj_cache[og1]; - this->m->ever_replaced_objects = true; - this->m->obj_cache[og1] = this->m->obj_cache[og2]; - this->m->obj_cache[og2] = t; + m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object); } unsigned long long diff --git a/libqpdf/QPDFAcroFormDocumentHelper.cc b/libqpdf/QPDFAcroFormDocumentHelper.cc index 23d021ff..6fec0587 100644 --- a/libqpdf/QPDFAcroFormDocumentHelper.cc +++ b/libqpdf/QPDFAcroFormDocumentHelper.cc @@ -183,7 +183,7 @@ QPDFAcroFormDocumentHelper::getFormFields() analyze(); std::vector<QPDFFormFieldObjectHelper> result; for (auto const& iter: this->m->field_to_annotations) { - result.push_back(this->qpdf.getObjectByObjGen(iter.first)); + result.push_back(this->qpdf.getObject(iter.first)); } return result; } diff --git a/libqpdf/QPDFEmbeddedFileDocumentHelper.cc b/libqpdf/QPDFEmbeddedFileDocumentHelper.cc index fd706c27..030f32d3 100644 --- a/libqpdf/QPDFEmbeddedFileDocumentHelper.cc +++ b/libqpdf/QPDFEmbeddedFileDocumentHelper.cc @@ -50,7 +50,7 @@ QPDFEmbeddedFileDocumentHelper::QPDFEmbeddedFileDocumentHelper(QPDF& qpdf) : bool QPDFEmbeddedFileDocumentHelper::hasEmbeddedFiles() const { - return (this->m->embedded_files.get() != nullptr); + return (this->m->embedded_files != nullptr); } void diff --git a/libqpdf/QPDFFormFieldObjectHelper.cc b/libqpdf/QPDFFormFieldObjectHelper.cc index 5ff41edd..e56024af 100644 --- a/libqpdf/QPDFFormFieldObjectHelper.cc +++ b/libqpdf/QPDFFormFieldObjectHelper.cc @@ -362,13 +362,11 @@ QPDFFormFieldObjectHelper::setV(QPDFObjectHandle value, bool need_appearances) setFieldAttribute("/V", value); } if (need_appearances) { - QPDF* qpdf = this->oh.getOwningQPDF(); - if (!qpdf) { - throw std::logic_error( - "QPDFFormFieldObjectHelper::setV called with" - " need_appearances = true on an object that is" - " not associated with an owning QPDF"); - } + QPDF* qpdf = this->oh.getOwningQPDF( + false, + "QPDFFormFieldObjectHelper::setV called with need_appearances 
= " + "true on an object that is not associated with an owning QPDF"); + QPDFAcroFormDocumentHelper(*qpdf).setNeedAppearances(true); } } @@ -883,7 +881,7 @@ QPDFFormFieldObjectHelper::generateTextAppearance( if (found_font_in_dr && resources.isDictionary()) { QTC::TC("qpdf", "QPDFFormFieldObjectHelper get font from /DR"); if (resources.isIndirect()) { - resources = resources.getOwningQPDF()->makeIndirectObject( + resources = resources.getOwningQPDF(false)->makeIndirectObject( resources.shallowCopy()); AS.getDict().replaceKey("/Resources", resources); } diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index f1b35f56..7bd563aa 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -223,7 +223,7 @@ ImageOptimizer::evaluate(std::string const& description) Pl_Discard d; Pl_Count c("count", &d); std::shared_ptr<Pipeline> p = makePipeline(description, &c); - if (p.get() == nullptr) { + if (p == nullptr) { // message issued by makePipeline return false; } @@ -252,7 +252,7 @@ void ImageOptimizer::provideStreamData(QPDFObjGen const&, Pipeline* pipeline) { std::shared_ptr<Pipeline> p = makePipeline("", pipeline); - if (p.get() == nullptr) { + if (p == nullptr) { // Should not be possible image.warnIfPossible("unable to create pipeline after previous" " success; image data will be lost"); @@ -417,7 +417,8 @@ QPDFJob::Members::Members() : check_is_encrypted(false), check_requires_password(false), json_input(false), - json_output(false) + json_output(false), + report_mem_usage(false) { } @@ -625,6 +626,14 @@ QPDFJob::run() << ": operation succeeded with warnings\n"; } } + if (m->report_mem_usage) { + // Call get_max_memory_usage before generating output. When + // debugging, it's easier if print statements from + // get_max_memory_usage are not interleaved with the output. 
+ auto mem_usage = QUtil::get_max_memory_usage(); + *this->m->log->getWarn() + << "qpdf-max-memory-usage " << mem_usage << "\n"; + } } bool @@ -2161,7 +2170,7 @@ QPDFJob::doUnderOverlayForPage( std::map<unsigned long long, std::shared_ptr<QPDFAcroFormDocumentHelper>> afdh; auto make_afdh = [&](QPDFPageObjectHelper& ph) { - QPDF* q = ph.getObjectHandle().getOwningQPDF(); + QPDF* q = ph.getObjectHandle().getOwningQPDF(false); return get_afdh_for_qpdf(afdh, q); }; auto dest_afdh = make_afdh(dest_page); @@ -2243,8 +2252,7 @@ QPDFJob::handleUnderOverlay(QPDF& pdf) { validateUnderOverlay(pdf, &m->underlay); validateUnderOverlay(pdf, &m->overlay); - if ((nullptr == m->underlay.pdf.get()) && - (nullptr == m->overlay.pdf.get())) { + if ((nullptr == m->underlay.pdf) && (nullptr == m->overlay.pdf)) { return; } std::map<int, std::vector<int>> underlay_pagenos; @@ -2598,7 +2606,7 @@ static QPDFObjectHandle added_page(QPDF& pdf, QPDFObjectHandle page) { QPDFObjectHandle result = page; - if (page.getOwningQPDF() != &pdf) { + if (page.getOwningQPDF(false) != &pdf) { // Calling copyForeignObject on an object we already copied // will give us the already existing copy. 
result = pdf.copyForeignObject(page); diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc index 8a9c1470..3e148fca 100644 --- a/libqpdf/QPDFJob_config.cc +++ b/libqpdf/QPDFJob_config.cc @@ -503,6 +503,13 @@ QPDFJob::Config::removePageLabels() } QPDFJob::Config* +QPDFJob::Config::reportMemUsage() +{ + o.m->report_mem_usage = true; + return this; +} + +QPDFJob::Config* QPDFJob::Config::requiresPassword() { o.m->check_requires_password = true; diff --git a/libqpdf/QPDFNameTreeObjectHelper.cc b/libqpdf/QPDFNameTreeObjectHelper.cc index f1adca0a..f388dccf 100644 --- a/libqpdf/QPDFNameTreeObjectHelper.cc +++ b/libqpdf/QPDFNameTreeObjectHelper.cc @@ -34,6 +34,13 @@ namespace static NameTreeDetails name_tree_details; +QPDFNameTreeObjectHelper::~QPDFNameTreeObjectHelper() +{ + // Must be explicit and not inline -- see QPDF_DLL_CLASS in + // README-maintainer. For this specific class, see github issue + // #745. +} + QPDFNameTreeObjectHelper::Members::Members( QPDFObjectHandle& oh, QPDF& q, bool auto_repair) : impl(std::make_shared<NNTreeImpl>(name_tree_details, q, oh, auto_repair)) diff --git a/libqpdf/QPDFNumberTreeObjectHelper.cc b/libqpdf/QPDFNumberTreeObjectHelper.cc index 76eec678..6443416d 100644 --- a/libqpdf/QPDFNumberTreeObjectHelper.cc +++ b/libqpdf/QPDFNumberTreeObjectHelper.cc @@ -35,6 +35,13 @@ namespace static NumberTreeDetails number_tree_details; +QPDFNumberTreeObjectHelper::~QPDFNumberTreeObjectHelper() +{ + // Must be explicit and not inline -- see QPDF_DLL_CLASS in + // README-maintainer. For this specific class, see github issue + // #745. 
+} + QPDFNumberTreeObjectHelper::Members::Members( QPDFObjectHandle& oh, QPDF& q, bool auto_repair) : impl(std::make_shared<NNTreeImpl>(number_tree_details, q, oh, auto_repair)) diff --git a/libqpdf/QPDFObject.cc b/libqpdf/QPDFObject.cc index 382dd6c6..8b538021 100644 --- a/libqpdf/QPDFObject.cc +++ b/libqpdf/QPDFObject.cc @@ -1,47 +1,10 @@ #include <qpdf/QPDFObject.hh> -QPDFObject::QPDFObject() : - owning_qpdf(nullptr), - parsed_offset(-1) -{ -} - -std::shared_ptr<QPDFObject> -QPDFObject::do_create(QPDFObject* object) -{ - std::shared_ptr<QPDFObject> obj(object); - return obj; -} - -void -QPDFObject::setDescription(QPDF* qpdf, std::string const& description) -{ - this->owning_qpdf = qpdf; - this->object_description = description; -} - -bool -QPDFObject::getDescription(QPDF*& qpdf, std::string& description) -{ - qpdf = this->owning_qpdf; - description = this->object_description; - return this->owning_qpdf != nullptr; -} - -bool -QPDFObject::hasDescription() -{ - return this->owning_qpdf != nullptr; -} +#include <qpdf/QPDF.hh> void -QPDFObject::setParsedOffset(qpdf_offset_t offset) -{ - this->parsed_offset = offset; -} - -qpdf_offset_t -QPDFObject::getParsedOffset() +QPDFObject::doResolve() { - return this->parsed_offset; + auto og = value->og; + QPDF::Resolver::resolve(value->qpdf, og); } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 135b7c39..19a85034 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -8,6 +8,7 @@ #include <qpdf/QPDFLogger.hh> #include <qpdf/QPDFMatrix.hh> #include <qpdf/QPDFPageObjectHelper.hh> +#include <qpdf/QPDFParser.hh> #include <qpdf/QPDF_Array.hh> #include <qpdf/QPDF_Bool.hh> #include <qpdf/QPDF_Dictionary.hh> @@ -20,6 +21,7 @@ #include <qpdf/QPDF_Reserved.hh> #include <qpdf/QPDF_Stream.hh> #include <qpdf/QPDF_String.hh> +#include <qpdf/QPDF_Unresolved.hh> #include <qpdf/SparseOHArray.hh> #include <qpdf/QIntC.hh> @@ -233,29 +235,6 @@ LastChar::getLastChar() return this->last_char; } 
-QPDFObjectHandle::QPDFObjectHandle() : - initialized(false), - qpdf(nullptr), - reserved(false) -{ -} - -QPDFObjectHandle::QPDFObjectHandle(QPDF* qpdf, QPDFObjGen const& og) : - initialized(true), - qpdf(qpdf), - og(og), - reserved(false) -{ -} - -QPDFObjectHandle::QPDFObjectHandle(std::shared_ptr<QPDFObject> const& data) : - initialized(true), - qpdf(nullptr), - obj(data), - reserved(false) -{ -} - void QPDFObjectHandle::releaseResolved() { @@ -272,26 +251,6 @@ QPDFObjectHandle::releaseResolved() } } -void -QPDFObjectHandle::setObjectDescriptionFromInput( - QPDFObjectHandle object, - QPDF* context, - std::string const& description, - std::shared_ptr<InputSource> input, - qpdf_offset_t offset) -{ - object.setObjectDescription( - context, - (input->getName() + ", " + description + " at offset " + - QUtil::int_to_string(offset))); -} - -bool -QPDFObjectHandle::isInitialized() const -{ - return this->initialized; -} - QPDFObject::object_type_e QPDFObjectHandle::getTypeCode() { @@ -305,24 +264,90 @@ QPDFObjectHandle::getTypeName() return dereference() ? this->obj->getTypeName() : "uninitialized"; } -namespace +QPDF_Array* +QPDFObjectHandle::asArray() { - template <class T> - class QPDFObjectTypeAccessor - { - public: - static bool - check(std::shared_ptr<QPDFObject> const& o) - { - return (o && dynamic_cast<T const*>(o.get())); - } - }; -} // namespace + return dereference() ? obj->as<QPDF_Array>() : nullptr; +} + +QPDF_Bool* +QPDFObjectHandle::asBool() +{ + return dereference() ? obj->as<QPDF_Bool>() : nullptr; +} + +QPDF_Dictionary* +QPDFObjectHandle::asDictionary() +{ + return dereference() ? obj->as<QPDF_Dictionary>() : nullptr; +} + +QPDF_InlineImage* +QPDFObjectHandle::asInlineImage() +{ + return dereference() ? obj->as<QPDF_InlineImage>() : nullptr; +} + +QPDF_Integer* +QPDFObjectHandle::asInteger() +{ + return dereference() ? obj->as<QPDF_Integer>() : nullptr; +} + +QPDF_Name* +QPDFObjectHandle::asName() +{ + return dereference() ? 
obj->as<QPDF_Name>() : nullptr; +} + +QPDF_Null* +QPDFObjectHandle::asNull() +{ + return dereference() ? obj->as<QPDF_Null>() : nullptr; +} + +QPDF_Operator* +QPDFObjectHandle::asOperator() +{ + return dereference() ? obj->as<QPDF_Operator>() : nullptr; +} + +QPDF_Real* +QPDFObjectHandle::asReal() +{ + return dereference() ? obj->as<QPDF_Real>() : nullptr; +} + +QPDF_Reserved* +QPDFObjectHandle::asReserved() +{ + return dereference() ? obj->as<QPDF_Reserved>() : nullptr; +} + +QPDF_Stream* +QPDFObjectHandle::asStream() +{ + return dereference() ? obj->as<QPDF_Stream>() : nullptr; +} + +QPDF_Stream* +QPDFObjectHandle::asStreamWithAssert() +{ + auto stream = asStream(); + assertType("stream", stream); + return stream; +} + +QPDF_String* +QPDFObjectHandle::asString() +{ + return dereference() ? obj->as<QPDF_String>() : nullptr; +} bool QPDFObjectHandle::isBool() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Bool>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_boolean); } bool @@ -331,26 +356,26 @@ QPDFObjectHandle::isDirectNull() const // Don't call dereference() -- this is a const method, and we know // objid == 0, so there's nothing to resolve. 
return ( - this->initialized && (getObjectID() == 0) && - QPDFObjectTypeAccessor<QPDF_Null>::check(obj)); + isInitialized() && (getObjectID() == 0) && + (obj->getTypeCode() == QPDFObject::ot_null)); } bool QPDFObjectHandle::isNull() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Null>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_null); } bool QPDFObjectHandle::isInteger() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Integer>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_integer); } bool QPDFObjectHandle::isReal() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Real>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_real); } bool @@ -387,57 +412,49 @@ QPDFObjectHandle::getValueAsNumber(double& value) bool QPDFObjectHandle::isName() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Name>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_name); } bool QPDFObjectHandle::isString() { - return dereference() && QPDFObjectTypeAccessor<QPDF_String>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_string); } bool QPDFObjectHandle::isOperator() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Operator>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_operator); } bool QPDFObjectHandle::isInlineImage() { - return dereference() && - QPDFObjectTypeAccessor<QPDF_InlineImage>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_inlineimage); } bool QPDFObjectHandle::isArray() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Array>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_array); } bool QPDFObjectHandle::isDictionary() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Dictionary>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_dictionary); } bool 
QPDFObjectHandle::isStream() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Stream>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_stream); } bool QPDFObjectHandle::isReserved() { - // dereference will clear reserved if this has been replaced - return dereference() && this->reserved; -} - -bool -QPDFObjectHandle::isIndirect() -{ - return this->initialized && (getObjectID() != 0); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_reserved); } bool @@ -475,8 +492,9 @@ QPDFObjectHandle::isStreamOfType( bool QPDFObjectHandle::getBoolValue() { - if (isBool()) { - return dynamic_cast<QPDF_Bool*>(obj.get())->getVal(); + auto boolean = asBool(); + if (boolean) { + return boolean->getVal(); } else { typeWarning("boolean", "returning false"); QTC::TC("qpdf", "QPDFObjectHandle boolean returning false"); @@ -487,10 +505,11 @@ QPDFObjectHandle::getBoolValue() bool QPDFObjectHandle::getValueAsBool(bool& value) { - if (!isBool()) { + auto boolean = asBool(); + if (boolean == nullptr) { return false; } - value = dynamic_cast<QPDF_Bool*>(obj.get())->getVal(); + value = boolean->getVal(); return true; } @@ -499,8 +518,9 @@ QPDFObjectHandle::getValueAsBool(bool& value) long long QPDFObjectHandle::getIntValue() { - if (isInteger()) { - return dynamic_cast<QPDF_Integer*>(obj.get())->getVal(); + auto integer = asInteger(); + if (integer) { + return integer->getVal(); } else { typeWarning("integer", "returning 0"); QTC::TC("qpdf", "QPDFObjectHandle integer returning 0"); @@ -511,10 +531,11 @@ QPDFObjectHandle::getIntValue() bool QPDFObjectHandle::getValueAsInt(long long& value) { - if (!isInteger()) { + auto integer = asInteger(); + if (integer == nullptr) { return false; } - value = dynamic_cast<QPDF_Integer*>(obj.get())->getVal(); + value = integer->getVal(); return true; } @@ -610,8 +631,9 @@ QPDFObjectHandle::getValueAsUInt(unsigned int& value) std::string QPDFObjectHandle::getRealValue() { - if (isReal()) { - return 
dynamic_cast<QPDF_Real*>(obj.get())->getVal(); + auto real = asReal(); + if (real) { + return real->getVal(); } else { typeWarning("real", "returning 0.0"); QTC::TC("qpdf", "QPDFObjectHandle real returning 0.0"); @@ -622,10 +644,11 @@ QPDFObjectHandle::getRealValue() bool QPDFObjectHandle::getValueAsReal(std::string& value) { - if (!isReal()) { + auto real = asReal(); + if (real == nullptr) { return false; } - value = dynamic_cast<QPDF_Real*>(obj.get())->getVal(); + value = real->getVal(); return true; } @@ -634,8 +657,9 @@ QPDFObjectHandle::getValueAsReal(std::string& value) std::string QPDFObjectHandle::getName() { - if (isName()) { - return dynamic_cast<QPDF_Name*>(obj.get())->getName(); + auto name = asName(); + if (name) { + return name->getName(); } else { typeWarning("name", "returning dummy name"); QTC::TC("qpdf", "QPDFObjectHandle name returning dummy name"); @@ -646,10 +670,11 @@ QPDFObjectHandle::getName() bool QPDFObjectHandle::getValueAsName(std::string& value) { - if (!isName()) { + auto name = asName(); + if (name == nullptr) { return false; } - value = dynamic_cast<QPDF_Name*>(obj.get())->getName(); + value = name->getName(); return true; } @@ -658,8 +683,9 @@ QPDFObjectHandle::getValueAsName(std::string& value) std::string QPDFObjectHandle::getStringValue() { - if (isString()) { - return dynamic_cast<QPDF_String*>(obj.get())->getVal(); + auto str = asString(); + if (str) { + return str->getVal(); } else { typeWarning("string", "returning empty string"); QTC::TC("qpdf", "QPDFObjectHandle string returning empty string"); @@ -670,18 +696,20 @@ QPDFObjectHandle::getStringValue() bool QPDFObjectHandle::getValueAsString(std::string& value) { - if (!isString()) { + auto str = asString(); + if (str == nullptr) { return false; } - value = dynamic_cast<QPDF_String*>(obj.get())->getVal(); + value = str->getVal(); return true; } std::string QPDFObjectHandle::getUTF8Value() { - if (isString()) { - return dynamic_cast<QPDF_String*>(obj.get())->getUTF8Val(); + 
auto str = asString(); + if (str) { + return str->getUTF8Val(); } else { typeWarning("string", "returning empty string"); QTC::TC("qpdf", "QPDFObjectHandle string returning empty utf8"); @@ -692,10 +720,11 @@ QPDFObjectHandle::getUTF8Value() bool QPDFObjectHandle::getValueAsUTF8(std::string& value) { - if (!isString()) { + auto str = asString(); + if (str == nullptr) { return false; } - value = dynamic_cast<QPDF_String*>(obj.get())->getUTF8Val(); + value = str->getUTF8Val(); return true; } @@ -704,8 +733,9 @@ QPDFObjectHandle::getValueAsUTF8(std::string& value) std::string QPDFObjectHandle::getOperatorValue() { - if (isOperator()) { - return dynamic_cast<QPDF_Operator*>(obj.get())->getVal(); + auto op = asOperator(); + if (op) { + return op->getVal(); } else { typeWarning("operator", "returning fake value"); QTC::TC("qpdf", "QPDFObjectHandle operator returning fake value"); @@ -716,18 +746,20 @@ QPDFObjectHandle::getOperatorValue() bool QPDFObjectHandle::getValueAsOperator(std::string& value) { - if (!isOperator()) { + auto op = asOperator(); + if (op == nullptr) { return false; } - value = dynamic_cast<QPDF_Operator*>(obj.get())->getVal(); + value = op->getVal(); return true; } std::string QPDFObjectHandle::getInlineImageValue() { - if (isInlineImage()) { - return dynamic_cast<QPDF_InlineImage*>(obj.get())->getVal(); + auto image = asInlineImage(); + if (image) { + return image->getVal(); } else { typeWarning("inlineimage", "returning empty data"); QTC::TC("qpdf", "QPDFObjectHandle inlineimage returning empty data"); @@ -738,10 +770,11 @@ QPDFObjectHandle::getInlineImageValue() bool QPDFObjectHandle::getValueAsInlineImage(std::string& value) { - if (!isInlineImage()) { + auto image = asInlineImage(); + if (image == nullptr) { return false; } - value = dynamic_cast<QPDF_InlineImage*>(obj.get())->getVal(); + value = image->getVal(); return true; } @@ -756,8 +789,9 @@ QPDFObjectHandle::aitems() int QPDFObjectHandle::getArrayNItems() { - if (isArray()) { - return 
dynamic_cast<QPDF_Array*>(obj.get())->getNItems(); + auto array = asArray(); + if (array) { + return array->getNItems(); } else { typeWarning("array", "treating as empty"); QTC::TC("qpdf", "QPDFObjectHandle array treating as empty"); @@ -769,11 +803,12 @@ QPDFObjectHandle QPDFObjectHandle::getArrayItem(int n) { QPDFObjectHandle result; - if (isArray() && (n < getArrayNItems()) && (n >= 0)) { - result = dynamic_cast<QPDF_Array*>(obj.get())->getItem(n); + auto array = asArray(); + if (array && (n < array->getNItems()) && (n >= 0)) { + result = array->getItem(n); } else { result = newNull(); - if (isArray()) { + if (array) { objectWarning("returning null for out of bounds array access"); QTC::TC("qpdf", "QPDFObjectHandle array bounds"); } else { @@ -782,7 +817,7 @@ QPDFObjectHandle::getArrayItem(int n) } QPDF* context = nullptr; std::string description; - if (this->obj->getDescription(context, description)) { + if (obj->getDescription(context, description)) { result.setObjectDescription( context, description + " -> null returned from invalid array access"); @@ -794,14 +829,12 @@ QPDFObjectHandle::getArrayItem(int n) bool QPDFObjectHandle::isRectangle() { - if (!isArray()) { - return false; - } - if (getArrayNItems() != 4) { + auto array = asArray(); + if ((array == nullptr) || (array->getNItems() != 4)) { return false; } for (int i = 0; i < 4; ++i) { - if (!getArrayItem(i).isNumber()) { + if (!array->getItem(i).isNumber()) { return false; } } @@ -811,14 +844,12 @@ QPDFObjectHandle::isRectangle() bool QPDFObjectHandle::isMatrix() { - if (!isArray()) { - return false; - } - if (getArrayNItems() != 6) { + auto array = asArray(); + if ((array == nullptr) || (array->getNItems() != 6)) { return false; } for (int i = 0; i < 6; ++i) { - if (!getArrayItem(i).isNumber()) { + if (!array->getItem(i).isNumber()) { return false; } } @@ -830,13 +861,14 @@ QPDFObjectHandle::getArrayAsRectangle() { Rectangle result; if (isRectangle()) { + auto array = asArray(); // Rectangle 
coordinates are always supposed to be llx, lly, // urx, ury, but files have been found in the wild where // llx > urx or lly > ury. - double i0 = getArrayItem(0).getNumericValue(); - double i1 = getArrayItem(1).getNumericValue(); - double i2 = getArrayItem(2).getNumericValue(); - double i3 = getArrayItem(3).getNumericValue(); + double i0 = array->getItem(0).getNumericValue(); + double i1 = array->getItem(1).getNumericValue(); + double i2 = array->getItem(2).getNumericValue(); + double i3 = array->getItem(3).getNumericValue(); result = Rectangle( std::min(i0, i2), std::min(i1, i3), @@ -851,13 +883,14 @@ QPDFObjectHandle::getArrayAsMatrix() { Matrix result; if (isMatrix()) { + auto array = asArray(); result = Matrix( - getArrayItem(0).getNumericValue(), - getArrayItem(1).getNumericValue(), - getArrayItem(2).getNumericValue(), - getArrayItem(3).getNumericValue(), - getArrayItem(4).getNumericValue(), - getArrayItem(5).getNumericValue()); + array->getItem(0).getNumericValue(), + array->getItem(1).getNumericValue(), + array->getItem(2).getNumericValue(), + array->getItem(3).getNumericValue(), + array->getItem(4).getNumericValue(), + array->getItem(5).getNumericValue()); } return result; } @@ -866,8 +899,9 @@ std::vector<QPDFObjectHandle> QPDFObjectHandle::getArrayAsVector() { std::vector<QPDFObjectHandle> result; - if (isArray()) { - dynamic_cast<QPDF_Array*>(obj.get())->getAsVector(result); + auto array = asArray(); + if (array) { + array->getAsVector(result); } else { typeWarning("array", "treating as empty"); QTC::TC("qpdf", "QPDFObjectHandle array treating as empty vector"); @@ -880,9 +914,10 @@ QPDFObjectHandle::getArrayAsVector() void QPDFObjectHandle::setArrayItem(int n, QPDFObjectHandle const& item) { - if (isArray()) { + auto array = asArray(); + if (array) { checkOwnership(item); - dynamic_cast<QPDF_Array*>(obj.get())->setItem(n, item); + array->setItem(n, item); } else { typeWarning("array", "ignoring attempt to set item"); QTC::TC("qpdf", "QPDFObjectHandle 
array ignoring set item"); @@ -892,11 +927,12 @@ QPDFObjectHandle::setArrayItem(int n, QPDFObjectHandle const& item) void QPDFObjectHandle::setArrayFromVector(std::vector<QPDFObjectHandle> const& items) { - if (isArray()) { + auto array = asArray(); + if (array) { for (auto const& item: items) { checkOwnership(item); } - dynamic_cast<QPDF_Array*>(obj.get())->setFromVector(items); + array->setFromVector(items); } else { typeWarning("array", "ignoring attempt to replace items"); QTC::TC("qpdf", "QPDFObjectHandle array ignoring replace items"); @@ -906,8 +942,9 @@ QPDFObjectHandle::setArrayFromVector(std::vector<QPDFObjectHandle> const& items) void QPDFObjectHandle::insertItem(int at, QPDFObjectHandle const& item) { - if (isArray()) { - dynamic_cast<QPDF_Array*>(obj.get())->insertItem(at, item); + auto array = asArray(); + if (array) { + array->insertItem(at, item); } else { typeWarning("array", "ignoring attempt to insert item"); QTC::TC("qpdf", "QPDFObjectHandle array ignoring insert item"); @@ -924,9 +961,10 @@ QPDFObjectHandle::insertItemAndGetNew(int at, QPDFObjectHandle const& item) void QPDFObjectHandle::appendItem(QPDFObjectHandle const& item) { - if (isArray()) { + auto array = asArray(); + if (array) { checkOwnership(item); - dynamic_cast<QPDF_Array*>(obj.get())->appendItem(item); + array->appendItem(item); } else { typeWarning("array", "ignoring attempt to append item"); QTC::TC("qpdf", "QPDFObjectHandle array ignoring append item"); @@ -943,10 +981,11 @@ QPDFObjectHandle::appendItemAndGetNew(QPDFObjectHandle const& item) void QPDFObjectHandle::eraseItem(int at) { - if (isArray() && (at < getArrayNItems()) && (at >= 0)) { - dynamic_cast<QPDF_Array*>(obj.get())->eraseItem(at); + auto array = asArray(); + if (array && (at < array->getNItems()) && (at >= 0)) { + array->eraseItem(at); } else { - if (isArray()) { + if (array) { objectWarning("ignoring attempt to erase out of bounds array item"); QTC::TC("qpdf", "QPDFObjectHandle erase array bounds"); } else { @@ 
-960,8 +999,9 @@ QPDFObjectHandle QPDFObjectHandle::eraseItemAndGetOld(int at) { auto result = QPDFObjectHandle::newNull(); - if (isArray() && (at < getArrayNItems()) && (at >= 0)) { - result = getArrayItem(at); + auto array = asArray(); + if (array && (at < array->getNItems()) && (at >= 0)) { + result = array->getItem(at); } eraseItem(at); return result; @@ -978,8 +1018,9 @@ QPDFObjectHandle::ditems() bool QPDFObjectHandle::hasKey(std::string const& key) { - if (isDictionary()) { - return dynamic_cast<QPDF_Dictionary*>(obj.get())->hasKey(key); + auto dict = asDictionary(); + if (dict) { + return dict->hasKey(key); } else { typeWarning( "dictionary", "returning false for a key containment request"); @@ -992,15 +1033,16 @@ QPDFObjectHandle QPDFObjectHandle::getKey(std::string const& key) { QPDFObjectHandle result; - if (isDictionary()) { - result = dynamic_cast<QPDF_Dictionary*>(obj.get())->getKey(key); + auto dict = asDictionary(); + if (dict) { + result = dict->getKey(key); } else { typeWarning("dictionary", "returning null for attempted key retrieval"); QTC::TC("qpdf", "QPDFObjectHandle dictionary null for getKey"); result = newNull(); QPDF* qpdf = nullptr; std::string description; - if (this->obj->getDescription(qpdf, description)) { + if (obj->getDescription(qpdf, description)) { result.setObjectDescription( qpdf, (description + " -> null returned from getting key " + key + @@ -1020,8 +1062,9 @@ std::set<std::string> QPDFObjectHandle::getKeys() { std::set<std::string> result; - if (isDictionary()) { - result = dynamic_cast<QPDF_Dictionary*>(obj.get())->getKeys(); + auto dict = asDictionary(); + if (dict) { + result = dict->getKeys(); } else { typeWarning("dictionary", "treating as empty"); QTC::TC("qpdf", "QPDFObjectHandle dictionary empty set for getKeys"); @@ -1033,8 +1076,9 @@ std::map<std::string, QPDFObjectHandle> QPDFObjectHandle::getDictAsMap() { std::map<std::string, QPDFObjectHandle> result; - if (isDictionary()) { - result = 
dynamic_cast<QPDF_Dictionary*>(obj.get())->getAsMap(); + auto dict = asDictionary(); + if (dict) { + result = dict->getAsMap(); } else { typeWarning("dictionary", "treating as empty"); QTC::TC("qpdf", "QPDFObjectHandle dictionary empty map for asMap"); @@ -1219,23 +1263,16 @@ QPDFObjectHandle::getUniqueResourceName( " QPDFObjectHandle::getUniqueResourceName"); } -// Indirect object accessors -QPDF* -QPDFObjectHandle::getOwningQPDF() -{ - // Will be null for direct objects - return this->qpdf; -} - // Dictionary mutators void QPDFObjectHandle::replaceKey( std::string const& key, QPDFObjectHandle const& value) { - if (isDictionary()) { + auto dict = asDictionary(); + if (dict) { checkOwnership(value); - dynamic_cast<QPDF_Dictionary*>(obj.get())->replaceKey(key, value); + dict->replaceKey(key, value); } else { typeWarning("dictionary", "ignoring key replacement request"); QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring replaceKey"); @@ -1262,8 +1299,9 @@ QPDFObjectHandle::replaceKeyAndGetOld( void QPDFObjectHandle::removeKey(std::string const& key) { - if (isDictionary()) { - dynamic_cast<QPDF_Dictionary*>(obj.get())->removeKey(key); + auto dict = asDictionary(); + if (dict) { + dict->removeKey(key); } else { typeWarning("dictionary", "ignoring key removal request"); QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring removeKey"); @@ -1274,8 +1312,9 @@ QPDFObjectHandle QPDFObjectHandle::removeKeyAndGetOld(std::string const& key) { auto result = QPDFObjectHandle::newNull(); - if (isDictionary()) { - result = getKey(key); + auto dict = asDictionary(); + if (dict) { + result = dict->getKey(key); } removeKey(key); return result; @@ -1292,50 +1331,43 @@ QPDFObjectHandle::replaceOrRemoveKey( QPDFObjectHandle QPDFObjectHandle::getDict() { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->getDict(); + return asStreamWithAssert()->getDict(); } void QPDFObjectHandle::setFilterOnWrite(bool val) { - assertStream(); - 
dynamic_cast<QPDF_Stream*>(obj.get())->setFilterOnWrite(val); + asStreamWithAssert()->setFilterOnWrite(val); } bool QPDFObjectHandle::getFilterOnWrite() { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->getFilterOnWrite(); + return asStreamWithAssert()->getFilterOnWrite(); } bool QPDFObjectHandle::isDataModified() { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->isDataModified(); + return asStreamWithAssert()->isDataModified(); } void QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict) { - assertStream(); - dynamic_cast<QPDF_Stream*>(obj.get())->replaceDict(new_dict); + asStreamWithAssert()->replaceDict(new_dict); } std::shared_ptr<Buffer> QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level) { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->getStreamData(level); + return asStreamWithAssert()->getStreamData(level); } std::shared_ptr<Buffer> QPDFObjectHandle::getRawStreamData() { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->getRawStreamData(); + return asStreamWithAssert()->getRawStreamData(); } bool @@ -1347,8 +1379,7 @@ QPDFObjectHandle::pipeStreamData( bool suppress_warnings, bool will_retry) { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->pipeStreamData( + return asStreamWithAssert()->pipeStreamData( p, filtering_attempted, encode_flags, @@ -1365,9 +1396,8 @@ QPDFObjectHandle::pipeStreamData( bool suppress_warnings, bool will_retry) { - assertStream(); bool filtering_attempted; - dynamic_cast<QPDF_Stream*>(obj.get())->pipeStreamData( + asStreamWithAssert()->pipeStreamData( p, &filtering_attempted, encode_flags, @@ -1401,9 +1431,7 @@ QPDFObjectHandle::replaceStreamData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) { - assertStream(); - dynamic_cast<QPDF_Stream*>(obj.get())->replaceStreamData( - data, filter, decode_parms); + asStreamWithAssert()->replaceStreamData(data, filter, decode_parms); } void @@ -1412,14 +1440,12 
@@ QPDFObjectHandle::replaceStreamData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) { - assertStream(); auto b = std::make_shared<Buffer>(data.length()); unsigned char* bp = b->getBuffer(); if (bp) { memcpy(bp, data.c_str(), data.length()); } - dynamic_cast<QPDF_Stream*>(obj.get())->replaceStreamData( - b, filter, decode_parms); + asStreamWithAssert()->replaceStreamData(b, filter, decode_parms); } void @@ -1428,9 +1454,7 @@ QPDFObjectHandle::replaceStreamData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) { - assertStream(); - dynamic_cast<QPDF_Stream*>(obj.get())->replaceStreamData( - provider, filter, decode_parms); + asStreamWithAssert()->replaceStreamData(provider, filter, decode_parms); } namespace @@ -1479,11 +1503,9 @@ QPDFObjectHandle::replaceStreamData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) { - assertStream(); auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); - dynamic_cast<QPDF_Stream*>(obj.get())->replaceStreamData( - sdp, filter, decode_parms); + asStreamWithAssert()->replaceStreamData(sdp, filter, decode_parms); } void @@ -1492,29 +1514,9 @@ QPDFObjectHandle::replaceStreamData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) { - assertStream(); auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); - dynamic_cast<QPDF_Stream*>(obj.get())->replaceStreamData( - sdp, filter, decode_parms); -} - -QPDFObjGen -QPDFObjectHandle::getObjGen() const -{ - return og; -} - -int -QPDFObjectHandle::getObjectID() const -{ - return og.getObj(); -} - -int -QPDFObjectHandle::getGeneration() const -{ - return og.getGen(); + asStreamWithAssert()->replaceStreamData(sdp, filter, decode_parms); } std::map<std::string, QPDFObjectHandle> @@ -1529,10 +1531,11 @@ QPDFObjectHandle::arrayOrStreamToStreamArray( { all_description = description; std::vector<QPDFObjectHandle> result; - if (isArray()) { - int n_items = 
getArrayNItems(); + auto array = asArray(); + if (array) { + int n_items = array->getNItems(); for (int i = 0; i < n_items; ++i) { - QPDFObjectHandle item = getArrayItem(i); + QPDFObjectHandle item = array->getItem(i); if (item.isStream()) { result.push_back(item); } else { @@ -1664,16 +1667,15 @@ QPDFObjectHandle::coalesceContentStreams() // files may have pages that are invalid in other ways. return; } - QPDF* qpdf = getOwningQPDF(); - if (qpdf == nullptr) { - // Should not be possible for a page object to not have an - // owning PDF unless it was manually constructed in some - // incorrect way. However, it can happen in a PDF file whose - // page structure is direct, which is against spec but still - // possible to hand construct, as in fuzz issue 27393. - throw std::runtime_error("coalesceContentStreams called on object" - " with no associated PDF file"); - } + // Should not be possible for a page object to not have an + // owning PDF unless it was manually constructed in some + // incorrect way. However, it can happen in a PDF file whose + // page structure is direct, which is against spec but still + // possible to hand construct, as in fuzz issue 27393. 
+ QPDF* qpdf = getOwningQPDF( + false, + "coalesceContentStreams called on object with no associated PDF file"); + QPDFObjectHandle new_contents = newStream(qpdf); this->replaceKey("/Contents", new_contents); @@ -1700,18 +1702,16 @@ QPDFObjectHandle::unparseResolved() if (!dereference()) { throw std::logic_error( "attempted to dereference an uninitialized QPDFObjectHandle"); - } else if (this->reserved) { - throw std::logic_error( - "QPDFObjectHandle: attempting to unparse a reserved object"); } - return this->obj->unparse(); + return obj->unparse(); } std::string QPDFObjectHandle::unparseBinary() { - if (this->isString()) { - return dynamic_cast<QPDF_String*>(this->obj.get())->unparse(true); + auto str = asString(); + if (str) { + return str->unparse(true); } else { return unparse(); } @@ -1727,16 +1727,13 @@ QPDFObjectHandle::getJSON(bool dereference_indirect) JSON QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect) { - if ((!dereference_indirect) && this->isIndirect()) { + if ((!dereference_indirect) && isIndirect()) { return JSON::makeString(unparse()); } else if (!dereference()) { throw std::logic_error( "attempted to dereference an uninitialized QPDFObjectHandle"); - } else if (this->reserved) { - throw std::logic_error( - "QPDFObjectHandle: attempting to unparse a reserved object"); } else { - return this->obj->getJSON(json_version); + return obj->getJSON(json_version); } } @@ -1748,8 +1745,7 @@ QPDFObjectHandle::getStreamJSON( Pipeline* p, std::string const& data_filename) { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->getStreamJSON( + return asStreamWithAssert()->getStreamJSON( json_version, json_data, decode_level, p, data_filename); } @@ -1918,8 +1914,8 @@ QPDFObjectHandle::parseContentStream_data( tokenizer.readToken(input, "content", true); qpdf_offset_t offset = input->getLastOffset(); input->seek(offset, SEEK_SET); - QPDFObjectHandle obj = parseInternal( - input, "content", tokenizer, empty, nullptr, context, 
true); + auto obj = QPDFParser(input, "content", tokenizer, nullptr, context) + .parse(empty, true); if (!obj.isInitialized()) { // EOF break; @@ -1969,8 +1965,7 @@ QPDFObjectHandle::addContentTokenFilter(std::shared_ptr<TokenFilter> filter) void QPDFObjectHandle::addTokenFilter(std::shared_ptr<TokenFilter> filter) { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->addTokenFilter(filter); + return asStreamWithAssert()->addTokenFilter(filter); } QPDFObjectHandle @@ -1982,497 +1977,8 @@ QPDFObjectHandle::parse( StringDecrypter* decrypter, QPDF* context) { - return parseInternal( - input, object_description, tokenizer, empty, decrypter, context, false); -} - -QPDFObjectHandle -QPDFObjectHandle::parseInternal( - std::shared_ptr<InputSource> input, - std::string const& object_description, - QPDFTokenizer& tokenizer, - bool& empty, - StringDecrypter* decrypter, - QPDF* context, - bool content_stream) -{ - // This method must take care not to resolve any objects. Don't - // check the type of any object without first ensuring that it is - // a direct object. Otherwise, doing so may have the side effect - // of reading the object and changing the file pointer. If you do - // this, it will cause a logic error to be thrown from - // QPDF::inParse(). 
- - QPDF::ParseGuard pg(context); - - empty = false; - - QPDFObjectHandle object; - bool set_offset = false; - - std::vector<SparseOHArray> olist_stack; - olist_stack.push_back(SparseOHArray()); - std::vector<parser_state_e> state_stack; - state_stack.push_back(st_top); - std::vector<qpdf_offset_t> offset_stack; - qpdf_offset_t offset = input->tell(); - offset_stack.push_back(offset); - bool done = false; - int bad_count = 0; - int good_count = 0; - bool b_contents = false; - std::vector<std::string> contents_string_stack; - contents_string_stack.push_back(""); - std::vector<qpdf_offset_t> contents_offset_stack; - contents_offset_stack.push_back(-1); - while (!done) { - bool bad = false; - SparseOHArray& olist = olist_stack.back(); - parser_state_e state = state_stack.back(); - offset = offset_stack.back(); - std::string& contents_string = contents_string_stack.back(); - qpdf_offset_t& contents_offset = contents_offset_stack.back(); - - object = QPDFObjectHandle(); - set_offset = false; - - QPDFTokenizer::Token token = - tokenizer.readToken(input, object_description, true); - std::string const& token_error_message = token.getErrorMessage(); - if (!token_error_message.empty()) { - // Tokens other than tt_bad can still generate warnings. 
- warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - token_error_message)); - } - - switch (token.getType()) { - case QPDFTokenizer::tt_eof: - if (!content_stream) { - QTC::TC("qpdf", "QPDFObjectHandle eof in parseInternal"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "unexpected EOF")); - } - bad = true; - state = st_eof; - break; - - case QPDFTokenizer::tt_bad: - QTC::TC("qpdf", "QPDFObjectHandle bad token in parse"); - bad = true; - object = newNull(); - break; - - case QPDFTokenizer::tt_brace_open: - case QPDFTokenizer::tt_brace_close: - QTC::TC("qpdf", "QPDFObjectHandle bad brace"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "treating unexpected brace token as null")); - bad = true; - object = newNull(); - break; - - case QPDFTokenizer::tt_array_close: - if (state == st_array) { - state = st_stop; - } else { - QTC::TC("qpdf", "QPDFObjectHandle bad array close"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "treating unexpected array close token as null")); - bad = true; - object = newNull(); - } - break; - - case QPDFTokenizer::tt_dict_close: - if (state == st_dictionary) { - state = st_stop; - } else { - QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "unexpected dictionary close token")); - bad = true; - object = newNull(); - } - break; - - case QPDFTokenizer::tt_array_open: - case QPDFTokenizer::tt_dict_open: - if (olist_stack.size() > 500) { - QTC::TC("qpdf", "QPDFObjectHandle too deep"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "ignoring 
excessively deeply nested data structure")); - bad = true; - object = newNull(); - state = st_top; - } else { - olist_stack.push_back(SparseOHArray()); - state = st_start; - offset_stack.push_back(input->tell()); - state_stack.push_back( - (token.getType() == QPDFTokenizer::tt_array_open) - ? st_array - : st_dictionary); - b_contents = false; - contents_string_stack.push_back(""); - contents_offset_stack.push_back(-1); - } - break; - - case QPDFTokenizer::tt_bool: - object = newBool((token.getValue() == "true")); - break; - - case QPDFTokenizer::tt_null: - object = newNull(); - break; - - case QPDFTokenizer::tt_integer: - object = newInteger(QUtil::string_to_ll(token.getValue().c_str())); - break; - - case QPDFTokenizer::tt_real: - object = newReal(token.getValue()); - break; - - case QPDFTokenizer::tt_name: - { - std::string name = token.getValue(); - object = newName(name); - - if (name == "/Contents") { - b_contents = true; - } else { - b_contents = false; - } - } - break; - - case QPDFTokenizer::tt_word: - { - std::string const& value = token.getValue(); - if (content_stream) { - object = QPDFObjectHandle::newOperator(value); - } else if ( - (value == "R") && (state != st_top) && - (olist.size() >= 2) && - (!olist.at(olist.size() - 1).isIndirect()) && - (olist.at(olist.size() - 1).isInteger()) && - (!olist.at(olist.size() - 2).isIndirect()) && - (olist.at(olist.size() - 2).isInteger())) { - if (context == nullptr) { - QTC::TC( - "qpdf", - "QPDFObjectHandle indirect without context"); - throw std::logic_error( - "QPDFObjectHandle::parse called without context" - " on an object with indirect references"); - } - // Try to resolve indirect objects - object = newIndirect( - context, - QPDFObjGen( - olist.at(olist.size() - 2).getIntValueAsInt(), - olist.at(olist.size() - 1).getIntValueAsInt())); - olist.remove_last(); - olist.remove_last(); - } else if ((value == "endobj") && (state == st_top)) { - // We just saw endobj without having read - // anything. 
Treat this as a null and do not move - // the input source's offset. - object = newNull(); - input->seek(input->getLastOffset(), SEEK_SET); - empty = true; - } else { - QTC::TC("qpdf", "QPDFObjectHandle treat word as string"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "unknown token while reading object;" - " treating as string")); - bad = true; - object = newString(value); - } - } - break; - - case QPDFTokenizer::tt_string: - { - std::string val = token.getValue(); - if (decrypter) { - if (b_contents) { - contents_string = val; - contents_offset = input->getLastOffset(); - b_contents = false; - } - decrypter->decryptString(val); - } - object = QPDFObjectHandle::newString(val); - } - - break; - - default: - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "treating unknown token type as null while " - "reading object")); - bad = true; - object = newNull(); - break; - } - - if ((!object.isInitialized()) && - (!((state == st_start) || (state == st_stop) || - (state == st_eof)))) { - throw std::logic_error("QPDFObjectHandle::parseInternal: " - "unexpected uninitialized object"); - object = newNull(); - } - - if (bad) { - ++bad_count; - good_count = 0; - } else { - ++good_count; - if (good_count > 3) { - bad_count = 0; - } - } - if (bad_count > 5) { - // We had too many consecutive errors without enough - // intervening successful objects. Give up. 
- warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "too many errors; giving up on reading object")); - state = st_top; - object = newNull(); - } - - switch (state) { - case st_eof: - if (state_stack.size() > 1) { - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "parse error while reading object")); - } - done = true; - // In content stream mode, leave object uninitialized to - // indicate EOF - if (!content_stream) { - object = newNull(); - } - break; - - case st_dictionary: - case st_array: - setObjectDescriptionFromInput( - object, - context, - object_description, - input, - input->getLastOffset()); - object.setParsedOffset(input->getLastOffset()); - set_offset = true; - olist.append(object); - break; - - case st_top: - done = true; - break; - - case st_start: - break; - - case st_stop: - if ((state_stack.size() < 2) || (olist_stack.size() < 2)) { - throw std::logic_error( - "QPDFObjectHandle::parseInternal: st_stop encountered" - " with insufficient elements in stack"); - } - parser_state_e old_state = state_stack.back(); - state_stack.pop_back(); - if (old_state == st_array) { - // There's no newArray(SparseOHArray) since - // SparseOHArray is not part of the public API. - object = QPDFObjectHandle(QPDF_Array::create(olist)); - setObjectDescriptionFromInput( - object, context, object_description, input, offset); - // The `offset` points to the next of "[". Set the - // rewind offset to point to the beginning of "[". - // This has been explicitly tested with whitespace - // surrounding the array start delimiter. - // getLastOffset points to the array end token and - // therefore can't be used here. - object.setParsedOffset(offset - 1); - set_offset = true; - } else if (old_state == st_dictionary) { - // Convert list to map. Alternating elements are keys. 
- // Attempt to recover more or less gracefully from - // invalid dictionaries. - std::set<std::string> names; - size_t n_elements = olist.size(); - for (size_t i = 0; i < n_elements; ++i) { - QPDFObjectHandle oh = olist.at(i); - if ((!oh.isIndirect()) && oh.isName()) { - names.insert(oh.getName()); - } - } - - std::map<std::string, QPDFObjectHandle> dict; - int next_fake_key = 1; - for (unsigned int i = 0; i < olist.size(); ++i) { - QPDFObjectHandle key_obj = olist.at(i); - QPDFObjectHandle val; - if (key_obj.isIndirect() || (!key_obj.isName())) { - bool found_fake = false; - std::string candidate; - while (!found_fake) { - candidate = "/QPDFFake" + - QUtil::int_to_string(next_fake_key++); - found_fake = (names.count(candidate) == 0); - QTC::TC( - "qpdf", - "QPDFObjectHandle found fake", - (found_fake ? 0 : 1)); - } - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - offset, - "expected dictionary key but found" - " non-name object; inserting key " + - candidate)); - val = key_obj; - key_obj = newName(candidate); - } else if (i + 1 >= olist.size()) { - QTC::TC("qpdf", "QPDFObjectHandle no val for last key"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - offset, - "dictionary ended prematurely; " - "using null as value for last key")); - val = newNull(); - setObjectDescriptionFromInput( - val, context, object_description, input, offset); - } else { - val = olist.at(++i); - } - std::string key = key_obj.getName(); - if (dict.count(key) > 0) { - QTC::TC("qpdf", "QPDFObjectHandle duplicate dict key"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - offset, - "dictionary has duplicated key " + key + - "; last occurrence overrides earlier " - "ones")); - } - dict[key] = val; - } - if (!contents_string.empty() && dict.count("/Type") && - dict["/Type"].isNameAndEquals("/Sig") && - dict.count("/ByteRange") && dict.count("/Contents") 
&& - dict["/Contents"].isString()) { - dict["/Contents"] = - QPDFObjectHandle::newString(contents_string); - dict["/Contents"].setParsedOffset(contents_offset); - } - object = newDictionary(dict); - setObjectDescriptionFromInput( - object, context, object_description, input, offset); - // The `offset` points to the next of "<<". Set the - // rewind offset to point to the beginning of "<<". - // This has been explicitly tested with whitespace - // surrounding the dictionary start delimiter. - // getLastOffset points to the dictionary end token - // and therefore can't be used here. - object.setParsedOffset(offset - 2); - set_offset = true; - } - olist_stack.pop_back(); - offset_stack.pop_back(); - if (state_stack.back() == st_top) { - done = true; - } else { - olist_stack.back().append(object); - } - contents_string_stack.pop_back(); - contents_offset_stack.pop_back(); - } - } - - if (!set_offset) { - setObjectDescriptionFromInput( - object, context, object_description, input, offset); - object.setParsedOffset(offset); - } - return object; + return QPDFParser(input, object_description, tokenizer, decrypter, context) + .parse(empty, false); } qpdf_offset_t @@ -2485,31 +1991,6 @@ QPDFObjectHandle::getParsedOffset() } } -void -QPDFObjectHandle::setParsedOffset(qpdf_offset_t offset) -{ - // This is called during parsing on newly created direct objects, - // so we can't call dereference() here. - if (this->obj.get()) { - this->obj->setParsedOffset(offset); - } -} - -QPDFObjectHandle -QPDFObjectHandle::newIndirect(QPDF* qpdf, QPDFObjGen const& og) -{ - if (!og.isIndirect()) { - // Special case: QPDF uses objid 0 as a sentinel for direct - // objects, and the PDF specification doesn't allow for object - // 0. Treat indirect references to object 0 as null so that we - // never create an indirect object with objid 0. 
- QTC::TC("qpdf", "QPDFObjectHandle indirect with 0 objid"); - return newNull(); - } - - return QPDFObjectHandle(qpdf, og); -} - QPDFObjectHandle QPDFObjectHandle::newBool(bool value) { @@ -2679,8 +2160,7 @@ QPDFObjectHandle::newStream(QPDF* qpdf) QPDFObjectHandle stream_dict = newDictionary(); QPDFObjectHandle result = qpdf->makeIndirectObject(QPDFObjectHandle( QPDF_Stream::create(qpdf, QPDFObjGen(), stream_dict, 0, 0))); - result.dereference(); - QPDF_Stream* stream = dynamic_cast<QPDF_Stream*>(result.obj.get()); + auto stream = result.asStream(); stream->setObjGen(result.getObjGen()); return result; } @@ -2706,18 +2186,7 @@ QPDFObjectHandle::newStream(QPDF* qpdf, std::string const& data) QPDFObjectHandle QPDFObjectHandle::newReserved(QPDF* qpdf) { - // Reserve a spot for this object by assigning it an object - // number, but then return an unresolved handle to the object. - QPDFObjectHandle reserved = qpdf->makeIndirectObject(makeReserved()); - QPDFObjectHandle result = newIndirect(qpdf, reserved.getObjGen()); - result.reserved = true; - return result; -} - -QPDFObjectHandle -QPDFObjectHandle::makeReserved() -{ - return QPDFObjectHandle(QPDF_Reserved::create()); + return qpdf->makeIndirectObject(QPDFObjectHandle(QPDF_Reserved::create())); } void @@ -2763,12 +2232,7 @@ QPDFObjectHandle::shallowCopyInternal( QTC::TC("qpdf", "QPDFObjectHandle ERR shallow copy stream"); throw std::runtime_error("attempt to make a shallow copy of a stream"); } - - if (isArray() || isDictionary()) { - new_obj = QPDFObjectHandle(obj->shallowCopy()); - } else { - new_obj = *this; - } + new_obj = QPDFObjectHandle(obj->shallowCopy()); std::set<QPDFObjGen> visited; new_obj.copyObject(visited, false, first_level_only, false); @@ -2809,9 +2273,6 @@ QPDFObjectHandle::copyObject( " reserved object handle direct"); } - qpdf = nullptr; - og = QPDFObjGen(); - std::shared_ptr<QPDFObject> new_obj; if (isBool() || isInteger() || isName() || isNull() || isReal() || @@ -2819,9 +2280,10 @@ 
QPDFObjectHandle::copyObject( new_obj = obj->shallowCopy(); } else if (isArray()) { std::vector<QPDFObjectHandle> items; - int n = getArrayNItems(); + auto array = asArray(); + int n = array->getNItems(); for (int i = 0; i < n; ++i) { - items.push_back(getArrayItem(i)); + items.push_back(array->getItem(i)); if ((!first_level_only) && (cross_indirect || (!items.back().isIndirect()))) { items.back().copyObject( @@ -2831,8 +2293,9 @@ QPDFObjectHandle::copyObject( new_obj = QPDF_Array::create(items); } else if (isDictionary()) { std::map<std::string, QPDFObjectHandle> items; + auto dict = asDictionary(); for (auto const& key: getKeys()) { - items[key] = getKey(key); + items[key] = dict->getKey(key); if ((!first_level_only) && (cross_indirect || (!items[key].isIndirect()))) { items[key].copyObject( @@ -2880,7 +2343,7 @@ QPDFObjectHandle::makeDirect(bool allow_streams) void QPDFObjectHandle::assertInitialized() const { - if (!this->initialized) { + if (!isInitialized()) { throw std::logic_error("operation attempted on uninitialized " "QPDFObjectHandle"); } @@ -3095,8 +2558,9 @@ QPDFObjectHandle::isImage(bool exclude_imagemask) void QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const { - if ((this->qpdf != nullptr) && (item.qpdf != nullptr) && - (this->qpdf != item.qpdf)) { + auto qpdf = getOwningQPDF(); + auto item_qpdf = item.getOwningQPDF(); + if ((qpdf != nullptr) && (item_qpdf != nullptr) && (qpdf != item_qpdf)) { QTC::TC("qpdf", "QPDFObjectHandle check ownership"); throw std::logic_error( "Attempting to add an object from a different QPDF." 
@@ -3115,28 +2579,10 @@ QPDFObjectHandle::assertPageObject() bool QPDFObjectHandle::dereference() { - if (!this->initialized) { + if (!isInitialized()) { return false; } - if (this->obj.get() && getObjectID() && - QPDF::Resolver::objectChanged(this->qpdf, getObjGen(), this->obj)) { - this->obj = nullptr; - } - if (this->obj.get() == nullptr) { - std::shared_ptr<QPDFObject> obj = - QPDF::Resolver::resolve(this->qpdf, getObjGen()); - if (obj.get() == nullptr) { - // QPDF::resolve never returns an uninitialized object, but - // check just in case. - this->obj = QPDF_Null::create(); - } else if (dynamic_cast<QPDF_Reserved*>(obj.get())) { - // Do not resolve - this->reserved = true; - } else { - this->reserved = false; - this->obj = obj; - } - } + this->obj->resolve(); return true; } diff --git a/libqpdf/QPDFOutlineDocumentHelper.cc b/libqpdf/QPDFOutlineDocumentHelper.cc index 4149ea1e..5b2f71f6 100644 --- a/libqpdf/QPDFOutlineDocumentHelper.cc +++ b/libqpdf/QPDFOutlineDocumentHelper.cc @@ -82,7 +82,7 @@ QPDFOutlineDocumentHelper::resolveNamedDest(QPDFObjectHandle name) result = this->m->dest_dict.getKey(name.getName()); } } else if (name.isString()) { - if (nullptr == this->m->names_dest.get()) { + if (nullptr == this->m->names_dest) { QPDFObjectHandle names = this->qpdf.getRoot().getKey("/Names"); if (names.isDictionary()) { QPDFObjectHandle dests = names.getKey("/Dests"); diff --git a/libqpdf/QPDFPageLabelDocumentHelper.cc b/libqpdf/QPDFPageLabelDocumentHelper.cc index 805dc40f..08a35097 100644 --- a/libqpdf/QPDFPageLabelDocumentHelper.cc +++ b/libqpdf/QPDFPageLabelDocumentHelper.cc @@ -16,7 +16,7 @@ QPDFPageLabelDocumentHelper::QPDFPageLabelDocumentHelper(QPDF& qpdf) : bool QPDFPageLabelDocumentHelper::hasPageLabels() { - return nullptr != this->m->labels.get(); + return nullptr != this->m->labels; } QPDFObjectHandle diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index 9ad75cf8..23a54231 100644 --- a/libqpdf/QPDFPageObjectHelper.cc 
+++ b/libqpdf/QPDFPageObjectHelper.cc @@ -432,7 +432,8 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow) this->oh.replaceKey( "/Contents", QPDFObjectHandle::newStream( - this->oh.getOwningQPDF(), b.getBufferSharedPointer())); + this->oh.getOwningQPDF(false), + b.getBufferSharedPointer())); } } } else { @@ -683,11 +684,10 @@ QPDFPageObjectHelper::removeUnreferencedResources() QPDFPageObjectHelper QPDFPageObjectHelper::shallowCopyPage() { - QPDF* qpdf = this->oh.getOwningQPDF(); - if (!qpdf) { - throw std::runtime_error("QPDFPageObjectHelper::shallowCopyPage" - " called with a direct object"); - } + QPDF* qpdf = this->oh.getOwningQPDF( + false, + "QPDFPageObjectHelper::shallowCopyPage called with a direct object"); + QPDFObjectHandle new_page = this->oh.shallowCopy(); return QPDFPageObjectHelper(qpdf->makeIndirectObject(new_page)); } @@ -743,11 +743,10 @@ QPDFPageObjectHelper::getMatrixForTransformations(bool invert) QPDFObjectHandle QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations) { - QPDF* qpdf = this->oh.getOwningQPDF(); - if (!qpdf) { - throw std::runtime_error("QPDFPageObjectHelper::getFormXObjectForPage" - " called with a direct object"); - } + QPDF* qpdf = this->oh.getOwningQPDF( + false, + "QPDFPageObjectHelper::getFormXObjectForPage called with a direct " + "object"); QPDFObjectHandle result = QPDFObjectHandle::newStream(qpdf); QPDFObjectHandle newdict = result.getDict(); newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject")); @@ -917,11 +916,9 @@ QPDFPageObjectHelper::placeFormXObject( void QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh) { - QPDF* qpdf = this->oh.getOwningQPDF(); - if (!qpdf) { - throw std::runtime_error("QPDFPageObjectHelper::flattenRotation" - " called with a direct object"); - } + QPDF* qpdf = this->oh.getOwningQPDF( + false, + "QPDFPageObjectHelper::flattenRotation called with a direct object"); auto rotate_oh = this->oh.getKey("/Rotate"); int 
rotate = 0; @@ -1066,16 +1063,12 @@ QPDFPageObjectHelper::copyAnnotations( return; } - QPDF* from_qpdf = from_page.getObjectHandle().getOwningQPDF(); - if (!from_qpdf) { - throw std::runtime_error("QPDFPageObjectHelper::copyAnnotations:" - " from page is a direct object"); - } - QPDF* this_qpdf = this->oh.getOwningQPDF(); - if (!this_qpdf) { - throw std::runtime_error("QPDFPageObjectHelper::copyAnnotations:" - " this page is a direct object"); - } + QPDF* from_qpdf = from_page.getObjectHandle().getOwningQPDF( + false, + "QPDFPageObjectHelper::copyAnnotations: from page is a direct object"); + QPDF* this_qpdf = this->oh.getOwningQPDF( + false, + "QPDFPageObjectHelper::copyAnnotations: this page is a direct object"); std::vector<QPDFObjectHandle> new_annots; std::vector<QPDFObjectHandle> new_fields; diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc new file mode 100644 index 00000000..9aa1f426 --- /dev/null +++ b/libqpdf/QPDFParser.cc @@ -0,0 +1,453 @@ +#include <qpdf/QPDFParser.hh> + +#include <qpdf/QPDF.hh> +#include <qpdf/QPDFObjGen.hh> +#include <qpdf/QPDFObjectHandle.hh> +#include <qpdf/QTC.hh> +#include <qpdf/QUtil.hh> + +namespace +{ + struct StackFrame + { + StackFrame(std::shared_ptr<InputSource> input) : + offset(input->tell()), + contents_string(""), + contents_offset(-1) + { + } + + std::vector<QPDFObjectHandle> olist; + qpdf_offset_t offset; + std::string contents_string; + qpdf_offset_t contents_offset; + }; +} // namespace + +QPDFObjectHandle +QPDFParser::parse(bool& empty, bool content_stream) +{ + // This method must take care not to resolve any objects. Don't + // check the type of any object without first ensuring that it is + // a direct object. Otherwise, doing so may have the side effect + // of reading the object and changing the file pointer. If you do + // this, it will cause a logic error to be thrown from + // QPDF::inParse(). 
+ + QPDF::ParseGuard pg(context); + + empty = false; + + QPDFObjectHandle object; + bool set_offset = false; + + std::vector<StackFrame> stack; + stack.push_back(StackFrame(input)); + std::vector<parser_state_e> state_stack; + state_stack.push_back(st_top); + qpdf_offset_t offset; + bool done = false; + int bad_count = 0; + int good_count = 0; + bool b_contents = false; + bool is_null = false; + auto null_oh = QPDFObjectHandle::newNull(); + + while (!done) { + bool bad = false; + bool indirect_ref = false; + is_null = false; + auto& frame = stack.back(); + auto& olist = frame.olist; + parser_state_e state = state_stack.back(); + offset = frame.offset; + + object = QPDFObjectHandle(); + set_offset = false; + + QPDFTokenizer::Token token = + tokenizer.readToken(input, object_description, true); + std::string const& token_error_message = token.getErrorMessage(); + if (!token_error_message.empty()) { + // Tokens other than tt_bad can still generate warnings. + warn(token_error_message); + } + + switch (token.getType()) { + case QPDFTokenizer::tt_eof: + if (!content_stream) { + QTC::TC("qpdf", "QPDFParser eof in parse"); + warn("unexpected EOF"); + } + bad = true; + state = st_eof; + break; + + case QPDFTokenizer::tt_bad: + QTC::TC("qpdf", "QPDFParser bad token in parse"); + bad = true; + is_null = true; + break; + + case QPDFTokenizer::tt_brace_open: + case QPDFTokenizer::tt_brace_close: + QTC::TC("qpdf", "QPDFParser bad brace"); + warn("treating unexpected brace token as null"); + bad = true; + is_null = true; + break; + + case QPDFTokenizer::tt_array_close: + if (state == st_array) { + state = st_stop; + } else { + QTC::TC("qpdf", "QPDFParser bad array close"); + warn("treating unexpected array close token as null"); + bad = true; + is_null = true; + } + break; + + case QPDFTokenizer::tt_dict_close: + if (state == st_dictionary) { + state = st_stop; + } else { + QTC::TC("qpdf", "QPDFParser bad dictionary close"); + warn("unexpected dictionary close token"); + bad = 
true; + is_null = true; + } + break; + + case QPDFTokenizer::tt_array_open: + case QPDFTokenizer::tt_dict_open: + if (stack.size() > 500) { + QTC::TC("qpdf", "QPDFParser too deep"); + warn("ignoring excessively deeply nested data structure"); + bad = true; + is_null = true; + state = st_top; + } else { + state = st_start; + state_stack.push_back( + (token.getType() == QPDFTokenizer::tt_array_open) + ? st_array + : st_dictionary); + b_contents = false; + stack.push_back(StackFrame(input)); + } + break; + + case QPDFTokenizer::tt_bool: + object = QPDFObjectHandle::newBool((token.getValue() == "true")); + break; + + case QPDFTokenizer::tt_null: + is_null = true; + break; + + case QPDFTokenizer::tt_integer: + object = QPDFObjectHandle::newInteger( + QUtil::string_to_ll(token.getValue().c_str())); + break; + + case QPDFTokenizer::tt_real: + object = QPDFObjectHandle::newReal(token.getValue()); + break; + + case QPDFTokenizer::tt_name: + { + std::string name = token.getValue(); + object = QPDFObjectHandle::newName(name); + + if (name == "/Contents") { + b_contents = true; + } else { + b_contents = false; + } + } + break; + + case QPDFTokenizer::tt_word: + { + std::string const& value = token.getValue(); + auto size = olist.size(); + if (content_stream) { + object = QPDFObjectHandle::newOperator(value); + } else if ( + (value == "R") && (state != st_top) && (size >= 2) && + (!olist.back().isIndirect()) && + (olist.back().isInteger()) && + (!olist.at(size - 2).isIndirect()) && + (olist.at(size - 2).isInteger())) { + if (context == nullptr) { + QTC::TC("qpdf", "QPDFParser indirect without context"); + throw std::logic_error( + "QPDFObjectHandle::parse called without context" + " on an object with indirect references"); + } + auto ref_og = QPDFObjGen( + olist.at(size - 2).getIntValueAsInt(), + olist.back().getIntValueAsInt()); + if (ref_og.isIndirect()) { + object = context->getObject(ref_og); + indirect_ref = true; + } else { + QTC::TC("qpdf", "QPDFParser indirect with 0 
objid"); + is_null = true; + } + olist.pop_back(); + olist.pop_back(); + } else if ((value == "endobj") && (state == st_top)) { + // We just saw endobj without having read + // anything. Treat this as a null and do not move + // the input source's offset. + is_null = true; + input->seek(input->getLastOffset(), SEEK_SET); + empty = true; + } else { + QTC::TC("qpdf", "QPDFParser treat word as string"); + warn("unknown token while reading object;" + " treating as string"); + bad = true; + object = QPDFObjectHandle::newString(value); + } + } + break; + + case QPDFTokenizer::tt_string: + { + std::string val = token.getValue(); + if (decrypter) { + if (b_contents) { + frame.contents_string = val; + frame.contents_offset = input->getLastOffset(); + b_contents = false; + } + decrypter->decryptString(val); + } + object = QPDFObjectHandle::newString(val); + } + + break; + + default: + warn("treating unknown token type as null while " + "reading object"); + bad = true; + is_null = true; + break; + } + + if (!object.isInitialized() && !is_null && + (!((state == st_start) || (state == st_stop) || + (state == st_eof)))) { + throw std::logic_error("QPDFObjectHandle::parseInternal: " + "unexpected uninitialized object"); + is_null = true; + } + + if (bad) { + ++bad_count; + good_count = 0; + } else { + ++good_count; + if (good_count > 3) { + bad_count = 0; + } + } + if (bad_count > 5) { + // We had too many consecutive errors without enough + // intervening successful objects. Give up. 
+ warn("too many errors; giving up on reading object"); + state = st_top; + is_null = true; + } + + switch (state) { + case st_eof: + if (state_stack.size() > 1) { + warn("parse error while reading object"); + } + done = true; + // In content stream mode, leave object uninitialized to + // indicate EOF + if (!content_stream) { + is_null = true; + } + break; + + case st_dictionary: + case st_array: + if (!indirect_ref && !object.isDirectNull()) { + // No need to set description for direct nulls - they will + // become implicit. + setDescriptionFromInput(object, input->getLastOffset()); + object.setParsedOffset(input->getLastOffset()); + } + set_offset = true; + olist.push_back(is_null ? null_oh : object); + break; + + case st_top: + done = true; + break; + + case st_start: + break; + + case st_stop: + if ((state_stack.size() < 2) || (stack.size() < 2)) { + throw std::logic_error( + "QPDFObjectHandle::parseInternal: st_stop encountered" + " with insufficient elements in stack"); + } + parser_state_e old_state = state_stack.back(); + state_stack.pop_back(); + if (old_state == st_array) { + object = QPDFObjectHandle::newArray(olist); + setDescriptionFromInput(object, offset); + // The `offset` points to the next of "[". Set the rewind + // offset to point to the beginning of "[". This has been + // explicitly tested with whitespace surrounding the array start + // delimiter. getLastOffset points to the array end token and + // therefore can't be used here. + object.setParsedOffset(offset - 1); + set_offset = true; + } else if (old_state == st_dictionary) { + // Convert list to map. Alternating elements are keys. Attempt + // to recover more or less gracefully from invalid dictionaries. 
+ std::set<std::string> names; + size_t n_elements = olist.size(); + for (size_t i = 0; i < n_elements; ++i) { + QPDFObjectHandle oh = olist.at(i); + if ((!oh.isIndirect()) && oh.isName()) { + names.insert(oh.getName()); + } + } + + std::map<std::string, QPDFObjectHandle> dict; + int next_fake_key = 1; + for (unsigned int i = 0; i < n_elements; ++i) { + QPDFObjectHandle key_obj = olist.at(i); + QPDFObjectHandle val; + if (key_obj.isIndirect() || (!key_obj.isName())) { + bool found_fake = false; + std::string candidate; + while (!found_fake) { + candidate = "/QPDFFake" + + QUtil::int_to_string(next_fake_key++); + found_fake = (names.count(candidate) == 0); + QTC::TC( + "qpdf", + "QPDFParser found fake", + (found_fake ? 0 : 1)); + } + warn( + offset, + "expected dictionary key but found" + " non-name object; inserting key " + + candidate); + val = key_obj; + key_obj = QPDFObjectHandle::newName(candidate); + } else if (i + 1 >= olist.size()) { + QTC::TC("qpdf", "QPDFParser no val for last key"); + warn( + offset, + "dictionary ended prematurely; " + "using null as value for last key"); + val = QPDFObjectHandle::newNull(); + setDescriptionFromInput(val, offset); + } else { + val = olist.at(++i); + } + std::string key = key_obj.getName(); + if (dict.count(key) > 0) { + QTC::TC("qpdf", "QPDFParser duplicate dict key"); + warn( + offset, + "dictionary has duplicated key " + key + + "; last occurrence overrides earlier " + "ones"); + } + dict[key] = val; + } + if (!frame.contents_string.empty() && dict.count("/Type") && + dict["/Type"].isNameAndEquals("/Sig") && + dict.count("/ByteRange") && dict.count("/Contents") && + dict["/Contents"].isString()) { + dict["/Contents"] = + QPDFObjectHandle::newString(frame.contents_string); + dict["/Contents"].setParsedOffset(frame.contents_offset); + } + object = QPDFObjectHandle::newDictionary(dict); + setDescriptionFromInput(object, offset); + // The `offset` points to the next of "<<". 
Set the rewind + // offset to point to the beginning of "<<". This has been + // explicitly tested with whitespace surrounding the dictionary + // start delimiter. getLastOffset points to the dictionary end + // token and therefore can't be used here. + object.setParsedOffset(offset - 2); + set_offset = true; + } + stack.pop_back(); + if (state_stack.back() == st_top) { + done = true; + } else { + stack.back().olist.push_back(is_null ? null_oh : object); + } + } + } + + if (is_null) { + object = QPDFObjectHandle::newNull(); + } + if (!set_offset) { + setDescriptionFromInput(object, offset); + object.setParsedOffset(offset); + } + return object; +} + +void +QPDFParser::setDescriptionFromInput( + QPDFObjectHandle oh, qpdf_offset_t offset) const +{ + oh.setObjectDescription( + context, + (input->getName() + ", " + object_description + " at offset " + + QUtil::int_to_string(offset))); +} + +void +QPDFParser::warn(QPDF* qpdf, QPDFExc const& e) +{ + // If parsing on behalf of a QPDF object and want to give a + // warning, we can warn through the object. If parsing for some + // other reason, such as an explicit creation of an object from a + // string, then just throw the exception. 
+ if (qpdf) { + qpdf->warn(e); + } else { + throw e; + } +} + +void +QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const +{ + warn( + context, + QPDFExc( + qpdf_e_damaged_pdf, + input->getName(), + object_description, + offset, + msg)); +} + +void +QPDFParser::warn(std::string const& msg) const +{ + warn(input->getLastOffset(), msg); +} diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 1726e1b9..cd8f932d 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -73,28 +73,20 @@ QPDFWordTokenFinder::check() return true; } -QPDFTokenizer::Members::Members() : - allow_eof(false), - include_ignorable(false) -{ - reset(); -} - void -QPDFTokenizer::Members::reset() +QPDFTokenizer::reset() { - state = st_top; + state = st_before_token; type = tt_bad; - val = ""; - raw_val = ""; + val.clear(); + raw_val.clear(); error_message = ""; - unread_char = false; + before_token = true; + in_token = false; char_to_unread = '\0'; inline_image_bytes = 0; string_depth = 0; - string_ignoring_newline = false; - last_char_was_bs = false; - last_char_was_cr = false; + bad = false; } QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : @@ -110,20 +102,22 @@ QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : } QPDFTokenizer::QPDFTokenizer() : - m(new Members()) + allow_eof(false), + include_ignorable(false) { + reset(); } void QPDFTokenizer::allowEOF() { - this->m->allow_eof = true; + this->allow_eof = true; } void QPDFTokenizer::includeIgnorable() { - this->m->include_ignorable = true; + this->include_ignorable = true; } bool @@ -139,376 +133,719 @@ QPDFTokenizer::isDelimiter(char ch) } void -QPDFTokenizer::resolveLiteral() -{ - if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/')) { - this->m->type = tt_name; - // Deal with # in name token. Note: '/' by itself is a - // valid name, so don't strip leading /. That way we - // don't have to deal with the empty string as a name. 
- std::string nval = "/"; - size_t len = this->m->val.length(); - for (size_t i = 1; i < len; ++i) { - char ch = this->m->val.at(i); - if (ch == '#') { - if ((i + 2 < len) && - QUtil::is_hex_digit(this->m->val.at(i + 1)) && - QUtil::is_hex_digit(this->m->val.at(i + 2))) { - char num[3]; - num[0] = this->m->val.at(i + 1); - num[1] = this->m->val.at(i + 2); - num[2] = '\0'; - char ch2 = static_cast<char>(strtol(num, nullptr, 16)); - if (ch2 == '\0') { - this->m->type = tt_bad; - QTC::TC("qpdf", "QPDFTokenizer null in name"); - this->m->error_message = - "null character not allowed in name token"; - nval += "#00"; - } else { - nval.append(1, ch2); - } - i += 2; - } else { - QTC::TC("qpdf", "QPDFTokenizer bad name"); - this->m->error_message = - "name with stray # will not work with PDF >= 1.2"; - // Use null to encode a bad # -- this is reversed - // in QPDF_Name::normalizeName. - nval += '\0'; - } - } else { - nval.append(1, ch); - } - } - this->m->val = nval; - } else if (QUtil::is_number(this->m->val.c_str())) { - if (this->m->val.find('.') != std::string::npos) { - this->m->type = tt_real; - } else { - this->m->type = tt_integer; - } - } else if ((this->m->val == "true") || (this->m->val == "false")) { - this->m->type = tt_bool; - } else if (this->m->val == "null") { - this->m->type = tt_null; - } else { - // I don't really know what it is, so leave it as tt_word. - // Lots of cases ($, #, etc.) other than actual words fall - // into this category, but that's okay at least for now. - this->m->type = tt_word; +QPDFTokenizer::presentCharacter(char ch) +{ + handleCharacter(ch); + + if (this->in_token) { + this->raw_val += ch; } } void -QPDFTokenizer::presentCharacter(char ch) +QPDFTokenizer::handleCharacter(char ch) { - if (this->m->state == st_token_ready) { + // State machine is implemented such that the final character may not be + // handled. This happens whenever you have to use a character from the + // next token to detect the end of the current token. 
+ + switch (this->state) { + case st_top: + inTop(ch); + return; + + case st_in_space: + inSpace(ch); + return; + + case st_in_comment: + inComment(ch); + return; + + case st_lt: + inLt(ch); + return; + + case st_gt: + inGt(ch); + return; + + case st_in_string: + inString(ch); + return; + + case st_name: + inName(ch); + return; + + case st_number: + inNumber(ch); + return; + + case st_real: + inReal(ch); + return; + + case st_string_after_cr: + inStringAfterCR(ch); + return; + + case st_string_escape: + inStringEscape(ch); + return; + + case st_char_code: + inCharCode(ch); + return; + + case st_literal: + inLiteral(ch); + return; + + case st_inline_image: + inInlineImage(ch); + return; + + case st_in_hexstring: + inHexstring(ch); + return; + + case st_in_hexstring_2nd: + inHexstring2nd(ch); + return; + + case st_name_hex1: + inNameHex1(ch); + return; + + case st_name_hex2: + inNameHex2(ch); + return; + + case st_sign: + inSign(ch); + return; + + case st_decimal: + inDecimal(ch); + return; + + case (st_before_token): + inBeforeToken(ch); + return; + + case (st_token_ready): + inTokenReady(ch); + return; + + default: throw std::logic_error( - "INTERNAL ERROR: QPDF tokenizer presented character " - "while token is waiting"); + "INTERNAL ERROR: invalid state while reading token"); } +} - char orig_ch = ch; - - // State machine is implemented such that some characters may be - // handled more than once. This happens whenever you have to use - // the character that caused a state change in the new state. +void +QPDFTokenizer::inTokenReady(char ch) +{ + throw std::logic_error("INTERNAL ERROR: QPDF tokenizer presented character " + "while token is waiting"); +} - bool handled = true; - if (this->m->state == st_top) { - // Note: we specifically do not use ctype here. It is - // locale-dependent. 
- if (isSpace(ch)) { - if (this->m->include_ignorable) { - this->m->state = st_in_space; - this->m->val += ch; - } - } else if (ch == '%') { - this->m->state = st_in_comment; - if (this->m->include_ignorable) { - this->m->val += ch; - } - } else if (ch == '(') { - this->m->string_depth = 1; - this->m->string_ignoring_newline = false; - memset( - this->m->bs_num_register, - '\0', - sizeof(this->m->bs_num_register)); - this->m->last_char_was_bs = false; - this->m->last_char_was_cr = false; - this->m->state = st_in_string; - } else if (ch == '<') { - this->m->state = st_lt; - } else if (ch == '>') { - this->m->state = st_gt; - } else { - this->m->val += ch; - if (ch == ')') { - this->m->type = tt_bad; - QTC::TC("qpdf", "QPDFTokenizer bad )"); - this->m->error_message = "unexpected )"; - this->m->state = st_token_ready; - } else if (ch == '[') { - this->m->type = tt_array_open; - this->m->state = st_token_ready; - } else if (ch == ']') { - this->m->type = tt_array_close; - this->m->state = st_token_ready; - } else if (ch == '{') { - this->m->type = tt_brace_open; - this->m->state = st_token_ready; - } else if (ch == '}') { - this->m->type = tt_brace_close; - this->m->state = st_token_ready; - } else { - this->m->state = st_literal; - } - } - } else if (this->m->state == st_in_space) { - // We only enter this state if include_ignorable is true. 
- if (!isSpace(ch)) { - this->m->type = tt_space; - this->m->unread_char = true; - this->m->char_to_unread = ch; - this->m->state = st_token_ready; - } else { - this->m->val += ch; - } - } else if (this->m->state == st_in_comment) { - if ((ch == '\r') || (ch == '\n')) { - if (this->m->include_ignorable) { - this->m->type = tt_comment; - this->m->unread_char = true; - this->m->char_to_unread = ch; - this->m->state = st_token_ready; - } else { - this->m->state = st_top; - } - } else if (this->m->include_ignorable) { - this->m->val += ch; - } - } else if (this->m->state == st_lt) { - if (ch == '<') { - this->m->val = "<<"; - this->m->type = tt_dict_open; - this->m->state = st_token_ready; - } else { - handled = false; - this->m->state = st_in_hexstring; - } - } else if (this->m->state == st_gt) { - if (ch == '>') { - this->m->val = ">>"; - this->m->type = tt_dict_close; - this->m->state = st_token_ready; - } else { - this->m->val = ">"; - this->m->type = tt_bad; - QTC::TC("qpdf", "QPDFTokenizer bad >"); - this->m->error_message = "unexpected >"; - this->m->unread_char = true; - this->m->char_to_unread = ch; - this->m->state = st_token_ready; +void +QPDFTokenizer::inBeforeToken(char ch) +{ + // Note: we specifically do not use ctype here. It is + // locale-dependent. 
+ if (isSpace(ch)) { + this->before_token = !this->include_ignorable; + this->in_token = this->include_ignorable; + if (this->include_ignorable) { + this->state = st_in_space; + this->val += ch; } - } else if (this->m->state == st_in_string) { - if (this->m->string_ignoring_newline && (ch != '\n')) { - this->m->string_ignoring_newline = false; + } else if (ch == '%') { + this->before_token = !this->include_ignorable; + this->in_token = this->include_ignorable; + this->state = st_in_comment; + if (this->include_ignorable) { + this->val += ch; } + } else { + this->before_token = false; + this->in_token = true; + inTop(ch); + } +} - size_t bs_num_count = strlen(this->m->bs_num_register); - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); - if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { - // We've accumulated \ddd. PDF Spec says to ignore - // high-order overflow. - this->m->val += - static_cast<char>(strtol(this->m->bs_num_register, nullptr, 8)); - memset( - this->m->bs_num_register, - '\0', - sizeof(this->m->bs_num_register)); - bs_num_count = 0; - } +void +QPDFTokenizer::inTop(char ch) +{ + switch (ch) { + case '(': + this->string_depth = 1; + this->state = st_in_string; + return; - if (this->m->string_ignoring_newline && (ch == '\n')) { - // ignore - this->m->string_ignoring_newline = false; - } else if ( - ch_is_octal && (this->m->last_char_was_bs || (bs_num_count > 0))) { - this->m->bs_num_register[bs_num_count++] = ch; - } else if (this->m->last_char_was_bs) { - switch (ch) { - case 'n': - this->m->val += '\n'; - break; + case '<': + this->state = st_lt; + return; - case 'r': - this->m->val += '\r'; - break; + case '>': + this->state = st_gt; + return; - case 't': - this->m->val += '\t'; - break; + case (')'): + this->type = tt_bad; + QTC::TC("qpdf", "QPDFTokenizer bad )"); + this->error_message = "unexpected )"; + this->val += ch; + this->state = st_token_ready; + return; - case 'b': - this->m->val += '\b'; - break; + case '[': + 
this->type = tt_array_open; + this->state = st_token_ready; + this->val += ch; + return; - case 'f': - this->m->val += '\f'; - break; + case ']': + this->type = tt_array_close; + this->val += ch; + this->state = st_token_ready; + return; - case '\n': - break; + case '{': + this->type = tt_brace_open; + this->state = st_token_ready; + this->val += ch; + return; - case '\r': - this->m->string_ignoring_newline = true; - break; + case '}': + this->type = tt_brace_close; + this->state = st_token_ready; + this->val += ch; + return; - default: - // PDF spec says backslash is ignored before anything else - this->m->val += ch; - break; - } - } else if (ch == '\\') { - // last_char_was_bs is set/cleared below as appropriate - if (bs_num_count) { - throw std::logic_error( - "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 " - "when ch == '\\'"); - } - } else if (ch == '(') { - this->m->val += ch; - ++this->m->string_depth; - } else if ((ch == ')') && (--this->m->string_depth == 0)) { - this->m->type = tt_string; - this->m->state = st_token_ready; - } else if (ch == '\r') { - // CR by itself is converted to LF - this->m->val += '\n'; - } else if (ch == '\n') { - // CR LF is converted to LF - if (!this->m->last_char_was_cr) { - this->m->val += ch; - } - } else { - this->m->val += ch; - } + case '/': + this->state = st_name; + this->val += ch; + return; - this->m->last_char_was_cr = - ((!this->m->string_ignoring_newline) && (ch == '\r')); - this->m->last_char_was_bs = - ((!this->m->last_char_was_bs) && (ch == '\\')); - } else if (this->m->state == st_literal) { - if (isDelimiter(ch)) { - // A C-locale whitespace character or delimiter terminates - // token. It is important to unread the whitespace - // character even though it is ignored since it may be the - // newline after a stream keyword. Removing it here could - // make the stream-reading code break on some files, - // though not on any files in the test suite as of this - // writing. 
- - this->m->type = tt_word; - this->m->unread_char = true; - this->m->char_to_unread = ch; - this->m->state = st_token_ready; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this->state = st_number; + this->val += ch; + return; + + case '+': + case '-': + this->state = st_sign; + this->val += ch; + return; + + case '.': + this->state = st_decimal; + this->val += ch; + return; + + default: + this->state = st_literal; + this->val += ch; + return; + } +} + +void +QPDFTokenizer::inSpace(char ch) +{ + // We only enter this state if include_ignorable is true. + if (!isSpace(ch)) { + this->type = tt_space; + this->in_token = false; + this->char_to_unread = ch; + this->state = st_token_ready; + return; + } else { + this->val += ch; + return; + } +} + +void +QPDFTokenizer::inComment(char ch) +{ + if ((ch == '\r') || (ch == '\n')) { + if (this->include_ignorable) { + this->type = tt_comment; + this->in_token = false; + this->char_to_unread = ch; + this->state = st_token_ready; } else { - this->m->val += ch; + this->state = st_before_token; } - } else if (this->m->state == st_inline_image) { - this->m->val += ch; - size_t len = this->m->val.length(); - if (len == this->m->inline_image_bytes) { - QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); - this->m->type = tt_inline_image; - this->m->inline_image_bytes = 0; - this->m->state = st_token_ready; + } else if (this->include_ignorable) { + this->val += ch; + } +} + +void +QPDFTokenizer::inString(char ch) +{ + switch (ch) { + case '\\': + this->state = st_string_escape; + return; + + case '(': + this->val += ch; + ++this->string_depth; + return; + + case ')': + if (--this->string_depth == 0) { + this->type = tt_string; + this->state = st_token_ready; + return; } + + this->val += ch; + return; + + case '\r': + // CR by itself is converted to LF + this->val += '\n'; + this->state = st_string_after_cr; + return; + + case '\n': + this->val += ch; + 
return; + + default: + this->val += ch; + return; + } +} + +void +QPDFTokenizer::inName(char ch) +{ + if (isDelimiter(ch)) { + // A C-locale whitespace character or delimiter terminates + // token. It is important to unread the whitespace + // character even though it is ignored since it may be the + // newline after a stream keyword. Removing it here could + // make the stream-reading code break on some files, + // though not on any files in the test suite as of this + // writing. + + this->type = this->bad ? tt_bad : tt_name; + this->in_token = false; + this->char_to_unread = ch; + this->state = st_token_ready; + } else if (ch == '#') { + this->char_code = 0; + this->state = st_name_hex1; } else { - handled = false; - } - - if (handled) { - // okay - } else if (this->m->state == st_in_hexstring) { - if (ch == '>') { - this->m->type = tt_string; - this->m->state = st_token_ready; - if (this->m->val.length() % 2) { - // PDF spec says odd hexstrings have implicit - // trailing 0. - this->m->val += '0'; - } - char num[3]; - num[2] = '\0'; - std::string nval; - for (unsigned int i = 0; i < this->m->val.length(); i += 2) { - num[0] = this->m->val.at(i); - num[1] = this->m->val.at(i + 1); - char nch = static_cast<char>(strtol(num, nullptr, 16)); - nval += nch; - } - this->m->val = nval; - } else if (QUtil::is_hex_digit(ch)) { - this->m->val += ch; - } else if (isSpace(ch)) { - // ignore - } else { - this->m->type = tt_bad; - QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); - this->m->error_message = - std::string("invalid character (") + ch + ") in hexstring"; - this->m->state = st_token_ready; - } + this->val += ch; + } +} + +void +QPDFTokenizer::inNameHex1(char ch) +{ + this->hex_char = ch; + + if ('0' <= ch && ch <= '9') { + this->char_code = 16 * (int(ch) - int('0')); + this->state = st_name_hex2; + + } else if ('A' <= ch && ch <= 'F') { + this->char_code = 16 * (10 + int(ch) - int('A')); + this->state = st_name_hex2; + + } else if ('a' <= ch && ch <= 'f') 
{ + this->char_code = 16 * (10 + int(ch) - int('a')); + this->state = st_name_hex2; + } else { - throw std::logic_error( - "INTERNAL ERROR: invalid state while reading token"); + QTC::TC("qpdf", "QPDFTokenizer bad name 1"); + this->error_message = "name with stray # will not work with PDF >= 1.2"; + // Use null to encode a bad # -- this is reversed + // in QPDF_Name::normalizeName. + this->val += '\0'; + this->state = st_name; + inName(ch); + } +} + +void +QPDFTokenizer::inNameHex2(char ch) +{ + if ('0' <= ch && ch <= '9') { + this->char_code += int(ch) - int('0'); + + } else if ('A' <= ch && ch <= 'F') { + this->char_code += 10 + int(ch) - int('A'); + + } else if ('a' <= ch && ch <= 'f') { + this->char_code += 10 + int(ch) - int('a'); + + } else { + QTC::TC("qpdf", "QPDFTokenizer bad name 2"); + this->error_message = "name with stray # will not work with PDF >= 1.2"; + // Use null to encode a bad # -- this is reversed + // in QPDF_Name::normalizeName. + this->val += '\0'; + this->val += this->hex_char; + this->state = st_name; + inName(ch); + return; + } + if (this->char_code == 0) { + QTC::TC("qpdf", "QPDFTokenizer null in name"); + this->error_message = "null character not allowed in name token"; + this->val += "#00"; + this->state = st_name; + this->bad = true; + } else { + this->val += char(this->char_code); + this->state = st_name; + } +} + +void +QPDFTokenizer::inSign(char ch) +{ + if (QUtil::is_digit(ch)) { + this->state = st_number; + this->val += ch; + } else if (ch == '.') { + this->state = st_decimal; + this->val += ch; + } else { + this->state = st_literal; + inLiteral(ch); } +} - if ((this->m->state == st_token_ready) && (this->m->type == tt_word)) { - resolveLiteral(); +void +QPDFTokenizer::inDecimal(char ch) +{ + if (QUtil::is_digit(ch)) { + this->state = st_real; + this->val += ch; + } else { + this->state = st_literal; + inLiteral(ch); } +} + +void +QPDFTokenizer::inNumber(char ch) +{ + if (QUtil::is_digit(ch)) { + this->val += ch; + } else if (ch 
== '.') { + this->state = st_real; + this->val += ch; + } else if (isDelimiter(ch)) { + this->type = tt_integer; + this->state = st_token_ready; + this->in_token = false; + this->char_to_unread = ch; + } else { + this->state = st_literal; + this->val += ch; + } +} + +void +QPDFTokenizer::inReal(char ch) +{ + if (QUtil::is_digit(ch)) { + this->val += ch; + } else if (isDelimiter(ch)) { + this->type = tt_real; + this->state = st_token_ready; + this->in_token = false; + this->char_to_unread = ch; + } else { + this->state = st_literal; + this->val += ch; + } +} +void +QPDFTokenizer::inStringEscape(char ch) +{ + this->state = st_in_string; + switch (ch) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + this->state = st_char_code; + this->char_code = 0; + this->digit_count = 0; + inCharCode(ch); + return; + + case 'n': + this->val += '\n'; + return; + + case 'r': + this->val += '\r'; + return; + + case 't': + this->val += '\t'; + return; + + case 'b': + this->val += '\b'; + return; - if (!(betweenTokens() || - ((this->m->state == st_token_ready) && this->m->unread_char))) { - this->m->raw_val += orig_ch; + case 'f': + this->val += '\f'; + return; + + case '\n': + return; + + case '\r': + this->state = st_string_after_cr; + return; + + default: + // PDF spec says backslash is ignored before anything else + this->val += ch; + return; + } +} + +void +QPDFTokenizer::inStringAfterCR(char ch) +{ + this->state = st_in_string; + if (ch != '\n') { + inString(ch); + } +} + +void +QPDFTokenizer::inLt(char ch) +{ + if (ch == '<') { + this->val += "<<"; + this->type = tt_dict_open; + this->state = st_token_ready; + return; + } + + this->state = st_in_hexstring; + inHexstring(ch); +} + +void +QPDFTokenizer::inGt(char ch) +{ + if (ch == '>') { + this->val += ">>"; + this->type = tt_dict_close; + this->state = st_token_ready; + } else { + this->val += ">"; + this->type = tt_bad; + QTC::TC("qpdf", "QPDFTokenizer bad >"); + 
this->error_message = "unexpected >"; + this->in_token = false; + this->char_to_unread = ch; + this->state = st_token_ready; + } +} + +void +QPDFTokenizer::inLiteral(char ch) +{ + if (isDelimiter(ch)) { + // A C-locale whitespace character or delimiter terminates + // token. It is important to unread the whitespace + // character even though it is ignored since it may be the + // newline after a stream keyword. Removing it here could + // make the stream-reading code break on some files, + // though not on any files in the test suite as of this + // writing. + + this->in_token = false; + this->char_to_unread = ch; + this->state = st_token_ready; + this->type = (this->val == "true") || (this->val == "false") + ? tt_bool + : (this->val == "null" ? tt_null : tt_word); + } else { + this->val += ch; + } +} + +void +QPDFTokenizer::inHexstring(char ch) +{ + if ('0' <= ch && ch <= '9') { + this->char_code = 16 * (int(ch) - int('0')); + this->state = st_in_hexstring_2nd; + + } else if ('A' <= ch && ch <= 'F') { + this->char_code = 16 * (10 + int(ch) - int('A')); + this->state = st_in_hexstring_2nd; + + } else if ('a' <= ch && ch <= 'f') { + this->char_code = 16 * (10 + int(ch) - int('a')); + this->state = st_in_hexstring_2nd; + + } else if (ch == '>') { + this->type = tt_string; + this->state = st_token_ready; + + } else if (isSpace(ch)) { + // ignore + + } else { + this->type = tt_bad; + QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); + this->error_message = + std::string("invalid character (") + ch + ") in hexstring"; + this->state = st_token_ready; + } +} + +void +QPDFTokenizer::inHexstring2nd(char ch) +{ + if ('0' <= ch && ch <= '9') { + this->val += char(this->char_code + int(ch) - int('0')); + this->state = st_in_hexstring; + + } else if ('A' <= ch && ch <= 'F') { + this->val += char(this->char_code + 10 + int(ch) - int('A')); + this->state = st_in_hexstring; + + } else if ('a' <= ch && ch <= 'f') { + this->val += char(this->char_code + 10 + int(ch) - 
int('a')); + this->state = st_in_hexstring; + + } else if (ch == '>') { + // PDF spec says odd hexstrings have implicit trailing 0. + this->val += char(this->char_code); + this->type = tt_string; + this->state = st_token_ready; + + } else if (isSpace(ch)) { + // ignore + + } else { + this->type = tt_bad; + QTC::TC("qpdf", "QPDFTokenizer bad hexstring 2nd character"); + this->error_message = + std::string("invalid character (") + ch + ") in hexstring"; + this->state = st_token_ready; + } +} + +void +QPDFTokenizer::inCharCode(char ch) +{ + if (('0' <= ch) && (ch <= '7')) { + this->char_code = 8 * this->char_code + (int(ch) - int('0')); + if (++(this->digit_count) < 3) { + return; + } + // We've accumulated \ddd. PDF Spec says to ignore + // high-order overflow. + } + this->val += char(this->char_code % 256); + this->state = st_in_string; + return; +} + +void +QPDFTokenizer::inInlineImage(char ch) +{ + this->val += ch; + if (this->val.length() == this->inline_image_bytes) { + QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); + this->type = tt_inline_image; + this->inline_image_bytes = 0; + this->state = st_token_ready; } } void QPDFTokenizer::presentEOF() { - if (this->m->state == st_literal) { + switch (this->state) { + case st_name: + case st_name_hex1: + case st_name_hex2: + case st_number: + case st_real: + case st_sign: + case st_decimal: + case st_literal: QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); - resolveLiteral(); - } else if ( - (this->m->include_ignorable) && (this->m->state == st_in_space)) { - this->m->type = tt_space; - } else if ( - (this->m->include_ignorable) && (this->m->state == st_in_comment)) { - this->m->type = tt_comment; - } else if (betweenTokens()) { - this->m->type = tt_eof; - } else if (this->m->state != st_token_ready) { + // Push any delimiter to the state machine to finish off the final + // token. 
+ presentCharacter('\f'); + this->in_token = true; + break; + + case st_top: + case st_before_token: + this->type = tt_eof; + break; + + case st_in_space: + this->type = this->include_ignorable ? tt_space : tt_eof; + break; + + case st_in_comment: + this->type = this->include_ignorable ? tt_comment : tt_bad; + break; + + case st_token_ready: + break; + + default: QTC::TC("qpdf", "QPDFTokenizer EOF reading token"); - this->m->type = tt_bad; - this->m->error_message = "EOF while reading token"; + this->type = tt_bad; + this->error_message = "EOF while reading token"; } - - this->m->state = st_token_ready; + this->state = st_token_ready; } void QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) { - if (this->m->state != st_top) { + if (this->state != st_before_token) { throw std::logic_error("QPDFTokenizer::expectInlineImage called" " when tokenizer is in improper state"); } findEI(input); - this->m->state = st_inline_image; + this->before_token = false; + this->in_token = true; + this->state = st_inline_image; } void @@ -537,7 +874,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) if (!input->findFirst("EI", input->tell(), 0, f)) { break; } - this->m->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2); + this->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2); QPDFTokenizer check; bool found_bad = false; @@ -610,19 +947,16 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) bool QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) { - bool ready = (this->m->state == st_token_ready); - unread_char = this->m->unread_char; - ch = this->m->char_to_unread; + bool ready = (this->state == st_token_ready); + unread_char = !this->in_token && !this->before_token; + ch = this->char_to_unread; if (ready) { - if (this->m->type == tt_bad) { - this->m->val = this->m->raw_val; - } - token = Token( - this->m->type, - this->m->val, - this->m->raw_val, - this->m->error_message); - this->m->reset(); + token = 
(this->type == tt_bad) + ? Token( + this->type, this->raw_val, this->raw_val, this->error_message) + : Token(this->type, this->val, this->raw_val, this->error_message); + + this->reset(); } return ready; } @@ -630,11 +964,7 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) bool QPDFTokenizer::betweenTokens() { - return ( - (this->m->state == st_top) || - ((!this->m->include_ignorable) && - ((this->m->state == st_in_comment) || - (this->m->state == st_in_space)))); + return this->before_token; } QPDFTokenizer::Token @@ -644,49 +974,46 @@ QPDFTokenizer::readToken( bool allow_bad, size_t max_len) { - qpdf_offset_t offset = input->tell(); - Token token; - bool unread_char; - char char_to_unread; - bool presented_eof = false; - while (!getToken(token, unread_char, char_to_unread)) { + qpdf_offset_t offset = input->fastTell(); + + while (this->state != st_token_ready) { char ch; - if (input->read(&ch, 1) == 0) { - if (!presented_eof) { - presentEOF(); - presented_eof = true; - if ((this->m->type == tt_eof) && (!this->m->allow_eof)) { - // Nothing in the qpdf library calls readToken - // without allowEOF anymore, so this case is not - // exercised. - this->m->type = tt_bad; - this->m->error_message = "unexpected EOF"; - offset = input->getLastOffset(); - } - } else { - throw std::logic_error( - "getToken returned false after presenting EOF"); + if (!input->fastRead(ch)) { + presentEOF(); + + if ((this->type == tt_eof) && (!this->allow_eof)) { + // Nothing in the qpdf library calls readToken + // without allowEOF anymore, so this case is not + // exercised. 
+ this->type = tt_bad; + this->error_message = "unexpected EOF"; + offset = input->getLastOffset(); } } else { - presentCharacter(ch); - if (betweenTokens() && (input->getLastOffset() == offset)) { + handleCharacter(ch); + if (this->before_token) { ++offset; } - if (max_len && (this->m->raw_val.length() >= max_len) && - (this->m->state != st_token_ready)) { + if (this->in_token) { + this->raw_val += ch; + } + if (max_len && (this->raw_val.length() >= max_len) && + (this->state != st_token_ready)) { // terminate this token now QTC::TC("qpdf", "QPDFTokenizer block long token"); - this->m->type = tt_bad; - this->m->state = st_token_ready; - this->m->error_message = + this->type = tt_bad; + this->state = st_token_ready; + this->error_message = "exceeded allowable length while reading token"; } } } - if (unread_char) { - input->unreadCh(char_to_unread); - } + Token token; + bool unread_char; + char char_to_unread; + getToken(token, unread_char, char_to_unread); + input->fastUnread(unread_char); if (token.getType() != tt_eof) { input->setLastOffset(offset); diff --git a/libqpdf/QPDFValue.cc b/libqpdf/QPDFValue.cc new file mode 100644 index 00000000..8a6222d2 --- /dev/null +++ b/libqpdf/QPDFValue.cc @@ -0,0 +1,11 @@ +#include <qpdf/QPDFValue.hh> + +#include <qpdf/QPDFObject.hh> + +std::shared_ptr<QPDFObject> +QPDFValue::do_create(QPDFValue* object) +{ + std::shared_ptr<QPDFObject> obj(new QPDFObject()); + obj->value = std::shared_ptr<QPDFValue>(object); + return obj; +} diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index e33d0965..028f73dc 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1897,7 +1897,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) // pass 1. indicateProgress(true, false); } - QPDFObjectHandle obj_to_write = this->m->pdf.getObjectByObjGen(obj); + QPDFObjectHandle obj_to_write = this->m->pdf.getObject(obj); if (obj_to_write.isStream()) { // This condition occurred in a fuzz input. 
Ideally we // should block it at at parse time, but it's not diff --git a/libqpdf/QPDF_Array.cc b/libqpdf/QPDF_Array.cc index 55e4d20a..63fe98d4 100644 --- a/libqpdf/QPDF_Array.cc +++ b/libqpdf/QPDF_Array.cc @@ -4,12 +4,14 @@ #include <qpdf/QUtil.hh> #include <stdexcept> -QPDF_Array::QPDF_Array(std::vector<QPDFObjectHandle> const& v) +QPDF_Array::QPDF_Array(std::vector<QPDFObjectHandle> const& v) : + QPDFValue(::ot_array, "array") { setFromVector(v); } QPDF_Array::QPDF_Array(SparseOHArray const& items) : + QPDFValue(::ot_array, "array"), elements(items) { } @@ -62,18 +64,6 @@ QPDF_Array::getJSON(int json_version) return j; } -QPDFObject::object_type_e -QPDF_Array::getTypeCode() const -{ - return QPDFObject::ot_array; -} - -char const* -QPDF_Array::getTypeName() const -{ - return "array"; -} - int QPDF_Array::getNItems() const { diff --git a/libqpdf/QPDF_Bool.cc b/libqpdf/QPDF_Bool.cc index f26325c3..efbfd6c9 100644 --- a/libqpdf/QPDF_Bool.cc +++ b/libqpdf/QPDF_Bool.cc @@ -1,6 +1,7 @@ #include <qpdf/QPDF_Bool.hh> QPDF_Bool::QPDF_Bool(bool val) : + QPDFValue(::ot_boolean, "boolean"), val(val) { } @@ -29,18 +30,6 @@ QPDF_Bool::getJSON(int json_version) return JSON::makeBool(this->val); } -QPDFObject::object_type_e -QPDF_Bool::getTypeCode() const -{ - return QPDFObject::ot_boolean; -} - -char const* -QPDF_Bool::getTypeName() const -{ - return "boolean"; -} - bool QPDF_Bool::getVal() const { diff --git a/libqpdf/QPDF_Dictionary.cc b/libqpdf/QPDF_Dictionary.cc index 60b2339f..845bcad8 100644 --- a/libqpdf/QPDF_Dictionary.cc +++ b/libqpdf/QPDF_Dictionary.cc @@ -1,10 +1,10 @@ #include <qpdf/QPDF_Dictionary.hh> #include <qpdf/QPDF_Name.hh> -#include <qpdf/QPDF_Null.hh> QPDF_Dictionary::QPDF_Dictionary( std::map<std::string, QPDFObjectHandle> const& items) : + QPDFValue(::ot_dictionary, "dictionary"), items(items) { } @@ -58,18 +58,6 @@ QPDF_Dictionary::getJSON(int json_version) return j; } -QPDFObject::object_type_e -QPDF_Dictionary::getTypeCode() const -{ - return 
QPDFObject::ot_dictionary; -} - -char const* -QPDF_Dictionary::getTypeName() const -{ - return "dictionary"; -} - bool QPDF_Dictionary::hasKey(std::string const& key) { diff --git a/libqpdf/QPDF_InlineImage.cc b/libqpdf/QPDF_InlineImage.cc index c3c656e0..76318196 100644 --- a/libqpdf/QPDF_InlineImage.cc +++ b/libqpdf/QPDF_InlineImage.cc @@ -1,6 +1,7 @@ #include <qpdf/QPDF_InlineImage.hh> QPDF_InlineImage::QPDF_InlineImage(std::string const& val) : + QPDFValue(::ot_inlineimage, "inline-image"), val(val) { } @@ -29,18 +30,6 @@ QPDF_InlineImage::getJSON(int json_version) return JSON::makeNull(); } -QPDFObject::object_type_e -QPDF_InlineImage::getTypeCode() const -{ - return QPDFObject::ot_inlineimage; -} - -char const* -QPDF_InlineImage::getTypeName() const -{ - return "inline-image"; -} - std::string QPDF_InlineImage::getVal() const { diff --git a/libqpdf/QPDF_Integer.cc b/libqpdf/QPDF_Integer.cc index e8d23e4a..24812573 100644 --- a/libqpdf/QPDF_Integer.cc +++ b/libqpdf/QPDF_Integer.cc @@ -3,6 +3,7 @@ #include <qpdf/QUtil.hh> QPDF_Integer::QPDF_Integer(long long val) : + QPDFValue(::ot_integer, "integer"), val(val) { } @@ -31,18 +32,6 @@ QPDF_Integer::getJSON(int json_version) return JSON::makeInt(this->val); } -QPDFObject::object_type_e -QPDF_Integer::getTypeCode() const -{ - return QPDFObject::ot_integer; -} - -char const* -QPDF_Integer::getTypeName() const -{ - return "integer"; -} - long long QPDF_Integer::getVal() const { diff --git a/libqpdf/QPDF_Name.cc b/libqpdf/QPDF_Name.cc index 73990775..c86d34b4 100644 --- a/libqpdf/QPDF_Name.cc +++ b/libqpdf/QPDF_Name.cc @@ -5,6 +5,7 @@ #include <string.h> QPDF_Name::QPDF_Name(std::string const& name) : + QPDFValue(::ot_name, "name"), name(name) { } @@ -61,18 +62,6 @@ QPDF_Name::getJSON(int json_version) } } -QPDFObject::object_type_e -QPDF_Name::getTypeCode() const -{ - return QPDFObject::ot_name; -} - -char const* -QPDF_Name::getTypeName() const -{ - return "name"; -} - std::string QPDF_Name::getName() const { diff 
--git a/libqpdf/QPDF_Null.cc b/libqpdf/QPDF_Null.cc index b015ed8b..f60dda1f 100644 --- a/libqpdf/QPDF_Null.cc +++ b/libqpdf/QPDF_Null.cc @@ -1,5 +1,10 @@ #include <qpdf/QPDF_Null.hh> +QPDF_Null::QPDF_Null() : + QPDFValue(::ot_null, "null") +{ +} + std::shared_ptr<QPDFObject> QPDF_Null::create() { @@ -23,15 +28,3 @@ QPDF_Null::getJSON(int json_version) { return JSON::makeNull(); } - -QPDFObject::object_type_e -QPDF_Null::getTypeCode() const -{ - return QPDFObject::ot_null; -} - -char const* -QPDF_Null::getTypeName() const -{ - return "null"; -} diff --git a/libqpdf/QPDF_Operator.cc b/libqpdf/QPDF_Operator.cc index cd5009ae..547ff40a 100644 --- a/libqpdf/QPDF_Operator.cc +++ b/libqpdf/QPDF_Operator.cc @@ -1,6 +1,7 @@ #include <qpdf/QPDF_Operator.hh> QPDF_Operator::QPDF_Operator(std::string const& val) : + QPDFValue(::ot_operator, "operator"), val(val) { } @@ -20,7 +21,7 @@ QPDF_Operator::shallowCopy() std::string QPDF_Operator::unparse() { - return this->val; + return val; } JSON @@ -29,18 +30,6 @@ QPDF_Operator::getJSON(int json_version) return JSON::makeNull(); } -QPDFObject::object_type_e -QPDF_Operator::getTypeCode() const -{ - return QPDFObject::ot_operator; -} - -char const* -QPDF_Operator::getTypeName() const -{ - return "operator"; -} - std::string QPDF_Operator::getVal() const { diff --git a/libqpdf/QPDF_Real.cc b/libqpdf/QPDF_Real.cc index 138bbb3c..85c9ceeb 100644 --- a/libqpdf/QPDF_Real.cc +++ b/libqpdf/QPDF_Real.cc @@ -3,12 +3,14 @@ #include <qpdf/QUtil.hh> QPDF_Real::QPDF_Real(std::string const& val) : + QPDFValue(::ot_real, "real"), val(val) { } QPDF_Real::QPDF_Real( double value, int decimal_places, bool trim_trailing_zeroes) : + QPDFValue(::ot_real, "real"), val(QUtil::double_to_string(value, decimal_places, trim_trailing_zeroes)) { } @@ -60,18 +62,6 @@ QPDF_Real::getJSON(int json_version) return JSON::makeNumber(result); } -QPDFObject::object_type_e -QPDF_Real::getTypeCode() const -{ - return QPDFObject::ot_real; -} - -char const* 
-QPDF_Real::getTypeName() const -{ - return "real"; -} - std::string QPDF_Real::getVal() { diff --git a/libqpdf/QPDF_Reserved.cc b/libqpdf/QPDF_Reserved.cc index 5808a369..f5af4688 100644 --- a/libqpdf/QPDF_Reserved.cc +++ b/libqpdf/QPDF_Reserved.cc @@ -2,6 +2,11 @@ #include <stdexcept> +QPDF_Reserved::QPDF_Reserved() : + QPDFValue(::ot_reserved, "reserved") +{ +} + std::shared_ptr<QPDFObject> QPDF_Reserved::create() { @@ -17,25 +22,15 @@ QPDF_Reserved::shallowCopy() std::string QPDF_Reserved::unparse() { - throw std::logic_error("attempt to unparse QPDF_Reserved"); + throw std::logic_error( + "QPDFObjectHandle: attempting to unparse a reserved object"); return ""; } JSON QPDF_Reserved::getJSON(int json_version) { - throw std::logic_error("attempt to generate JSON from QPDF_Reserved"); + throw std::logic_error( + "QPDFObjectHandle: attempting to unparse a reserved object"); return JSON::makeNull(); } - -QPDFObject::object_type_e -QPDF_Reserved::getTypeCode() const -{ - return QPDFObject::ot_reserved; -} - -char const* -QPDF_Reserved::getTypeName() const -{ - return "reserved"; -} diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 1b7f9461..9932c15d 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -114,6 +114,7 @@ QPDF_Stream::QPDF_Stream( QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) : + QPDFValue(::ot_stream, "stream"), qpdf(qpdf), og(og), filter_on_write(true), @@ -291,22 +292,10 @@ QPDF_Stream::getStreamJSON( return result; } -QPDFObject::object_type_e -QPDF_Stream::getTypeCode() const -{ - return QPDFObject::ot_stream; -} - -char const* -QPDF_Stream::getTypeName() const -{ - return "stream"; -} - void QPDF_Stream::setDescription(QPDF* qpdf, std::string const& description) { - this->QPDFObject::setDescription(qpdf, description); + this->QPDFValue::setDescription(qpdf, description); setDictDescription(); } diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc index b038366b..c6cb6c41 100644 --- 
a/libqpdf/QPDF_String.cc +++ b/libqpdf/QPDF_String.cc @@ -21,6 +21,7 @@ is_iso_latin1_printable(char ch) } QPDF_String::QPDF_String(std::string const& val) : + QPDFValue(::ot_string, "string"), val(val) { } @@ -84,18 +85,6 @@ QPDF_String::getJSON(int json_version) return JSON::makeString(result); } -QPDFObject::object_type_e -QPDF_String::getTypeCode() const -{ - return QPDFObject::ot_string; -} - -char const* -QPDF_String::getTypeName() const -{ - return "string"; -} - bool QPDF_String::useHexString() const { diff --git a/libqpdf/QPDF_Unresolved.cc b/libqpdf/QPDF_Unresolved.cc new file mode 100644 index 00000000..f824a9a6 --- /dev/null +++ b/libqpdf/QPDF_Unresolved.cc @@ -0,0 +1,36 @@ +#include <qpdf/QPDF_Unresolved.hh> + +#include <stdexcept> + +QPDF_Unresolved::QPDF_Unresolved(QPDF* qpdf, QPDFObjGen const& og) : + QPDFValue(::ot_unresolved, "unresolved", qpdf, og) +{ +} + +std::shared_ptr<QPDFObject> +QPDF_Unresolved::create(QPDF* qpdf, QPDFObjGen const& og) +{ + return do_create(new QPDF_Unresolved(qpdf, og)); +} + +std::shared_ptr<QPDFObject> +QPDF_Unresolved::shallowCopy() +{ + throw std::logic_error( + "attempted to shallow copy unresolved QPDFObjectHandle"); + return create(qpdf, og); +} + +std::string +QPDF_Unresolved::unparse() +{ + throw std::logic_error( + "attempted to unparse an unresolved QPDFObjectHandle"); + return ""; +} + +JSON +QPDF_Unresolved::getJSON(int json_version) +{ + return JSON::makeNull(); +} diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index a3d50cfb..131e7dee 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -14,8 +14,11 @@ // | st_initial // { | -> st_top -// "qpdf-v2": { | -> st_qpdf -// "objects": { | -> st_objects +// "qpdf": [ | -> st_qpdf +// { | -> st_qpdf_meta +// ... | ... +// }, | ... +// { | -> st_objects // "obj:1 0 R": { | -> st_object_top // "value": { | -> st_object // "/Pages": "2 0 R", | ... 
@@ -41,7 +44,7 @@ // } | <- st_trailer // } | <- st_objects // } | <- st_qpdf -// } | <- st_top +// ] | <- st_top // } | <- st_initial static char const* JSON_PDF = ( @@ -394,7 +397,7 @@ QPDF::JSONReactor::replaceObject( auto og = to_replace.getObjGen(); this->reserved.erase(og); this->pdf.replaceObject(og, replacement); - auto oh = pdf.getObjectByObjGen(og); + auto oh = pdf.getObject(og); setObjectDescription(oh, value); } diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index f89ed188..e09f7b68 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -137,8 +137,7 @@ QPDF::isLinearized() return false; } - QPDFObjectHandle candidate = QPDFObjectHandle::Factory::newIndirect( - this, QPDFObjGen(lindict_obj, 0)); + auto candidate = getObjectByID(lindict_obj, 0); if (!candidate.isDictionary()) { return false; } @@ -706,7 +705,7 @@ QPDF::getUncompressedObject( return obj; } else { int repl = (*(object_stream_data.find(obj.getObjectID()))).second; - return getObjectByObjGen(QPDFObjGen(repl, 0)); + return getObject(repl, 0); } } @@ -1381,9 +1380,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) stopOnError("found other than one root while" " calculating linearization data"); } - this->m->part4.push_back(getObjectByObjGen(*(lc_root.begin()))); + this->m->part4.push_back(getObject(*(lc_root.begin()))); for (auto const& og: lc_open_document) { - this->m->part4.push_back(getObjectByObjGen(og)); + this->m->part4.push_back(getObject(og)); } // Part 6: first page objects. Note: implementation note 124 @@ -1412,11 +1411,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) // hint tables. 
for (auto const& og: lc_first_page_private) { - this->m->part6.push_back(getObjectByObjGen(og)); + this->m->part6.push_back(getObject(og)); } for (auto const& og: lc_first_page_shared) { - this->m->part6.push_back(getObjectByObjGen(og)); + this->m->part6.push_back(getObject(og)); } // Place the outline dictionary if it goes in the first page section. @@ -1462,7 +1461,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) for (auto const& og: this->m->obj_user_to_objects[ou]) { if (lc_other_page_private.count(og)) { lc_other_page_private.erase(og); - this->m->part7.push_back(getObjectByObjGen(og)); + this->m->part7.push_back(getObject(og)); ++this->m->c_page_offset_data.entries.at(i).nobjects; } } @@ -1479,7 +1478,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) // Order is unimportant. for (auto const& og: lc_other_page_shared) { - this->m->part8.push_back(getObjectByObjGen(og)); + this->m->part8.push_back(getObject(og)); } // Part 9: other objects @@ -1501,7 +1500,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) for (auto const& og: pages_ogs) { if (lc_other.count(og)) { lc_other.erase(og); - this->m->part9.push_back(getObjectByObjGen(og)); + this->m->part9.push_back(getObject(og)); } } @@ -1531,7 +1530,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) for (auto const& og: ogs) { if (lc_thumbnail_private.count(og)) { lc_thumbnail_private.erase(og); - this->m->part9.push_back(getObjectByObjGen(og)); + this->m->part9.push_back(getObject(og)); } } } @@ -1544,7 +1543,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) // Place shared thumbnail objects for (auto const& og: lc_thumbnail_shared) { - this->m->part9.push_back(getObjectByObjGen(og)); + this->m->part9.push_back(getObject(og)); } // Place outlines unless in first page @@ -1554,7 +1553,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& 
object_stream_data) // Place all remaining objects for (auto const& og: lc_other) { - this->m->part9.push_back(getObjectByObjGen(og)); + this->m->part9.push_back(getObject(og)); } // Make sure we got everything exactly once. @@ -1656,7 +1655,7 @@ QPDF::pushOutlinesToPart( lc_outlines.erase(outlines_og); part.push_back(outlines); for (auto const& og: lc_outlines) { - part.push_back(getObjectByObjGen(og)); + part.push_back(getObject(og)); ++this->m->c_outline_data.nobjects; } } diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index a1bede25..80e89b02 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -233,7 +233,7 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos) newpage = makeIndirectObject(newpage); } else if (newpage.getOwningQPDF() != this) { QTC::TC("qpdf", "QPDF insert foreign page"); - newpage.getOwningQPDF()->pushInheritedAttributesToPage(); + newpage.getOwningQPDF(false)->pushInheritedAttributesToPage(); newpage = copyForeignObject(newpage); } else { QTC::TC("qpdf", "QPDF insert indirect page"); diff --git a/libqpdf/QTC.cc b/libqpdf/QTC.cc index d27bfa8b..8188846c 100644 --- a/libqpdf/QTC.cc +++ b/libqpdf/QTC.cc @@ -1,6 +1,7 @@ #include <qpdf/QTC.hh> #include <qpdf/QUtil.hh> +#include <map> #include <set> #include <stdio.h> @@ -12,14 +13,21 @@ tc_active(char const* const scope) } void -QTC::TC(char const* const scope, char const* const ccase, int n) +QTC::TC_real(char const* const scope, char const* const ccase, int n) { - static std::set<std::pair<std::string, int>> cache; + static std::map<std::string, bool> active; + auto is_active = active.find(scope); + if (is_active == active.end()) { + active[scope] = tc_active(scope); + is_active = active.find(scope); + } - if (!tc_active(scope)) { + if (!is_active->second) { return; } + static std::set<std::pair<std::string, int>> cache; + std::string filename; #ifdef _WIN32 # define TC_ENV "TC_WIN_FILENAME" diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc index 4e58aaf7..98a8f318 
100644 --- a/libqpdf/QUtil.cc +++ b/libqpdf/QUtil.cc @@ -37,6 +37,9 @@ # include <sys/stat.h> # include <unistd.h> #endif +#ifdef HAVE_MALLOC_INFO +# include <malloc.h> +#endif // First element is 24 static unsigned short pdf_doc_low_to_unicode[] = { @@ -1207,52 +1210,6 @@ QUtil::random() return result; } -bool -QUtil::is_hex_digit(char ch) -{ - return (ch && (strchr("0123456789abcdefABCDEF", ch) != nullptr)); -} - -bool -QUtil::is_space(char ch) -{ - return (ch && (strchr(" \f\n\r\t\v", ch) != nullptr)); -} - -bool -QUtil::is_digit(char ch) -{ - return ((ch >= '0') && (ch <= '9')); -} - -bool -QUtil::is_number(char const* p) -{ - // ^[\+\-]?(\.\d*|\d+(\.\d*)?)$ - if (!*p) { - return false; - } - if ((*p == '-') || (*p == '+')) { - ++p; - } - bool found_dot = false; - bool found_digit = false; - for (; *p; ++p) { - if (*p == '.') { - if (found_dot) { - // only one dot - return false; - } - found_dot = true; - } else if (QUtil::is_digit(*p)) { - found_digit = true; - } else { - return false; - } - } - return found_digit; -} - void QUtil::read_file_into_memory( char const* filename, std::shared_ptr<char>& file_buf, size_t& size) @@ -2014,3 +1971,73 @@ QUtil::call_main_from_wmain( } #endif // QPDF_NO_WCHAR_T + +size_t +QUtil::get_max_memory_usage() +{ +#if defined(HAVE_MALLOC_INFO) && defined(HAVE_OPEN_MEMSTREAM) + static std::regex tag_re("<(/?\\w+)([^>]*?)>"); + static std::regex attr_re("(\\w+)=\"(.*?)\""); + + char* buf; + size_t size; + FILE* f = open_memstream(&buf, &size); + if (f == nullptr) { + return 0; + } + malloc_info(0, f); + fclose(f); + if (QUtil::get_env("QPDF_DEBUG_MEM_USAGE")) { + fprintf(stderr, "%s", buf); + } + + // Warning: this code uses regular expression to extract data from + // an XML string. 
This is generally a bad idea, but we're going to + // do it anyway because QUtil.hh warns against using this function + // for other than development/testing, and if this function fails + // to generate reasonable output during performance testing, it + // will be noticed. + + // This is my best guess at how to interpret malloc_info. Anyway + // it seems to provide useful information for detecting code + // changes that drastically change memory usage. + size_t result = 0; + try { + std::cregex_iterator m_begin(buf, buf + size, tag_re); + std::cregex_iterator cr_end; + std::sregex_iterator sr_end; + + int in_heap = 0; + for (auto m = m_begin; m != cr_end; ++m) { + std::string tag(m->str(1)); + if (tag == "heap") { + ++in_heap; + } else if (tag == "/heap") { + --in_heap; + } else if (in_heap == 0) { + std::string rest = m->str(2); + std::map<std::string, std::string> attrs; + std::sregex_iterator a_begin(rest.begin(), rest.end(), attr_re); + for (auto m2 = a_begin; m2 != sr_end; ++m2) { + attrs[m2->str(1)] = m2->str(2); + } + if (tag == "total") { + if (attrs.count("size") > 0) { + result += QIntC::to_size( + QUtil::string_to_ull(attrs["size"].c_str())); + } + } else if (tag == "system" && attrs["type"] == "max") { + result += QIntC::to_size( + QUtil::string_to_ull(attrs["size"].c_str())); + } + } + } + } catch (...) 
{ + // ignore -- just return 0 + } + free(buf); + return result; +#else + return 0; +#endif +} diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh new file mode 100644 index 00000000..b83dbb1c --- /dev/null +++ b/libqpdf/qpdf/QPDFParser.hh @@ -0,0 +1,53 @@ +#ifndef QPDFPARSER_HH +#define QPDFPARSER_HH + +#include <qpdf/QPDFObjectHandle.hh> + +#include <memory> +#include <string> + +class QPDFParser +{ + public: + QPDFParser() = delete; + QPDFParser( + std::shared_ptr<InputSource> input, + std::string const& object_description, + QPDFTokenizer& tokenizer, + QPDFObjectHandle::StringDecrypter* decrypter, + QPDF* context) : + input(input), + object_description(object_description), + tokenizer(tokenizer), + decrypter(decrypter), + context(context) + { + } + virtual ~QPDFParser() = default; + + QPDFObjectHandle parse(bool& empty, bool content_stream); + + private: + enum parser_state_e { + st_top, + st_start, + st_stop, + st_eof, + st_dictionary, + st_array + }; + + void warn(qpdf_offset_t offset, std::string const& msg) const; + void warn(std::string const& msg) const; + static void warn(QPDF*, QPDFExc const&); + void setParsedOffset(qpdf_offset_t offset); + void + setDescriptionFromInput(QPDFObjectHandle oh, qpdf_offset_t offset) const; + std::shared_ptr<InputSource> input; + std::string const& object_description; + QPDFTokenizer& tokenizer; + QPDFObjectHandle::StringDecrypter* decrypter; + QPDF* context; +}; + +#endif // QPDFPARSER_HH diff --git a/libqpdf/qpdf/QPDF_Array.hh b/libqpdf/qpdf/QPDF_Array.hh index 3e095637..426efe36 100644 --- a/libqpdf/qpdf/QPDF_Array.hh +++ b/libqpdf/qpdf/QPDF_Array.hh @@ -1,13 +1,13 @@ #ifndef QPDF_ARRAY_HH #define QPDF_ARRAY_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> #include <qpdf/SparseOHArray.hh> #include <list> #include <vector> -class QPDF_Array: public QPDFObject +class QPDF_Array: public QPDFValue { public: virtual ~QPDF_Array() = default; @@ -17,8 +17,6 @@ class QPDF_Array: public QPDFObject 
virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; int getNItems() const; QPDFObjectHandle getItem(int n) const; diff --git a/libqpdf/qpdf/QPDF_Bool.hh b/libqpdf/qpdf/QPDF_Bool.hh index dbedc70a..3e45cd8e 100644 --- a/libqpdf/qpdf/QPDF_Bool.hh +++ b/libqpdf/qpdf/QPDF_Bool.hh @@ -1,9 +1,9 @@ #ifndef QPDF_BOOL_HH #define QPDF_BOOL_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Bool: public QPDFObject +class QPDF_Bool: public QPDFValue { public: virtual ~QPDF_Bool() = default; @@ -11,8 +11,6 @@ class QPDF_Bool: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; bool getVal() const; private: diff --git a/libqpdf/qpdf/QPDF_Dictionary.hh b/libqpdf/qpdf/QPDF_Dictionary.hh index cacc8961..19ab8d9b 100644 --- a/libqpdf/qpdf/QPDF_Dictionary.hh +++ b/libqpdf/qpdf/QPDF_Dictionary.hh @@ -1,14 +1,14 @@ #ifndef QPDF_DICTIONARY_HH #define QPDF_DICTIONARY_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> #include <map> #include <set> #include <qpdf/QPDFObjectHandle.hh> -class QPDF_Dictionary: public QPDFObject +class QPDF_Dictionary: public QPDFValue { public: virtual ~QPDF_Dictionary() = default; @@ -17,8 +17,6 @@ class QPDF_Dictionary: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; // hasKey() and getKeys() treat keys with null values as if they // aren't there. 
getKey() returns null for the value of a diff --git a/libqpdf/qpdf/QPDF_InlineImage.hh b/libqpdf/qpdf/QPDF_InlineImage.hh index caaeaf87..b7bea9c7 100644 --- a/libqpdf/qpdf/QPDF_InlineImage.hh +++ b/libqpdf/qpdf/QPDF_InlineImage.hh @@ -1,9 +1,9 @@ #ifndef QPDF_INLINEIMAGE_HH #define QPDF_INLINEIMAGE_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_InlineImage: public QPDFObject +class QPDF_InlineImage: public QPDFValue { public: virtual ~QPDF_InlineImage() = default; @@ -11,8 +11,6 @@ class QPDF_InlineImage: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; std::string getVal() const; private: diff --git a/libqpdf/qpdf/QPDF_Integer.hh b/libqpdf/qpdf/QPDF_Integer.hh index 2c17daf0..7e09673c 100644 --- a/libqpdf/qpdf/QPDF_Integer.hh +++ b/libqpdf/qpdf/QPDF_Integer.hh @@ -1,9 +1,9 @@ #ifndef QPDF_INTEGER_HH #define QPDF_INTEGER_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Integer: public QPDFObject +class QPDF_Integer: public QPDFValue { public: virtual ~QPDF_Integer() = default; @@ -11,8 +11,6 @@ class QPDF_Integer: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; long long getVal() const; private: diff --git a/libqpdf/qpdf/QPDF_Name.hh b/libqpdf/qpdf/QPDF_Name.hh index cf653b2e..74fc7e44 100644 --- a/libqpdf/qpdf/QPDF_Name.hh +++ b/libqpdf/qpdf/QPDF_Name.hh @@ -1,9 +1,9 @@ #ifndef QPDF_NAME_HH #define QPDF_NAME_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Name: public QPDFObject +class QPDF_Name: public QPDFValue { public: virtual ~QPDF_Name() = default; @@ -11,8 +11,6 @@ class QPDF_Name: public 
QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; std::string getName() const; // Put # into strings with characters unsuitable for name token diff --git a/libqpdf/qpdf/QPDF_Null.hh b/libqpdf/qpdf/QPDF_Null.hh index 16833424..68973de9 100644 --- a/libqpdf/qpdf/QPDF_Null.hh +++ b/libqpdf/qpdf/QPDF_Null.hh @@ -1,9 +1,9 @@ #ifndef QPDF_NULL_HH #define QPDF_NULL_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Null: public QPDFObject +class QPDF_Null: public QPDFValue { public: virtual ~QPDF_Null() = default; @@ -11,11 +11,9 @@ class QPDF_Null: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; private: - QPDF_Null() = default; + QPDF_Null(); }; #endif // QPDF_NULL_HH diff --git a/libqpdf/qpdf/QPDF_Operator.hh b/libqpdf/qpdf/QPDF_Operator.hh index 1da43d72..767c0ba0 100644 --- a/libqpdf/qpdf/QPDF_Operator.hh +++ b/libqpdf/qpdf/QPDF_Operator.hh @@ -1,9 +1,9 @@ #ifndef QPDF_OPERATOR_HH #define QPDF_OPERATOR_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Operator: public QPDFObject +class QPDF_Operator: public QPDFValue { public: virtual ~QPDF_Operator() = default; @@ -11,8 +11,6 @@ class QPDF_Operator: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; std::string getVal() const; private: diff --git a/libqpdf/qpdf/QPDF_Real.hh b/libqpdf/qpdf/QPDF_Real.hh index f5ab4bd6..dc0f3ff8 100644 --- a/libqpdf/qpdf/QPDF_Real.hh +++ b/libqpdf/qpdf/QPDF_Real.hh @@ -1,9 +1,9 @@ 
#ifndef QPDF_REAL_HH #define QPDF_REAL_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Real: public QPDFObject +class QPDF_Real: public QPDFValue { public: virtual ~QPDF_Real() = default; @@ -13,8 +13,6 @@ class QPDF_Real: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; std::string getVal(); private: diff --git a/libqpdf/qpdf/QPDF_Reserved.hh b/libqpdf/qpdf/QPDF_Reserved.hh index 243a1728..f90242a9 100644 --- a/libqpdf/qpdf/QPDF_Reserved.hh +++ b/libqpdf/qpdf/QPDF_Reserved.hh @@ -1,9 +1,9 @@ #ifndef QPDF_RESERVED_HH #define QPDF_RESERVED_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Reserved: public QPDFObject +class QPDF_Reserved: public QPDFValue { public: virtual ~QPDF_Reserved() = default; @@ -11,11 +11,9 @@ class QPDF_Reserved: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; private: - QPDF_Reserved() = default; + QPDF_Reserved(); }; #endif // QPDF_RESERVED_HH diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index 8980c751..3a16160e 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -3,9 +3,9 @@ #include <qpdf/Types.h> -#include <qpdf/QPDFObject.hh> #include <qpdf/QPDFObjectHandle.hh> #include <qpdf/QPDFStreamFilter.hh> +#include <qpdf/QPDFValue.hh> #include <functional> #include <memory> @@ -13,7 +13,7 @@ class Pipeline; class QPDF; -class QPDF_Stream: public QPDFObject +class QPDF_Stream: public QPDFValue { public: virtual ~QPDF_Stream() = default; @@ -26,8 +26,6 @@ class QPDF_Stream: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string 
unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; virtual void setDescription(QPDF*, std::string const&); QPDFObjectHandle getDict() const; bool isDataModified() const; diff --git a/libqpdf/qpdf/QPDF_String.hh b/libqpdf/qpdf/QPDF_String.hh index b6d77637..a92427e3 100644 --- a/libqpdf/qpdf/QPDF_String.hh +++ b/libqpdf/qpdf/QPDF_String.hh @@ -1,11 +1,11 @@ #ifndef QPDF_STRING_HH #define QPDF_STRING_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> // QPDF_Strings may included embedded null characters. -class QPDF_String: public QPDFObject +class QPDF_String: public QPDFValue { friend class QPDFWriter; @@ -16,8 +16,6 @@ class QPDF_String: public QPDFObject create_utf16(std::string const& utf8_val); virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; std::string unparse(bool force_binary); virtual JSON getJSON(int json_version); std::string getVal() const; diff --git a/libqpdf/qpdf/QPDF_Unresolved.hh b/libqpdf/qpdf/QPDF_Unresolved.hh new file mode 100644 index 00000000..efcf4e3d --- /dev/null +++ b/libqpdf/qpdf/QPDF_Unresolved.hh @@ -0,0 +1,19 @@ +#ifndef QPDF_UNRESOLVED_HH +#define QPDF_UNRESOLVED_HH + +#include <qpdf/QPDFValue.hh> + +class QPDF_Unresolved: public QPDFValue +{ + public: + virtual ~QPDF_Unresolved() = default; + static std::shared_ptr<QPDFObject> create(QPDF* qpdf, QPDFObjGen const& og); + virtual std::shared_ptr<QPDFObject> shallowCopy(); + virtual std::string unparse(); + virtual JSON getJSON(int json_version); + + private: + QPDF_Unresolved(QPDF* qpdf, QPDFObjGen const& og); +}; + +#endif // QPDF_UNRESOLVED_HH diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh index 7c3bb266..eb272a04 100644 --- a/libqpdf/qpdf/auto_job_help.hh +++ b/libqpdf/qpdf/auto_job_help.hh @@ -883,6 +883,9 @@ 
for debugging qpdf. ap.addOptionHelp("--test-json-schema", "testing", "test generated json against schema", R"(This is used by qpdf's test suite to check consistency between the output of qpdf --json and the output of qpdf --json-help. )"); +ap.addOptionHelp("--report-mem-usage", "testing", "best effort report of memory usage", R"(This is used by qpdf's performance test suite to report the +maximum amount of memory used in supported environments. +)"); } static void add_help(QPDFArgParser& ap) { diff --git a/libqpdf/qpdf/auto_job_init.hh b/libqpdf/qpdf/auto_job_init.hh index b90592e0..ad110d16 100644 --- a/libqpdf/qpdf/auto_job_init.hh +++ b/libqpdf/qpdf/auto_job_init.hh @@ -69,6 +69,7 @@ this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();}); this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();}); this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();}); this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput)); +this->ap.addBare("report-mem-usage", [this](){c_main->reportMemUsage();}); this->ap.addBare("requires-password", [this](){c_main->requiresPassword();}); this->ap.addBare("show-encryption", [this](){c_main->showEncryption();}); this->ap.addBare("show-encryption-key", [this](){c_main->showEncryptionKey();}); diff --git a/libqpdf/qpdf/auto_job_json_init.hh b/libqpdf/qpdf/auto_job_json_init.hh index 8f8fb987..1cd69368 100644 --- a/libqpdf/qpdf/auto_job_json_init.hh +++ b/libqpdf/qpdf/auto_job_json_init.hh @@ -409,6 +409,9 @@ popHandler(); // key: pages pushKey("removePageLabels"); addBare([this]() { c_main->removePageLabels(); }); popHandler(); // key: removePageLabels +pushKey("reportMemUsage"); +addBare([this]() { c_main->reportMemUsage(); }); +popHandler(); // key: reportMemUsage pushKey("rotate"); addParameter([this](std::string const& p) { c_main->rotate(p); }); popHandler(); // key: rotate diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh index 
aa69c192..9272c596 100644 --- a/libqpdf/qpdf/auto_job_schema.hh +++ b/libqpdf/qpdf/auto_job_schema.hh @@ -144,6 +144,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ } ], "removePageLabels": "remove explicit page numbers", + "reportMemUsage": "best effort report of memory usage", "rotate": "rotate pages", "overlay": { "file": "source file for overlay", diff --git a/libqpdf/qpdf/qpdf-config.h.in b/libqpdf/qpdf/qpdf-config.h.in index 8a22b875..500f55cc 100644 --- a/libqpdf/qpdf/qpdf-config.h.in +++ b/libqpdf/qpdf/qpdf-config.h.in @@ -21,6 +21,8 @@ #cmakedefine HAVE_LOCALTIME_R 1 #cmakedefine HAVE_RANDOM 1 #cmakedefine HAVE_TM_GMTOFF 1 +#cmakedefine HAVE_MALLOC_INFO 1 +#cmakedefine HAVE_OPEN_MEMSTREAM 1 /* printf format for long long */ #cmakedefine LL_FMT "${LL_FMT}" diff --git a/libtests/qtest/qutil/qutil.out b/libtests/qtest/qutil/qutil.out index 48d22fb9..8d3e6d8e 100644 --- a/libtests/qtest/qutil/qutil.out +++ b/libtests/qtest/qutil/qutil.out @@ -134,3 +134,5 @@ D:20210209191925Z 2021-02-09T19:19:25Z ---- is_long_long done +---- memory usage +memory usage okay diff --git a/libtests/qutil.cc b/libtests/qutil.cc index 995a7599..82c2dd1a 100644 --- a/libtests/qutil.cc +++ b/libtests/qutil.cc @@ -703,6 +703,18 @@ is_long_long_test() std::cout << "done" << std::endl; } +void +memory_usage_test() +{ + auto u1 = QUtil::get_max_memory_usage(); + if (u1 > 0) { + auto x = QUtil::make_shared_array<int>(10 << 20); + auto u2 = QUtil::get_max_memory_usage(); + assert(u2 > u1); + } + std::cout << "memory usage okay" << std::endl; +} + int main(int argc, char* argv[]) { @@ -739,6 +751,8 @@ main(int argc, char* argv[]) timestamp_test(); std::cout << "---- is_long_long" << std::endl; is_long_long_test(); + std::cout << "---- memory usage" << std::endl; + memory_usage_test(); } catch (std::exception& e) { std::cout << "unexpected exception: " << e.what() << std::endl; } diff --git a/manual/cli.rst b/manual/cli.rst index 021d1dc7..57682c1a 100644 --- a/manual/cli.rst +++ 
b/manual/cli.rst @@ -3463,6 +3463,16 @@ Related Options memory and is therefore unsuitable for use with large files. This is why it's also not on by default. +.. qpdf:option:: --report-mem-usage + + .. help: best effort report of memory usage + + This is used by qpdf's performance test suite to report the + maximum amount of memory used in supported environments. + + This is used by qpdf's performance test suite to report the maximum + amount of memory used in supported environments. + .. _unicode-passwords: Unicode Passwords diff --git a/manual/installation.rst b/manual/installation.rst index e02380ee..08c49765 100644 --- a/manual/installation.rst +++ b/manual/installation.rst @@ -257,6 +257,16 @@ CHECK_SIZES that ensures an exact match between classes in ``sizes.cc`` and classes in the library's public API. This option requires Python 3. +ENABLE_QTC + This is off by default, except in maintainer mode. When off, + ``QTC::TC`` calls are compiled out by having ``QTC::TC`` be an empty + inline function. The underlying ``QTC::TC`` remains in the library, + so it is possible to build and package the qpdf library with + ``ENABLE_QTC`` turned off while still allowing developer code to use + ``QTC::TC`` if desired. If you are modifying qpdf code, it's a good + idea to have this on for more robust automated testing. Otherwise, + there's no reason to have it on. + GENERATE_AUTO_JOB Some qpdf source files are automatically generated from :file:`job.yml` and the CLI documentation. If you are adding new @@ -297,6 +307,8 @@ MAINTAINER_MODE - ``CHECK_SIZES`` + - ``ENABLE_QTC`` + - ``GENERATE_AUTO_JOB`` - ``WERROR`` diff --git a/manual/release-notes.rst b/manual/release-notes.rst index ebbfd4f5..01a19249 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -7,6 +7,13 @@ For a detailed list of changes, please see the file :file:`ChangeLog` in the source distribution. 11.0.0 + - Performance improvements + + - Many performance enhancements have been added. 
In developer + performance benchmarks, gains on the order of 20% have been + observed. Most of that work, including major optimization of + qpdf's lexical layer, was done by M. Holger. + - Replacement of ``PointerHolder`` with ``std::shared_ptr`` - The qpdf-specific ``PointerHolder`` smart pointer implementation @@ -231,6 +238,14 @@ For a detailed list of changes, please see the file - The qpdf source code is now formatted automatically with ``clang-format``. See :ref:`code-formatting` for information. + - Test coverage with ``QTC`` is enabled during development but + compiled out of distributed qpdf binaries by default. This + results in a significant performance improvement, especially on + Windows. ``QTC::TC`` is still available in the library and is + still usable by end user code even though calls to it made + internally by the library are turned off. Internally, there is + some additional caching to reduce the overhead of repeatedly + reading environment variables at runtime. 10.6.3: March 8, 2022 - Announcement of upcoming change: diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index f535b9ee..c3ab0a07 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -56,18 +56,20 @@ QPDF missing trailer 0 QPDF trailer lacks size 0 QPDF trailer size not integer 0 QPDF trailer prev not integer 0 -QPDFObjectHandle bad brace 0 -QPDFObjectHandle bad array close 0 +QPDFParser bad brace 0 +QPDFParser bad array close 0 QPDF stream without length 0 QPDF stream length not integer 0 QPDF missing endstream 0 -QPDFObjectHandle bad dictionary close 0 +QPDFParser bad dictionary close 0 QPDF can't find xref 0 QPDFTokenizer bad ) 0 QPDFTokenizer bad > 0 QPDFTokenizer bad hexstring character 0 +QPDFTokenizer bad hexstring 2nd character 0 QPDFTokenizer null in name 0 -QPDFTokenizer bad name 0 +QPDFTokenizer bad name 1 0 +QPDFTokenizer bad name 2 0 QPDF_Stream invalid filter 0 QPDF UseOutlines but no Outlines 0 QPDFObjectHandle makeDirect loop 0 @@ -213,7 +215,7 @@ QPDF not copying 
pages object 0 QPDF insert foreign page 0 QPDFWriter foreign object 0 QPDFWriter copy use_aes 1 -QPDFObjectHandle indirect without context 0 +QPDFParser indirect without context 0 QPDFObjectHandle trailing data in parse 0 QPDFJob pages encryption password 0 QPDFTokenizer EOF reading token 0 @@ -252,12 +254,12 @@ QPDFWriter standard deterministic ID 1 QPDFWriter linearized deterministic ID 1 QPDFWriter deterministic with no data 0 qpdf-c called qpdf_set_deterministic_ID 0 -QPDFObjectHandle indirect with 0 objid 0 +QPDFParser indirect with 0 objid 0 QPDF object id 0 0 QPDF recursion loop in resolve 0 -QPDFObjectHandle treat word as string 0 -QPDFObjectHandle found fake 1 -QPDFObjectHandle no val for last key 0 +QPDFParser treat word as string 0 +QPDFParser found fake 1 +QPDFParser no val for last key 0 QPDF resolve failure to null 0 QPDFWriter preserve unreferenced standard 0 QPDFObjectHandle errors in parsecontent 0 @@ -286,8 +288,8 @@ QPDFObjectHandle non-stream in stream array 0 QPDFObjectHandle coalesce called on stream 0 QPDFObjectHandle coalesce provide stream data 0 QPDF_Stream bad token at end during normalize 0 -QPDFObjectHandle bad token in parse 0 -QPDFObjectHandle eof in parseInternal 0 +QPDFParser bad token in parse 0 +QPDFParser eof in parse 0 QPDFObjectHandle array bounds 0 QPDFObjectHandle boolean returning false 0 QPDFObjectHandle integer returning 0 0 @@ -315,7 +317,7 @@ QPDFObjectHandle numeric non-numeric 0 QPDFObjectHandle erase array bounds 0 qpdf-c called qpdf_check_pdf 0 QPDF xref loop 0 -QPDFObjectHandle too deep 0 +QPDFParser too deep 0 QPDFFormFieldObjectHelper non-trivial inheritance 0 QPDFFormFieldObjectHelper non-trivial qualified name 0 QPDFFormFieldObjectHelper TU present 0 @@ -426,7 +428,7 @@ QPDF eof skipping spaces before xref 1 QPDF_encryption user matches owner V < 5 0 QPDF_encryption same password 1 QPDFWriter stream in ostream 0 -QPDFObjectHandle duplicate dict key 0 +QPDFParser duplicate dict key 0 QPDFWriter no encryption sig 
contents 0 QPDFPageObjectHelper colorspace lookup 0 QPDFWriter ignore XRef in qdf mode 0 diff --git a/qpdf/qtest/qpdf/issue-51.out b/qpdf/qtest/qpdf/issue-51.out index b4bd165c..feffea44 100644 --- a/qpdf/qtest/qpdf/issue-51.out +++ b/qpdf/qtest/qpdf/issue-51.out @@ -9,7 +9,6 @@ WARNING: issue-51.pdf (object 2 0, offset 26): /Length key in stream dictionary WARNING: issue-51.pdf (object 2 0, offset 71): attempting to recover stream length WARNING: issue-51.pdf (object 2 0, offset 71): unable to recover stream data; treating stream as empty WARNING: issue-51.pdf (object 2 0, offset 977): expected endobj -WARNING: issue-51.pdf (object 2 0, offset 977): EOF after endobj WARNING: issue-51.pdf (object 3 0): object has offset 0 WARNING: issue-51.pdf (object 4 0): object has offset 0 WARNING: issue-51.pdf (object 5 0): object has offset 0 diff --git a/qpdf/qtest/type-checks.test b/qpdf/qtest/type-checks.test index 03d75a6c..17b3c994 100644 --- a/qpdf/qtest/type-checks.test +++ b/qpdf/qtest/type-checks.test @@ -14,7 +14,7 @@ cleanup(); my $td = new TestDriver('type-checks'); -my $n_tests = 5; +my $n_tests = 6; # Whenever object-types.pdf is edited, object-types-os.pdf should be # regenerated. 
@@ -43,6 +43,10 @@ $td->runtest("compound type checks", {$td->COMMAND => "test_driver 82 object-types-os.pdf"}, {$td->STRING => "test 82 done\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("indirect objects belonging to destroyed QPDF", + {$td->COMMAND => "test_driver 92 -"}, + {$td->STRING => "test 92 done\n", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); cleanup(); $td->report($n_tests); diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index aa4f9ce5..5572e824 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -3258,6 +3258,20 @@ test_91(QPDF& pdf, char const* arg2) 2, &p, qpdf_dl_none, qpdf_sj_inline, "", std::set<std::string>()); } +static void +test_92(QPDF& pdf, char const* arg2) +{ + // Exercise indirect objects owned by destroyed QPDF object. + QPDF* qpdf = new QPDF(); + qpdf->emptyPDF(); + auto root = qpdf->getRoot(); + assert(root.getOwningQPDF() != nullptr); + assert(root.isIndirect()); + delete qpdf; + assert(root.getOwningQPDF() == nullptr); + assert(!root.isIndirect()); +} + void runtest(int n, char const* filename1, char const* arg2) { @@ -3265,7 +3279,7 @@ runtest(int n, char const* filename1, char const* arg2) // the test suite to see how the test is invoked to find the file // that the test is supposed to operate on. 
- std::set<int> ignore_filename = {61, 81, 83, 84, 85, 86, 87}; + std::set<int> ignore_filename = {61, 81, 83, 84, 85, 86, 87, 92}; if (n == 0) { // Throw in some random test cases that don't fit anywhere @@ -3362,7 +3376,8 @@ runtest(int n, char const* filename1, char const* arg2) {76, test_76}, {77, test_77}, {78, test_78}, {79, test_79}, {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83}, {84, test_84}, {85, test_85}, {86, test_86}, {87, test_87}, - {88, test_88}, {89, test_89}, {90, test_90}, {91, test_91}}; + {88, test_88}, {89, test_89}, {90, test_90}, {91, test_91}, + {92, test_92}}; auto fn = test_functions.find(n); if (fn == test_functions.end()) { diff --git a/qtest/bin/qtest-driver b/qtest/bin/qtest-driver index c37c4d1d..fa8d82c3 100755 --- a/qtest/bin/qtest-driver +++ b/qtest/bin/qtest-driver @@ -33,7 +33,7 @@ require TestDriver; if ((@ARGV == 1) && ($ARGV[0] eq '--version')) { - print "$whoami version 1.8\n"; + print "$whoami version 1.9\n"; exit 0; } if ((@ARGV == 1) && ($ARGV[0] eq '--print-path')) @@ -13,6 +13,7 @@ my $code = undef; my @bin = (); my $color = undef; my $show_on_failure = 0; +my $disable_tc = 0; my @tc = (); if ($^O =~ m/^MSWin32|msys$/) @@ -51,6 +52,10 @@ while (@ARGV) usage() unless @ARGV; $show_on_failure = cmake_bool(shift(@ARGV)); } + elsif ($arg eq '--disable-tc') + { + $disable_tc = 1; + } elsif ($arg eq '--tc') { usage() unless @ARGV; @@ -94,7 +99,7 @@ push(@cmd, "-datadir", "$code/qtest", "-junit-suffix", basename($code)); -if (scalar(@tc)) +if (scalar(@tc) && (! $disable_tc)) { my @tc_srcs = map { File::Spec->abs2rel(abs_path($_)) |