diff options
Diffstat (limited to 'include/qpdf/QUtil.hh')
-rw-r--r-- | include/qpdf/QUtil.hh | 330 |
1 files changed, 137 insertions, 193 deletions
diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh index 85e7f907..47ecf6bf 100644 --- a/include/qpdf/QUtil.hh +++ b/include/qpdf/QUtil.hh @@ -2,22 +2,19 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. #ifndef QUTIL_HH #define QUTIL_HH @@ -40,8 +37,7 @@ class Pipeline; namespace QUtil { - // This is a collection of useful utility functions that don't - // really go anywhere else. + // This is a collection of useful utility functions that don't really go anywhere else. QPDF_DLL std::string int_to_string(long long, int length = 0); QPDF_DLL @@ -53,8 +49,7 @@ namespace QUtil QPDF_DLL std::string double_to_string(double, int decimal_places = 0, bool trim_trailing_zeroes = true); - // These string to number methods throw std::runtime_error on - // underflow/overflow. + // These string to number methods throw std::runtime_error on underflow/overflow. QPDF_DLL long long string_to_ll(char const* str); QPDF_DLL @@ -64,63 +59,53 @@ namespace QUtil QPDF_DLL unsigned int string_to_uint(char const* str); - // Returns true if this exactly represents a long long. The - // determination is made by converting the string to a long long, - // then converting the result back to a string, and then comparing + // Returns true if this exactly represents a long long. The determination is made by converting + // the string to a long long, then converting the result back to a string, and then comparing // that result with the original string. QPDF_DLL bool is_long_long(char const* str); - // Pipeline's write method wants unsigned char*, but we often have - // some other type of string. These methods do combinations of - // const_cast and reinterpret_cast to give us an unsigned char*. - // They should only be used when it is known that it is safe. - // None of the pipelines in qpdf modify the data passed to them, - // so within qpdf, it should always be safe. + // Pipeline's write method wants unsigned char*, but we often have some other type of string. + // These methods do combinations of const_cast and reinterpret_cast to give us an unsigned + // char*. They should only be used when it is known that it is safe. None of the pipelines in + // qpdf modify the data passed to them, so within qpdf, it should always be safe. QPDF_DLL unsigned char* unsigned_char_pointer(std::string const& str); QPDF_DLL unsigned char* unsigned_char_pointer(char const* str); - // Throw QPDFSystemError, which is derived from - // std::runtime_error, with a string formed by appending to - // "description: " the standard string corresponding to the - // current value of errno. You can retrieve the value of errno by - // calling getErrno() on the QPDFSystemError. Prior to qpdf 8.2.0, - // this method threw system::runtime_error directly, but since - // QPDFSystemError is derived from system::runtime_error, old code - // that specifically catches std::runtime_error will still work. + // Throw QPDFSystemError, which is derived from std::runtime_error, with a string formed by + // appending to "description: " the standard string corresponding to the current value of errno. + // You can retrieve the value of errno by calling getErrno() on the QPDFSystemError. Prior to + // qpdf 8.2.0, this method threw system::runtime_error directly, but since QPDFSystemError is + // derived from system::runtime_error, old code that specifically catches std::runtime_error + // will still work. QPDF_DLL void throw_system_error(std::string const& description); - // The status argument is assumed to be the return value of a - // standard library call that sets errno when it fails. If status - // is -1, convert the current value of errno to a - // std::runtime_error that includes the standard error string. - // Otherwise, return status. + // The status argument is assumed to be the return value of a standard library call that sets + // errno when it fails. If status is -1, convert the current value of errno to a + // std::runtime_error that includes the standard error string. Otherwise, return status. QPDF_DLL int os_wrapper(std::string const& description, int status); - // If the open fails, throws std::runtime_error. Otherwise, the - // FILE* is returned. The filename should be UTF-8 encoded, even - // on Windows. It will be converted as needed on Windows. + // If the open fails, throws std::runtime_error. Otherwise, the FILE* is returned. The filename + // should be UTF-8 encoded, even on Windows. It will be converted as needed on Windows. QPDF_DLL FILE* safe_fopen(char const* filename, char const* mode); - // The FILE* argument is assumed to be the return of fopen. If - // null, throw std::runtime_error. Otherwise, return the FILE* - // argument. + // The FILE* argument is assumed to be the return of fopen. If null, throw std::runtime_error. + // Otherwise, return the FILE* argument. QPDF_DLL FILE* fopen_wrapper(std::string const&, FILE*); - // This is a little class to help with automatic closing files. - // You can do something like + // This is a little class to help with automatic closing files. You can do something like // // QUtil::FileCloser fc(QUtil::safe_fopen(filename, "rb")); // - // and then use fc.f to the file. Be sure to actually declare a - // variable of type FileCloser. Using it as a temporary won't work - // because it will close the file as soon as it goes out of scope. + // and then use fc.f to the file. Be sure to actually declare a variable of type FileCloser. + // Using it as a temporary won't work because it will close the file as soon as it goes out of + // scope. class FileCloser { public: @@ -160,28 +145,24 @@ namespace QUtil QPDF_DLL void rename_file(char const* oldname, char const* newname); - // Write the contents of filename as a binary file to the - // pipeline. + // Write the contents of filename as a binary file to the pipeline. QPDF_DLL void pipe_file(char const* filename, Pipeline* p); - // Return a function that will send the contents of the given file - // through the given pipeline as binary data. + // Return a function that will send the contents of the given file through the given pipeline as + // binary data. QPDF_DLL std::function<void(Pipeline*)> file_provider(std::string const& filename); - // Return the last path element. On Windows, either / or \ are - // path separators. Otherwise, only / is a path separator. Strip - // any trailing path separators. Then, if any path separators - // remain, return everything after the last path separator. - // Otherwise, return the whole string. As a special case, if a - // string consists entirely of path separators, the first - // character is returned. + // Return the last path element. On Windows, either / or \ are path separators. Otherwise, only + // / is a path separator. Strip any trailing path separators. Then, if any path separators + // remain, return everything after the last path separator. Otherwise, return the whole string. + // As a special case, if a string consists entirely of path separators, the first character is + // returned. QPDF_DLL std::string path_basename(std::string const& filename); - // Returns a dynamically allocated copy of a string that the - // caller has to delete with delete[]. + // Returns a dynamically allocated copy of a string that the caller has to delete with delete[]. QPDF_DLL char* copy_string(std::string const&); @@ -193,8 +174,7 @@ namespace QUtil QPDF_DLL std::unique_ptr<char[]> make_unique_cstr(std::string const&); - // Create a shared pointer to an array. From c++20, - // std::make_shared<T[]>(n) does this. + // Create a shared pointer to an array. From c++20, std::make_shared<T[]>(n) does this. template <typename T> std::shared_ptr<T> make_shared_array(size_t n) @@ -202,27 +182,24 @@ namespace QUtil return std::shared_ptr<T>(new T[n], std::default_delete<T[]>()); } - // Returns lower-case hex-encoded version of the string, treating - // each character in the input string as unsigned. The output - // string will be twice as long as the input string. + // Returns lower-case hex-encoded version of the string, treating each character in the input + // string as unsigned. The output string will be twice as long as the input string. QPDF_DLL std::string hex_encode(std::string const&); - // Returns lower-case hex-encoded version of the char including a leading - // "#". + // Returns lower-case hex-encoded version of the char including a leading "#". QPDF_DLL inline std::string hex_encode_char(char); - // Returns a string that is the result of decoding the input - // string. The input string may consist of mixed case hexadecimal - // digits. Any characters that are not hexadecimal digits will be - // silently ignored. If there are an odd number of hexadecimal - // digits, a trailing 0 will be assumed. + // Returns a string that is the result of decoding the input string. The input string may + // consist of mixed case hexadecimal digits. Any characters that are not hexadecimal digits will + // be silently ignored. If there are an odd number of hexadecimal digits, a trailing 0 will be + // assumed. QPDF_DLL std::string hex_decode(std::string const&); - // Decode a single hex digit into a char in the range 0 <= char < 16. Return - // a char >= 16 if digit is not a valid hex digit. + // Decode a single hex digit into a char in the range 0 <= char < 16. Return a char >= 16 if + // digit is not a valid hex digit. QPDF_DLL inline constexpr char hex_decode_char(char digit) noexcept; @@ -239,17 +216,15 @@ namespace QUtil QPDF_DLL char* getWhoami(char* argv0); - // Get the value of an environment variable in a portable fashion. - // Returns true iff the variable is defined. If `value' is - // non-null, initializes it with the value of the variable. + // Get the value of an environment variable in a portable fashion. Returns true iff the variable + // is defined. If `value' is non-null, initializes it with the value of the variable. QPDF_DLL bool get_env(std::string const& var, std::string* value = nullptr); QPDF_DLL time_t get_current_time(); - // Portable structure representing a point in time with second - // granularity and time zone offset + // Portable structure representing a point in time with second granularity and time zone offset struct QPDFTime { QPDFTime() = default; @@ -277,12 +252,11 @@ namespace QUtil QPDF_DLL QPDFTime get_current_qpdf_time(); - // Convert a QPDFTime structure to a PDF timestamp string, which - // is "D:yyyymmddhhmmss<z>" where <z> is either "Z" for UTC or - // "-hh'mm'" or "+hh'mm'" for timezone offset. <z> may also be - // omitted. Examples: "D:20210207161528-05'00'", - // "D:20210207211528Z", "D:20210207211528". See - // get_current_qpdf_time and the QPDFTime structure above. + // Convert a QPDFTime structure to a PDF timestamp string, which is "D:yyyymmddhhmmss<z>" where + // <z> is either "Z" for UTC or "-hh'mm'" or "+hh'mm'" for timezone offset. <z> may also be + // omitted. + // Examples: "D:20210207161528-05'00'", "D:20210207211528Z", "D:20210207211528". + // See get_current_qpdf_time and the QPDFTime structure above. QPDF_DLL std::string qpdf_time_to_pdf_time(QPDFTime const&); @@ -290,63 +264,53 @@ namespace QUtil QPDF_DLL std::string qpdf_time_to_iso8601(QPDFTime const&); - // Convert a PDF timestamp string to a QPDFTime. If syntactically - // valid, return true and fill in qtm. If not valid, return false, - // and do not modify qtm. If qtm is null, just check the validity - // of the string. + // Convert a PDF timestamp string to a QPDFTime. If syntactically valid, return true and fill in + // qtm. If not valid, return false, and do not modify qtm. If qtm is null, just check the + // validity of the string. QPDF_DLL bool pdf_time_to_qpdf_time(std::string const&, QPDFTime* qtm = nullptr); - // Convert PDF timestamp to a second-granularity ISO-8601 - // timestamp. If syntactically valid, return true and initialize - // iso8601. Otherwise, return false. + // Convert PDF timestamp to a second-granularity ISO-8601 timestamp. If syntactically valid, + // return true and initialize iso8601. Otherwise, return false. bool pdf_time_to_iso8601(std::string const& pdf_time, std::string& iso8601); - // Return a string containing the byte representation of the UTF-8 - // encoding for the unicode value passed in. + // Return a string containing the byte representation of the UTF-8 encoding for the unicode + // value passed in. QPDF_DLL std::string toUTF8(unsigned long uval); - // Return a string containing the byte representation of the - // UTF-16 big-endian encoding for the unicode value passed in. - // Unrepresentable code points are converted to U+FFFD. + // Return a string containing the byte representation of the UTF-16 big-endian encoding for the + // unicode value passed in. Unrepresentable code points are converted to U+FFFD. QPDF_DLL std::string toUTF16(unsigned long uval); - // If utf8_val.at(pos) points to the beginning of a valid - // UTF-8-encoded character, return the codepoint of the character - // and set error to false. Otherwise, return 0xfffd and set error - // to true. In all cases, pos is advanced to the next position - // that may begin a valid character. When the string has been - // consumed, pos will be set to the string length. It is an error - // to pass a value of pos that is greater than or equal to the - // length of the string. + // If utf8_val.at(pos) points to the beginning of a valid UTF-8-encoded character, return the + // codepoint of the character and set error to false. Otherwise, return 0xfffd and set error to + // true. In all cases, pos is advanced to the next position that may begin a valid character. + // When the string has been consumed, pos will be set to the string length. It is an error to + // pass a value of pos that is greater than or equal to the length of the string. QPDF_DLL unsigned long get_next_utf8_codepoint(std::string const& utf8_val, size_t& pos, bool& error); - // Test whether this is a UTF-16 string. This is indicated by - // first two bytes being 0xFE 0xFF (big-endian) or 0xFF 0xFE - // (little-endian), each of which is the encoding of U+FEFF, the - // Unicode marker. Starting in qpdf 10.6.2, this detects - // little-endian as well as big-endian. Even though the PDF spec - // doesn't allow little-endian, most readers seem to accept it. + // Test whether this is a UTF-16 string. This is indicated by first two bytes being 0xFE 0xFF + // (big-endian) or 0xFF 0xFE (little-endian), each of which is the encoding of U+FEFF, the + // Unicode marker. Starting in qpdf 10.6.2, this detects little-endian as well as big-endian. + // Even though the PDF spec doesn't allow little-endian, most readers seem to accept it. QPDF_DLL bool is_utf16(std::string const&); - // Test whether this is an explicit UTF-8 string as allowed by the - // PDF 2.0 spec. This is indicated by first three bytes being 0xEF - // 0xBB 0xBF, which is the UTF-8 encoding of U+FEFF. + // Test whether this is an explicit UTF-8 string as allowed by the PDF 2.0 spec. This is + // indicated by first three bytes being 0xEF 0xBB 0xBF, which is the UTF-8 encoding of U+FEFF. QPDF_DLL bool is_explicit_utf8(std::string const&); - // Convert a UTF-8 encoded string to UTF-16 big-endian. - // Unrepresentable code points are converted to U+FFFD. + // Convert a UTF-8 encoded string to UTF-16 big-endian. Unrepresentable code points are + // converted to U+FFFD. QPDF_DLL std::string utf8_to_utf16(std::string const& utf8); - // Convert a UTF-8 encoded string to the specified single-byte - // encoding system by replacing all unsupported characters with - // the given unknown_char. + // Convert a UTF-8 encoded string to the specified single-byte encoding system by replacing all + // unsupported characters with the given unknown_char. QPDF_DLL std::string utf8_to_ascii(std::string const& utf8, char unknown_char = '?'); QPDF_DLL @@ -356,9 +320,8 @@ namespace QUtil QPDF_DLL std::string utf8_to_pdf_doc(std::string const& utf8, char unknown_char = '?'); - // These versions return true if the conversion was successful and - // false if any unrepresentable characters were found and had to - // be substituted with the unknown character. + // These versions return true if the conversion was successful and false if any unrepresentable + // characters were found and had to be substituted with the unknown character. QPDF_DLL bool utf8_to_ascii(std::string const& utf8, std::string& ascii, char unknown_char = '?'); QPDF_DLL @@ -373,9 +336,8 @@ namespace QUtil QPDF_DLL std::string utf16_to_utf8(std::string const& utf16); - // Convert from the specified single-byte encoding system to - // UTF-8. There is no ascii_to_utf8 because all ASCII strings are - // already valid UTF-8. + // Convert from the specified single-byte encoding system to UTF-8. There is no ascii_to_utf8 + // because all ASCII strings are already valid UTF-8. QPDF_DLL std::string win_ansi_to_utf8(std::string const& win); QPDF_DLL @@ -383,39 +345,33 @@ namespace QUtil QPDF_DLL std::string pdf_doc_to_utf8(std::string const& pdfdoc); - // Analyze a string for encoding. We can't tell the difference - // between any single-byte encodings, and we can't tell for sure - // whether a string that happens to be valid UTF-8 isn't a - // different encoding, but we can at least tell a few things to - // help us guess. If there are no characters with the high bit - // set, has_8bit_chars is false, and the other values are also - // false, even though ASCII strings are valid UTF-8. is_valid_utf8 - // means that the string is non-trivially valid UTF-8. Although - // the PDF spec requires UTF-16 to be UTF-16BE, qpdf (and just - // about everything else) accepts UTF-16LE (as of 10.6.2). + // Analyze a string for encoding. We can't tell the difference between any single-byte + // encodings, and we can't tell for sure whether a string that happens to be valid UTF-8 isn't a + // different encoding, but we can at least tell a few things to help us guess. If there are no + // characters with the high bit set, has_8bit_chars is false, and the other values are also + // false, even though ASCII strings are valid UTF-8. is_valid_utf8 means that the string is + // non-trivially valid UTF-8. Although the PDF spec requires UTF-16 to be UTF-16BE, qpdf (and + // just about everything else) accepts UTF-16LE (as of 10.6.2). QPDF_DLL void analyze_encoding( std::string const& str, bool& has_8bit_chars, bool& is_valid_utf8, bool& is_utf16); - // Try to compensate for previously incorrectly encoded strings. - // We want to compensate for the following errors: + // Try to compensate for previously incorrectly encoded strings. We want to compensate for the + // following errors: // - // * The string was supposed to be UTF-8 but was one of the - // single-byte encodings - // * The string was supposed to be PDF Doc but was either UTF-8 or - // one of the other single-byte encodings + // * The string was supposed to be UTF-8 but was one of the single-byte encodings + // * The string was supposed to be PDF Doc but was either UTF-8 or one of the other single-byte + // encodings // - // The returned vector always contains the original string first, - // and then it contains what the correct string would be in the - // event that the original string was the result of any of the + // The returned vector always contains the original string first, and then it contains what the + // correct string would be in the event that the original string was the result of any of the // above errors. // - // This method is useful for attempting to recover a password that - // may have been previously incorrectly encoded. For example, the - // password was supposed to be UTF-8 but the previous application - // used a password encoded in WinAnsi, or if the previous password - // was supposed to be PDFDoc but was actually given as UTF-8 or - // WinAnsi, this method would find the correct password. + // This method is useful for attempting to recover a password that may have been previously + // incorrectly encoded. For example, the password was supposed to be UTF-8 but the previous + // application used a password encoded in WinAnsi, or if the previous password was supposed to + // be PDFDoc but was actually given as UTF-8 or WinAnsi, this method would find the correct + // password. QPDF_DLL std::vector<std::string> possible_repaired_encodings(std::string); @@ -427,30 +383,25 @@ namespace QUtil QPDF_DLL void initializeWithRandomBytes(unsigned char* data, size_t len); - // Supply a random data provider. Starting in qpdf 10.0.0, qpdf - // uses the crypto provider as its source of random numbers. If - // you are using the native crypto provider, then qpdf will either - // use the operating system's secure random number source or, only - // if enabled at build time, an insecure random source from - // stdlib. The caller is responsible for managing the memory for - // the RandomDataProvider. This method modifies a static variable. - // If you are providing your own random data provider, you should - // call this at the beginning of your program before creating any - // QPDF objects. Passing a null to this method will reset the - // library back to its default random data provider. + // Supply a random data provider. Starting in qpdf 10.0.0, qpdf uses the crypto provider as its + // source of random numbers. If you are using the native crypto provider, then qpdf will either + // use the operating system's secure random number source or, only if enabled at build time, an + // insecure random source from stdlib. The caller is responsible for managing the memory for the + // RandomDataProvider. This method modifies a static variable. If you are providing your own + // random data provider, you should call this at the beginning of your program before creating + // any QPDF objects. Passing a null to this method will reset the library back to its default + // random data provider. QPDF_DLL void setRandomDataProvider(RandomDataProvider*); - // This returns the random data provider that would be used the - // next time qpdf needs random data. It will never return null. - // If no random data provider has been provided and the library - // was not compiled with any random data provider available, an - // exception will be thrown. + // This returns the random data provider that would be used the next time qpdf needs random + // data. It will never return null. If no random data provider has been provided and the + // library was not compiled with any random data provider available, an exception will be + // thrown. QPDF_DLL RandomDataProvider* getRandomDataProvider(); - // Filename is UTF-8 encoded, even on Windows, as described in the - // comments for safe_fopen. + // Filename is UTF-8 encoded, even on Windows, as described in the comments for safe_fopen. QPDF_DLL std::list<std::string> read_lines_from_file(char const* filename, bool preserve_eol = false); QPDF_DLL @@ -471,15 +422,13 @@ namespace QUtil QPDF_DLL std::string read_file_into_string(FILE* f, std::string_view filename = ""); - // This used to be called strcasecmp, but that is a macro on some - // platforms, so we have to give it a name that is not likely to - // be a macro anywhere. + // This used to be called strcasecmp, but that is a macro on some platforms, so we have to give + // it a name that is not likely to be a macro anywhere. QPDF_DLL int str_compare_nocase(char const*, char const*); - // These routines help the tokenizer recognize certain character - // classes without using ctype, which we avoid because of locale - // considerations. + // These routines help the tokenizer recognize certain character classes without using ctype, + // which we avoid because of locale considerations. QPDF_DLL inline bool is_hex_digit(char); @@ -492,21 +441,19 @@ namespace QUtil QPDF_DLL inline bool is_number(char const*); - // This method parses the numeric range syntax used by the qpdf - // command-line tool. May throw std::runtime_error. + // This method parses the numeric range syntax used by the qpdf command-line tool. May throw + // std::runtime_error. QPDF_DLL std::vector<int> parse_numrange(char const* range, int max); #ifndef QPDF_NO_WCHAR_T - // If you are building qpdf on a stripped down system that doesn't - // have wchar_t, such as may be the case in some embedded - // environments, you may define QPDF_NO_WCHAR_T in your build. - // This symbol is never defined automatically. Search for wchar_t - // in qpdf's top-level README.md file for details. + // If you are building qpdf on a stripped down system that doesn't have wchar_t, such as may be + // the case in some embedded environments, you may define QPDF_NO_WCHAR_T in your build. This + // symbol is never defined automatically. Search for wchar_t in qpdf's top-level README.md file + // for details. - // Take an argv array consisting of wchar_t, as when wmain is - // invoked, convert all UTF-16 encoded strings to UTF-8, and call - // another main. + // Take an argv array consisting of wchar_t, as when wmain is invoked, convert all UTF-16 + // encoded strings to UTF-8, and call another main. QPDF_DLL int call_main_from_wmain(int argc, wchar_t* argv[], std::function<int(int, char*[])> realmain); QPDF_DLL @@ -516,13 +463,10 @@ namespace QUtil std::function<int(int, char const* const[])> realmain); #endif // QPDF_NO_WCHAR_T - // Try to return the maximum amount of memory allocated by the - // current process and its threads. Return 0 if unable to - // determine. This is Linux-specific and not implemented to be - // completely reliable. It is used during development for - // performance testing to detect changes that may significantly - // change memory usage. It is not recommended for use for other - // purposes. + // Try to return the maximum amount of memory allocated by the current process and its threads. + // Return 0 if unable to determine. This is Linux-specific and not implemented to be completely + // reliable. It is used during development for performance testing to detect changes that may + // significantly change memory usage. It is not recommended for use for other purposes. QPDF_DLL size_t get_max_memory_usage(); }; // namespace QUtil |