From 22b35c49289157204b35a851f3cb9cade9e98559 Mon Sep 17 00:00:00 2001
From: Jay Berkenbilt <ejb@ql.org>
Date: Sat, 23 Apr 2022 16:39:27 -0400
Subject: Expose QUtil::get_next_utf8_codepoint

---
 include/qpdf/QUtil.hh | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
index 2d0b7b56..8b2b5ff8 100644
--- a/include/qpdf/QUtil.hh
+++ b/include/qpdf/QUtil.hh
@@ -268,14 +268,33 @@ namespace QUtil
     QPDF_DLL
     std::string toUTF16(unsigned long uval);
 
+    // If utf8_val.at(pos) points to the beginning of a valid
+    // UTF-8-encoded character, return the codepoint of the character
+    // and set error to false. Otherwise, return 0xfffd and set error
+    // to true. In all cases, pos is advanced to the next position
+    // that may begin a valid character. When the string has been
+    // consumed, pos will be set to the string length. It is an error
+    // to pass a value of pos that is greater than or equal to the
+    // length of the string.
+    QPDF_DLL
+    unsigned long get_next_utf8_codepoint(
+        std::string const& utf8_val, size_t& pos, bool& error);
+
     // Test whether this is a UTF-16 string. This is indicated by
     // first two bytes being 0xFE 0xFF (big-endian) or 0xFF 0xFE
-    // (little-endian). Starting in qpdf 10.6.2, this detects
+    // (little-endian), each of which is the encoding of U+FEFF, the
+    // Unicode byte order mark. Starting in qpdf 10.6.2, this detects
     // little-endian as well as big-endian. Even though the PDF spec
     // doesn't allow little-endian, most readers seem to accept it.
     QPDF_DLL
     bool is_utf16(std::string const&);
 
+    // Test whether this is an explicit UTF-8 string as allowed by the
+    // PDF 2.0 spec. This is indicated by first three bytes being 0xEF
+    // 0xBB 0xBF, which is the UTF-8 encoding of U+FEFF.
+    QPDF_DLL
+    bool is_explicit_utf8(std::string const&);
+
     // Convert a UTF-8 encoded string to UTF-16 big-endian.
     // Unrepresentable code points are converted to U+FFFD.
     QPDF_DLL
-- 
cgit v1.2.3-70-g09d2