aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2021-12-19 19:52:19 +0100
committerJay Berkenbilt <ejb@ql.org>2021-12-19 20:30:45 +0100
commitea73bf72e0ff2577672eec405380456fa56bc6af (patch)
tree60de3d8de4d03a99a3391a4e286eb7893eb20408
parent92613a1eec543a00dd25dc6cdd407929323a811c (diff)
downloadqpdf-ea73bf72e0ff2577672eec405380456fa56bc6af.tar.zst
Further improvements to handling binary strings
-rw-r--r--ChangeLog8
-rw-r--r--include/qpdf/qpdf-c.h43
-rw-r--r--libqpdf/qpdf-c.cc22
-rw-r--r--manual/release-notes.rst10
-rw-r--r--qpdf/qpdf-ctest.c11
-rw-r--r--qpdf/qpdf.testcov2
6 files changed, 83 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 5f02ba43..23c213a6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2021-12-19 Jay Berkenbilt <ejb@ql.org>
+
+ * C API: clarify documentation around string lengths. Add two new
+ methods: qpdf_oh_get_binary_string_value and
+ qpdf_oh_new_binary_string to make the need to handle the length
+ and data separately more explicit in cases in which the string
+ data may contain embedded null characters.
+
2021-12-17 Jay Berkenbilt <ejb@ql.org>
* C API: simplify error handling for uncaught errors (never in a
diff --git a/include/qpdf/qpdf-c.h b/include/qpdf/qpdf-c.h
index e261c7c1..7369e616 100644
--- a/include/qpdf/qpdf-c.h
+++ b/include/qpdf/qpdf-c.h
@@ -61,10 +61,12 @@
* subsequent function calls, sometimes even to different
* functions. If you want a string to last past the next qpdf call
* or after a call to qpdf_cleanup, you should make a copy of it.
- * It is possible for the internal string data to contain null
- * characters. To handle that case, you call
- * qpdf_get_last_string_length() to get the length of whatever
- * string was just returned.
+ *
+ * Since it is possible for a PDF string to contain null
+ * characters, data returned by a function may also contain null
+ * characters if it originates from a PDF string. To handle that
+ * case, call qpdf_get_last_string_length() to get the length of
+ * whatever string was just returned. See STRING FUNCTIONS below.
*
* Most functions defined here have obvious counterparts that are
* methods to either QPDF or QPDFWriter. Please see comments in
@@ -189,14 +191,6 @@ extern "C" {
QPDF_DLL
void qpdf_cleanup(qpdf_data* qpdf);
- /* Return the length of the last string returned. This enables you
- * to retrieve the entire string for cases in which a char*
- * returned by one of the functions below points to a string with
- * embedded null characters.
- */
- QPDF_DLL
- size_t qpdf_get_last_string_length(qpdf_data qpdf);
-
/* ERROR REPORTING */
/* Returns 1 if there is an error condition. The error condition
@@ -716,10 +710,29 @@ extern "C" {
QPDF_DLL
char const* qpdf_oh_get_name(qpdf_data qpdf, qpdf_oh oh);
+ /* Return the length of the last string returned. This enables you
+ * to retrieve the entire string for cases in which a char*
+ * returned by one of the functions below points to a string with
+ * embedded null characters. The function
+ * qpdf_oh_get_binary_string_value takes a length pointer, which
+ * can be useful if you are retrieving the value of a string that
+ * is expected to contain binary data, such as a checksum or
+ * document ID. It is always valid to call
+ * qpdf_get_last_string_length, but it is usually not necessary as
+ * C strings returned by the library are only expected to be able
+ * to contain null characters if their values originate from PDF
+ * strings in the input.
+ */
+ QPDF_DLL
+ size_t qpdf_get_last_string_length(qpdf_data qpdf);
+
QPDF_DLL
char const* qpdf_oh_get_string_value(qpdf_data qpdf, qpdf_oh oh);
QPDF_DLL
char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh);
+ QPDF_DLL
+ char const* qpdf_oh_get_binary_string_value(
+ qpdf_data qpdf, qpdf_oh oh, size_t* length);
QPDF_DLL
int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh);
@@ -772,6 +785,12 @@ extern "C" {
qpdf_oh qpdf_oh_new_string(qpdf_data qpdf, char const* str);
QPDF_DLL
qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str);
+ /* Use qpdf_oh_new_binary_string for creating a string that may
+ * contain arbitrary binary data including embedded null characters.
+ */
+ QPDF_DLL
+ qpdf_oh qpdf_oh_new_binary_string(
+ qpdf_data qpdf, char const* str, size_t length);
QPDF_DLL
qpdf_oh qpdf_oh_new_array(qpdf_data qpdf);
QPDF_DLL
diff --git a/libqpdf/qpdf-c.cc b/libqpdf/qpdf-c.cc
index 9593a011..5f702272 100644
--- a/libqpdf/qpdf-c.cc
+++ b/libqpdf/qpdf-c.cc
@@ -1292,6 +1292,20 @@ char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh)
});
}
+char const* qpdf_oh_get_binary_string_value(
+ qpdf_data qpdf, qpdf_oh oh, size_t* length)
+{
+ return do_with_oh<char const*>(
+ qpdf, oh,
+ return_T<char const*>(""),
+ [qpdf, length](QPDFObjectHandle& o) {
+ QTC::TC("qpdf", "qpdf-c called qpdf_oh_get_binary_string_value");
+ qpdf->tmp_string = o.getStringValue();
+ *length = qpdf->tmp_string.length();
+ return qpdf->tmp_string.c_str();
+ });
+}
+
int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh)
{
return do_with_oh<int>(
@@ -1425,6 +1439,14 @@ qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str)
return new_object(qpdf, QPDFObjectHandle::newUnicodeString(utf8_str));
}
+qpdf_oh qpdf_oh_new_binary_string(
+ qpdf_data qpdf, char const* str, size_t length)
+{
+ QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_binary_string");
+ return new_object(
+ qpdf, QPDFObjectHandle::newString(std::string(str, length)));
+}
+
qpdf_oh qpdf_oh_new_array(qpdf_data qpdf)
{
QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_array");
diff --git a/manual/release-notes.rst b/manual/release-notes.rst
index e5d73c90..08187ae5 100644
--- a/manual/release-notes.rst
+++ b/manual/release-notes.rst
@@ -46,6 +46,12 @@ For a detailed list of changes, please see the file
- C API Enhancements
+ - Many thanks to M. Holger whose contributions have heavily
+ influenced these C API enhancements. His several suggestions,
+ pull requests, questions, and critical reading of documentation
+ and comments have resulted in significant usability improvements
+ to the C API.
+
- Overhaul error handling for the object handle functions C API.
Some rare error conditions that would previously have caused a
crash are now trapped and reported, and the functions that
@@ -80,6 +86,10 @@ For a detailed list of changes, please see the file
- Add ``qpdf_oh_get_type_code`` and ``qpdf_oh_get_type_name``.
+ - Add ``qpdf_oh_get_binary_string_value`` and
+ ``qpdf_oh_new_binary_string`` for making it easier to deal with
+ strings that contain embedded null characters.
+
10.4.0: November 16, 2021
- Handling of Weak Cryptography Algorithms
diff --git a/qpdf/qpdf-ctest.c b/qpdf/qpdf-ctest.c
index dbad4e99..953b24d0 100644
--- a/qpdf/qpdf-ctest.c
+++ b/qpdf/qpdf-ctest.c
@@ -781,8 +781,17 @@ static void test27(char const* infile,
assert(strcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null),
"one") == 0);
assert(qpdf_get_last_string_length(qpdf) == 7);
+ /* memcmp adds a character to verify the trailing null */
assert(memcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null),
- "one\000two", 7) == 0);
+ "one\000two", 8) == 0);
+ size_t length = 0;
+ p_string_with_null = qpdf_oh_new_binary_string(qpdf, "potato\000salad", 12);
+ /* memcmp adds a character to verify the trailing null */
+ assert(memcmp(qpdf_oh_get_binary_string_value(
+ qpdf, p_string_with_null, &length),
+ "potato\000salad", 13) == 0);
+ assert(qpdf_get_last_string_length(qpdf) == 12);
+ assert(length == 12);
}
static void test28(char const* infile,
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 034f8d8e..35417d4c 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -625,3 +625,5 @@ qpdf-c stream data buf set 1
qpdf-c called qpdf_oh_get_page_content_data 0
qpdf-c called qpdf_oh_replace_stream_data 0
qpdf-c silence oh errors 0
+qpdf-c called qpdf_oh_get_binary_string_value 0
+qpdf-c called qpdf_oh_new_binary_string 0