aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--include/qpdf/Constants.h10
-rw-r--r--include/qpdf/QPDFJob.hh21
-rw-r--r--include/qpdf/QPDFPageLabelDocumentHelper.hh5
-rw-r--r--libqpdf/QPDFJob.cc38
-rw-r--r--libqpdf/QPDFJob_config.cc49
-rw-r--r--libqpdf/QPDFPageLabelDocumentHelper.cc33
-rw-r--r--manual/release-notes.rst15
-rw-r--r--qpdf/qtest/page-labels.test39
-rw-r--r--qpdf/qtest/qpdf/set-page-labels.json11
-rw-r--r--qpdf/qtest/qpdf/set-page-labels.out8
11 files changed, 226 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index c5336c11..62f1a541 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2024-01-05 Jay Berkenbilt <ejb@ql.org>
+
+ * Add --set-page-labels command-line argument and supporting API.
+ Fixes #939.
+ - QPDFJob::Config::setPageLabels
+ - qpdf_page_label_e enumerated type
+ - QPDFPageLabelDocumentHelper::pageLabelDict
+
2024-01-01 Jay Berkenbilt <ejb@ql.org>
* Support comma-separated numeric values with --collate to select
diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h
index 24f7dc3a..89152a91 100644
--- a/include/qpdf/Constants.h
+++ b/include/qpdf/Constants.h
@@ -232,4 +232,14 @@ enum pdf_annotation_flag_e {
/* Encryption/password status for QPDFJob */
enum qpdf_encryption_status_e { qpdf_es_encrypted = 1 << 0, qpdf_es_password_incorrect = 1 << 1 };
+/* Page label types */
+enum qpdf_page_label_e {
+ pl_none,
+ pl_digits,
+ pl_alpha_lower,
+ pl_alpha_upper,
+ pl_roman_lower,
+ pl_roman_upper,
+};
+
#endif /* QPDFCONSTANTS_H */
diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh
index 9a7afb0f..4626c264 100644
--- a/include/qpdf/QPDFJob.hh
+++ b/include/qpdf/QPDFJob.hh
@@ -296,7 +296,8 @@ class QPDFJob
Config* config;
};
- class PageLabelsConfig {
+ class PageLabelsConfig
+ {
friend class QPDFJob;
friend class Config;
@@ -458,6 +459,22 @@ class QPDFJob
std::vector<int> repeat_pagenos;
};
+ struct PageLabelSpec
+ {
+ PageLabelSpec(
+ int first_page, qpdf_page_label_e label_type, int start_num, std::string_view prefix) :
+ first_page(first_page),
+ label_type(label_type),
+ start_num(start_num),
+ prefix(prefix)
+ {
+ }
+ int first_page;
+ qpdf_page_label_e label_type;
+ int start_num{1};
+ std::string prefix;
+ };
+
// Helper functions
static void usage(std::string const& msg);
static JSON json_schema(int json_version, std::set<std::string>* keys = nullptr);
@@ -694,7 +711,7 @@ class QPDFJob
bool json_output{false};
std::string update_from_json;
bool report_mem_usage{false};
- std::vector<std::string> page_label_specs;
+ std::vector<PageLabelSpec> page_label_specs;
};
std::shared_ptr<Members> m;
};
diff --git a/include/qpdf/QPDFPageLabelDocumentHelper.hh b/include/qpdf/QPDFPageLabelDocumentHelper.hh
index eb610544..57b6919c 100644
--- a/include/qpdf/QPDFPageLabelDocumentHelper.hh
+++ b/include/qpdf/QPDFPageLabelDocumentHelper.hh
@@ -49,6 +49,11 @@ class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper
QPDF_DLL
bool hasPageLabels();
+ // Helper function to create a dictionary suitable for adding to the /PageLabels number tree.
+ QPDF_DLL
+ static QPDFObjectHandle
+ pageLabelDict(qpdf_page_label_e label_type, int start_num, std::string_view prefix);
+
// Return a page label dictionary representing the page label for the given page. The page does
// not need to appear explicitly in the page label dictionary. This method will adjust /St as
// needed to produce a label that is suitable for the page.
diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc
index b0910cbd..2e0abb43 100644
--- a/libqpdf/QPDFJob.cc
+++ b/libqpdf/QPDFJob.cc
@@ -2172,6 +2172,37 @@ QPDFJob::handleTransformations(QPDF& pdf)
if (m->remove_page_labels) {
pdf.getRoot().removeKey("/PageLabels");
}
+ if (!m->page_label_specs.empty()) {
+ auto nums = QPDFObjectHandle::newArray();
+ auto n_pages = QIntC::to_int(dh.getAllPages().size());
+ int last_page_seen{0};
+ for (auto& spec: m->page_label_specs) {
+ if (spec.first_page < 0) {
+ spec.first_page = n_pages + 1 + spec.first_page;
+ }
+ if (last_page_seen == 0) {
+ if (spec.first_page != 1) {
+ throw std::runtime_error(
+ "the first page label specification must start with page 1");
+ }
+ } else if (spec.first_page <= last_page_seen) {
+ throw std::runtime_error(
+ "page label specifications must be in order by first page");
+ }
+ if (spec.first_page > n_pages) {
+ throw std::runtime_error(
+ "page label spec: page " + std::to_string(spec.first_page) +
+ " is more than the total number of pages (" + std::to_string(n_pages) + ")");
+ }
+ last_page_seen = spec.first_page;
+ nums.appendItem(QPDFObjectHandle::newInteger(spec.first_page - 1));
+ nums.appendItem(QPDFPageLabelDocumentHelper::pageLabelDict(
+ spec.label_type, spec.start_num, spec.prefix));
+ }
+ auto page_labels = QPDFObjectHandle::newDictionary();
+ page_labels.replaceKey("/Nums", nums);
+ pdf.getRoot().replaceKey("/PageLabels", page_labels);
+ }
if (!m->attachments_to_remove.empty()) {
QPDFEmbeddedFileDocumentHelper efdh(pdf);
for (auto const& key: m->attachments_to_remove) {
@@ -3019,10 +3050,9 @@ QPDFJob::writeOutfile(QPDF& pdf)
try {
QUtil::remove_file(backup.c_str());
} catch (QPDFSystemError& e) {
- *m->log->getError()
- << m->message_prefix << ": unable to delete original file (" << e.what() << ");"
- << " original file left in " << backup
- << ", but the input was successfully replaced\n";
+ *m->log->getError() << m->message_prefix << ": unable to delete original file ("
+ << e.what() << ");" << " original file left in " << backup
+ << ", but the input was successfully replaced\n";
}
}
}
diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc
index 9651c3b9..e48d7e31 100644
--- a/libqpdf/QPDFJob_config.cc
+++ b/libqpdf/QPDFJob_config.cc
@@ -1,5 +1,7 @@
#include <qpdf/QPDFJob.hh>
+#include <regex>
+
#include <qpdf/QPDFLogger.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
@@ -1062,11 +1064,50 @@ QPDFJob::Config::encrypt(
QPDFJob::Config*
QPDFJob::Config::setPageLabels(const std::vector<std::string>& specs)
{
- // XXX validate
- for (auto const& xxx: specs) {
- std::cout << "XXX config: spec: " << xxx << std::endl;
+ static std::regex page_label_re(R"(^(z|r?\d+):([DaArR])?(?:/(\d+)?(?:/(.+)?)?)?$)");
+ o.m->page_label_specs.clear();
+ for (auto const& spec: specs) {
+ std::smatch match;
+ if (!std::regex_match(spec, match, page_label_re)) {
+ usage("page label spec must be n:[D|a|A|r|R][/start[/prefix]]");
+ }
+ auto first_page_str = match[1].str();
+ int first_page;
+ if (first_page_str == "z") {
+ first_page = -1;
+ } else if (first_page_str.at(0) == 'r') {
+ first_page = -QUtil::string_to_int(first_page_str.substr(1).c_str());
+ } else {
+ first_page = QUtil::string_to_int(first_page_str.c_str());
+ }
+ auto label_type_ch = match[2].matched ? match[2].str().at(0) : '\0';
+ qpdf_page_label_e label_type;
+ switch (label_type_ch) {
+ case 'D':
+ label_type = pl_digits;
+ break;
+ case 'a':
+ label_type = pl_alpha_lower;
+ break;
+ case 'A':
+ label_type = pl_alpha_upper;
+ break;
+ case 'r':
+ label_type = pl_roman_lower;
+ break;
+ case 'R':
+ label_type = pl_roman_upper;
+ break;
+ default:
+ label_type = pl_none;
+ }
+
+ auto start_num = match[3].matched ? QUtil::string_to_int(match[3].str().c_str()) : 1;
+ auto prefix = match[4].matched ? match[4].str() : "";
+ // We can't check ordering until we know how many pages there are, so that is delayed until
+ // near the end.
+ o.m->page_label_specs.emplace_back(first_page, label_type, start_num, prefix);
}
- o.m->page_label_specs = specs;
return this;
}
diff --git a/libqpdf/QPDFPageLabelDocumentHelper.cc b/libqpdf/QPDFPageLabelDocumentHelper.cc
index d94c41fd..e291e9aa 100644
--- a/libqpdf/QPDFPageLabelDocumentHelper.cc
+++ b/libqpdf/QPDFPageLabelDocumentHelper.cc
@@ -99,3 +99,36 @@ QPDFPageLabelDocumentHelper::getLabelsForPageRange(
}
}
}
+
+QPDFObjectHandle
+QPDFPageLabelDocumentHelper::pageLabelDict(
+ qpdf_page_label_e label_type, int start_num, std::string_view prefix)
+{
+ auto num = "<< /Type /PageLabel >>"_qpdf;
+ switch (label_type) {
+ case pl_none:
+ break;
+ case pl_digits:
+ num.replaceKey("/S", "/D"_qpdf);
+ break;
+ case pl_alpha_lower:
+ num.replaceKey("/S", "/a"_qpdf);
+ break;
+ case pl_alpha_upper:
+ num.replaceKey("/S", "/A"_qpdf);
+ break;
+ case pl_roman_lower:
+ num.replaceKey("/S", "/r"_qpdf);
+ break;
+ case pl_roman_upper:
+ num.replaceKey("/S", "/R"_qpdf);
+ break;
+ }
+ if (!prefix.empty()) {
+ num.replaceKey("/P", QPDFObjectHandle::newUnicodeString(std::string(prefix)));
+ }
+ if (start_num != 1) {
+ num.replaceKey("/St", QPDFObjectHandle::newInteger(start_num));
+ }
+ return num;
+}
diff --git a/manual/release-notes.rst b/manual/release-notes.rst
index 72b1aad6..6fb4758a 100644
--- a/manual/release-notes.rst
+++ b/manual/release-notes.rst
@@ -38,7 +38,7 @@ Planned changes for future 12.x (subject to change):
.. x.y.z: not yet released
-11.7.1: not yet released
+11.8.0: not yet released
- Bug fixes:
- When flattening annotations, preserve hyperlinks and other
@@ -54,6 +54,19 @@ Planned changes for future 12.x (subject to change):
:qpdf:ref:`--collate` to select different numbers of pages from
different groups.
+ - Add :qpdf:ref:`--set-page-labels` option to completely override
+ page labels in the output.
+
+ - Library Enhancements:
+
+ - Add API to support :qpdf:ref:`--set-page-labels`:
+
+ - ``QPDFJob::Config::setPageLabels``
+
+ - ``qpdf_page_label_e`` enumerated type
+
+ - ``QPDFPageLabelDocumentHelper::pageLabelDict``
+
11.7.0: December 24, 2023
- Bug fixes:
diff --git a/qpdf/qtest/page-labels.test b/qpdf/qtest/page-labels.test
index 616dcb6a..9927437b 100644
--- a/qpdf/qtest/page-labels.test
+++ b/qpdf/qtest/page-labels.test
@@ -29,5 +29,44 @@ $td->runtest("no page labels",
{$td->FILE => "no-page-labels.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
+# --set-page-labels
+my @errors = (
+ ["quack", ".*page label spec must be.*"],
+ ["5:r 10:D", ".*the first page .*must start with page 1.*"],
+ ["1:r 10:D 31:A",
+ ".*page 31 is more than the total number of pages \\(30\\).*"],
+);
+$n_tests += scalar(@errors);
+
+foreach my $d (@errors)
+{
+ my ($specs, $err) = @$d;
+ $td->runtest("error ($specs)",
+ {$td->COMMAND => "qpdf --set-page-labels $specs --" .
+ " page-labels-num-tree.pdf a.pdf"},
+ {$td->REGEXP => $err, $td->EXIT_STATUS => 2},
+ $td->NORMALIZE_NEWLINES);
+}
+
+$n_tests += 4;
+$td->runtest("set page labels",
+ {$td->COMMAND => "qpdf page-labels-num-tree.pdf a.pdf" .
+ " --set-page-labels 1:a 3:R/2 6:r//Z- 8:A/17 r10:D/3 28: z://end --"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("after set page labels",
+ {$td->COMMAND => "test_driver 47 a.pdf"},
+ {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("set page labels (json)",
+ {$td->COMMAND => "qpdf page-labels-num-tree.pdf b.pdf" .
+ " --job-json-file=set-page-labels.json"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("after set page labels",
+ {$td->COMMAND => "test_driver 47 b.pdf"},
+ {$td->FILE => "set-page-labels.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+
cleanup();
$td->report($n_tests);
diff --git a/qpdf/qtest/qpdf/set-page-labels.json b/qpdf/qtest/qpdf/set-page-labels.json
new file mode 100644
index 00000000..e3a3daf8
--- /dev/null
+++ b/qpdf/qtest/qpdf/set-page-labels.json
@@ -0,0 +1,11 @@
+{
+ "setPageLabels": [
+ "1:a",
+ "3:R/2",
+ "6:r//Z-",
+ "8:A/17",
+ "r10:D/3",
+ "28:",
+ "z://end"
+ ]
+}
diff --git a/qpdf/qtest/qpdf/set-page-labels.out b/qpdf/qtest/qpdf/set-page-labels.out
new file mode 100644
index 00000000..b0bfe14a
--- /dev/null
+++ b/qpdf/qtest/qpdf/set-page-labels.out
@@ -0,0 +1,8 @@
+1 << /S /a /St 1 >>
+3 << /S /R /St 2 >>
+6 << /P (Z-) /S /r /St 1 >>
+8 << /S /A /St 17 >>
+21 << /S /D /St 3 >>
+28 << /St 1 >>
+30 << /P (end) /St 1 >>
+test 47 done