aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--TODO4
-rw-r--r--cSpell.json2
-rw-r--r--include/qpdf/QPDFJob.hh1
-rw-r--r--include/qpdf/QUtil.hh12
-rw-r--r--include/qpdf/auto_job_c_main.hh1
-rw-r--r--job.sums14
-rw-r--r--job.yml2
-rw-r--r--libqpdf/CMakeLists.txt23
-rw-r--r--libqpdf/QPDFJob.cc11
-rw-r--r--libqpdf/QPDFJob_config.cc7
-rw-r--r--libqpdf/QUtil.cc73
-rw-r--r--libqpdf/qpdf/auto_job_help.hh3
-rw-r--r--libqpdf/qpdf/auto_job_init.hh1
-rw-r--r--libqpdf/qpdf/auto_job_json_init.hh3
-rw-r--r--libqpdf/qpdf/auto_job_schema.hh1
-rw-r--r--libqpdf/qpdf/qpdf-config.h.in2
-rw-r--r--libtests/qtest/qutil/qutil.out2
-rw-r--r--libtests/qutil.cc14
-rw-r--r--manual/cli.rst10
19 files changed, 176 insertions, 10 deletions
diff --git a/TODO b/TODO
index a4628397..f664353e 100644
--- a/TODO
+++ b/TODO
@@ -4,10 +4,12 @@ Next
Before Release:
-* Review in order #729, #726, #747
+* Review in order #726
* Make ./performance_check usable by other people by having published
files to use for testing.
* https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf
+* Incorporate --report-mem-usage into performance testing. Make sure
+ there is some test somewhere that exercises the millions of nulls case.
* Evaluate issues tagged with `next`
* Stay on top of https://github.com/pikepdf/pikepdf/pull/315
diff --git a/cSpell.json b/cSpell.json
index 6251d984..88f7e22d 100644
--- a/cSpell.json
+++ b/cSpell.json
@@ -131,6 +131,7 @@
"esize",
"eval",
"extlibdir",
+ "fclose",
"fdict",
"ffield",
"fghij",
@@ -268,6 +269,7 @@
"maxdepth",
"maxobjectid",
"mdash",
+ "memstream",
"mindepth",
"mkdir",
"mkinstalldirs",
diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh
index 0e4d8a2e..2d4ab0d2 100644
--- a/include/qpdf/QPDFJob.hh
+++ b/include/qpdf/QPDFJob.hh
@@ -711,6 +711,7 @@ class QPDFJob
bool json_input;
bool json_output;
std::string update_from_json;
+ bool report_mem_usage;
};
std::shared_ptr<Members> m;
};
diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
index 41b89da4..96f4f7ed 100644
--- a/include/qpdf/QUtil.hh
+++ b/include/qpdf/QUtil.hh
@@ -525,7 +525,17 @@ namespace QUtil
wchar_t const* const argv[],
std::function<int(int, char const* const[])> realmain);
#endif // QPDF_NO_WCHAR_T
-}; // namespace QUtil
+
+ // Try to return the maximum amount of memory allocated by the
+ // current process and its threads. Return 0 if unable to
+ // determine. This is Linux-specific and not implemented to be
+ // completely reliable. It is used during development for
+ // performance testing to detect changes that may significantly
+ // change memory usage. It is not recommended for use for other
+ // purposes.
+ QPDF_DLL
+ size_t get_max_memory_usage();
+}; // namespace QUtil
inline bool
QUtil::is_hex_digit(char ch)
diff --git a/include/qpdf/auto_job_c_main.hh b/include/qpdf/auto_job_c_main.hh
index 90927ded..cc655c23 100644
--- a/include/qpdf/auto_job_c_main.hh
+++ b/include/qpdf/auto_job_c_main.hh
@@ -33,6 +33,7 @@ QPDF_DLL Config* qdf();
QPDF_DLL Config* rawStreamData();
QPDF_DLL Config* recompressFlate();
QPDF_DLL Config* removePageLabels();
+QPDF_DLL Config* reportMemUsage();
QPDF_DLL Config* requiresPassword();
QPDF_DLL Config* showEncryption();
QPDF_DLL Config* showEncryptionKey();
diff --git a/job.sums b/job.sums
index 5ffcd6cd..e194572e 100644
--- a/job.sums
+++ b/job.sums
@@ -3,15 +3,15 @@ generate_auto_job 9abe2ec994fb98526f5e3c0c199ce2e61a868463cb522a5bc6e9730b655341
include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5
-include/qpdf/auto_job_c_main.hh cdba1ae6ea5525a585d10a3dd95b7996d62b17de4211fe658b78d9d463b0f313
+include/qpdf/auto_job_c_main.hh 493b9798f5ff8bbcb07c0238693554d77eefa4ae71ce1a0d466de94e3a7a3966
include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c18911614fe8e568ec
include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1
-job.yml f9564f18b08a45d17328af43652645771d3498471820c858b8c9013a193e1412
+job.yml a6f22d425980ed960c77c0a4197f46924c14e7943358cd9f0b75811bb1c480ad
libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da
-libqpdf/qpdf/auto_job_help.hh 53306e4aef8aaca641c0087bc9e064ada1c44a94b826c0bcac7b4eb0c8c41fd5
-libqpdf/qpdf/auto_job_init.hh fd1635a5ad6ba16b7ae008467145560a59a5ecfd10d29c5ef7cd0d8347747cd2
+libqpdf/qpdf/auto_job_help.hh 3e9385a7e0dae993467647466fa30f30baa5968f9270c73ff4e664f5aa415dbe
+libqpdf/qpdf/auto_job_init.hh ccb881733849dff5c05721f1aa5c35447cedd415e881c3fef6573901e45be056
libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297
-libqpdf/qpdf/auto_job_json_init.hh 59545578a2e47c660ff98516ed53f06638be75eb4658e2a09d32cc08e0cb7268
-libqpdf/qpdf/auto_job_schema.hh 5352ef1be1ad7cc6f4f36dab88f2937d278e6bd3a0e2d46259794dc226c8ba6b
+libqpdf/qpdf/auto_job_json_init.hh 7ac8f42fb39eda56144ab62b30152a56e9bb2224d0596eb826b7bc421a78d26b
+libqpdf/qpdf/auto_job_schema.hh 17352791b09c3b8a8db766375cce31d70c98b67b44ecc398e2ac78984e34fe90
manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
-manual/cli.rst 41ee93f23f46160fe9eaf7c99fd2ab3bd2e0f6792a341a35bdac1a41cb853ed5
+manual/cli.rst e3fa48bb30c981df1f74d474887155cd6a46f9010b91cd1b7b57e582bf3bf877
diff --git a/job.yml b/job.yml
index b411a9b0..5c9dac86 100644
--- a/job.yml
+++ b/job.yml
@@ -127,6 +127,7 @@ options:
- recompress-flate
- remove-page-labels
- replace-input
+ - report-mem-usage
- requires-password
- show-encryption
- show-encryption-key
@@ -413,6 +414,7 @@ json:
Pages.password:
_range: "page range"
remove-page-labels:
+ report-mem-usage:
rotate:
overlay:
_file: "source file for overlay"
diff --git a/libqpdf/CMakeLists.txt b/libqpdf/CMakeLists.txt
index 46d35959..51f7476d 100644
--- a/libqpdf/CMakeLists.txt
+++ b/libqpdf/CMakeLists.txt
@@ -376,6 +376,29 @@ int main(int argc, char* argv[]) {
endif()
endfunction()
+check_c_source_compiles(
+"#include <malloc.h>
+#include <stdio.h>
+int main(int argc, char* argv[]) {
+ malloc_info(0, stdout);
+ return 0;
+}"
+ HAVE_MALLOC_INFO)
+
+check_c_source_compiles(
+"#include <stdio.h>
+#include <stdlib.h>
+int main(int argc, char* argv[]) {
+ char* buf;
+ size_t size;
+ FILE* f;
+ f = open_memstream(&buf, &size);
+ fclose(f);
+ free(buf);
+ return 0;
+}"
+ HAVE_OPEN_MEMSTREAM)
+
qpdf_check_ll_fmt("%lld" fmt_lld)
qpdf_check_ll_fmt("%I64d" fmt_i64d)
qpdf_check_ll_fmt("%I64lld" fmt_i64lld)
diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc
index 15a81854..7bd563aa 100644
--- a/libqpdf/QPDFJob.cc
+++ b/libqpdf/QPDFJob.cc
@@ -417,7 +417,8 @@ QPDFJob::Members::Members() :
check_is_encrypted(false),
check_requires_password(false),
json_input(false),
- json_output(false)
+ json_output(false),
+ report_mem_usage(false)
{
}
@@ -625,6 +626,14 @@ QPDFJob::run()
<< ": operation succeeded with warnings\n";
}
}
+ if (m->report_mem_usage) {
+ // Call get_max_memory_usage before generating output. When
+ // debugging, it's easier if print statements from
+ // get_max_memory_usage are not interleaved with the output.
+ auto mem_usage = QUtil::get_max_memory_usage();
+ *this->m->log->getWarn()
+ << "qpdf-max-memory-usage " << mem_usage << "\n";
+ }
}
bool
diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc
index 8a9c1470..3e148fca 100644
--- a/libqpdf/QPDFJob_config.cc
+++ b/libqpdf/QPDFJob_config.cc
@@ -503,6 +503,13 @@ QPDFJob::Config::removePageLabels()
}
QPDFJob::Config*
+QPDFJob::Config::reportMemUsage()
+{
+    o.m->report_mem_usage = true; // QPDFJob::run() checks this flag to emit the memory report
+    return this;
+}
+
+QPDFJob::Config*
QPDFJob::Config::requiresPassword()
{
o.m->check_requires_password = true;
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index d565ece0..98a8f318 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -37,6 +37,9 @@
# include <sys/stat.h>
# include <unistd.h>
#endif
+#ifdef HAVE_MALLOC_INFO
+# include <malloc.h>
+#endif
// First element is 24
static unsigned short pdf_doc_low_to_unicode[] = {
@@ -1968,3 +1971,73 @@ QUtil::call_main_from_wmain(
}
#endif // QPDF_NO_WCHAR_T
+
+// Estimate the maximum amount of memory used by this process from
+// the XML report written by malloc_info(). Returns 0 when
+// malloc_info or open_memstream was not detected at build time, or
+// when the report cannot be generated or parsed. Intended for
+// development and performance testing only.
+size_t
+QUtil::get_max_memory_usage()
+{
+#if defined(HAVE_MALLOC_INFO) && defined(HAVE_OPEN_MEMSTREAM)
+    static std::regex tag_re("<(/?\\w+)([^>]*?)>");
+    static std::regex attr_re("(\\w+)=\"(.*?)\"");
+
+    char* buf = nullptr;
+    size_t size = 0;
+    FILE* f = open_memstream(&buf, &size);
+    if (f == nullptr) {
+        return 0;
+    }
+    // buf and size are only guaranteed to be up to date once the
+    // stream has been closed, so always close before looking at them.
+    int const info_status = malloc_info(0, f);
+    int const close_status = fclose(f);
+    if ((info_status != 0) || (close_status != 0) || (buf == nullptr)) {
+        // No usable report; free(nullptr) is a harmless no-op.
+        free(buf);
+        return 0;
+    }
+    if (QUtil::get_env("QPDF_DEBUG_MEM_USAGE")) {
+        fprintf(stderr, "%s", buf);
+    }
+
+    // Warning: this code uses regular expressions to extract data
+    // from an XML string. This is generally a bad idea, but we're
+    // going to do it anyway because QUtil.hh warns against using this
+    // function for other than development/testing, and if this
+    // function fails to generate reasonable output during performance
+    // testing, it will be noticed.
+
+    // This is my best guess at how to interpret malloc_info. Anyway
+    // it seems to provide useful information for detecting code
+    // changes that drastically change memory usage.
+    size_t result = 0;
+    try {
+        std::cregex_iterator m_begin(buf, buf + size, tag_re);
+        std::cregex_iterator cr_end;
+        std::sregex_iterator sr_end;
+
+        // Only elements that are not nested inside a <heap> element
+        // contribute to the result.
+        int in_heap = 0;
+        for (auto m = m_begin; m != cr_end; ++m) {
+            std::string tag(m->str(1));
+            if (tag == "heap") {
+                ++in_heap;
+            } else if (tag == "/heap") {
+                --in_heap;
+            } else if (in_heap == 0) {
+                std::string rest = m->str(2);
+                std::map<std::string, std::string> attrs;
+                std::sregex_iterator a_begin(rest.begin(), rest.end(), attr_re);
+                for (auto m2 = a_begin; m2 != sr_end; ++m2) {
+                    attrs[m2->str(1)] = m2->str(2);
+                }
+                if (tag == "total") {
+                    if (attrs.count("size") > 0) {
+                        result += QIntC::to_size(
+                            QUtil::string_to_ull(attrs["size"].c_str()));
+                    }
+                } else if (tag == "system" && attrs["type"] == "max") {
+                    result += QIntC::to_size(
+                        QUtil::string_to_ull(attrs["size"].c_str()));
+                }
+            }
+        }
+    } catch (...) {
+        // Unexpected report contents: discard any partial sum so the
+        // caller sees 0 ("unable to determine") rather than a
+        // misleading partial total.
+        result = 0;
+    }
+    free(buf);
+    return result;
+#else
+    return 0;
+#endif
+}
diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh
index 7c3bb266..eb272a04 100644
--- a/libqpdf/qpdf/auto_job_help.hh
+++ b/libqpdf/qpdf/auto_job_help.hh
@@ -883,6 +883,9 @@ for debugging qpdf.
ap.addOptionHelp("--test-json-schema", "testing", "test generated json against schema", R"(This is used by qpdf's test suite to check consistency between
the output of qpdf --json and the output of qpdf --json-help.
)");
+ap.addOptionHelp("--report-mem-usage", "testing", "best effort report of memory usage", R"(This is used by qpdf's performance test suite to report the
+maximum amount of memory used in supported environments.
+)");
}
static void add_help(QPDFArgParser& ap)
{
diff --git a/libqpdf/qpdf/auto_job_init.hh b/libqpdf/qpdf/auto_job_init.hh
index b90592e0..ad110d16 100644
--- a/libqpdf/qpdf/auto_job_init.hh
+++ b/libqpdf/qpdf/auto_job_init.hh
@@ -69,6 +69,7 @@ this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();});
this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();});
this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();});
this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput));
+this->ap.addBare("report-mem-usage", [this](){c_main->reportMemUsage();});
this->ap.addBare("requires-password", [this](){c_main->requiresPassword();});
this->ap.addBare("show-encryption", [this](){c_main->showEncryption();});
this->ap.addBare("show-encryption-key", [this](){c_main->showEncryptionKey();});
diff --git a/libqpdf/qpdf/auto_job_json_init.hh b/libqpdf/qpdf/auto_job_json_init.hh
index 8f8fb987..1cd69368 100644
--- a/libqpdf/qpdf/auto_job_json_init.hh
+++ b/libqpdf/qpdf/auto_job_json_init.hh
@@ -409,6 +409,9 @@ popHandler(); // key: pages
pushKey("removePageLabels");
addBare([this]() { c_main->removePageLabels(); });
popHandler(); // key: removePageLabels
+pushKey("reportMemUsage");
+addBare([this]() { c_main->reportMemUsage(); });
+popHandler(); // key: reportMemUsage
pushKey("rotate");
addParameter([this](std::string const& p) { c_main->rotate(p); });
popHandler(); // key: rotate
diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh
index aa69c192..9272c596 100644
--- a/libqpdf/qpdf/auto_job_schema.hh
+++ b/libqpdf/qpdf/auto_job_schema.hh
@@ -144,6 +144,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({
}
],
"removePageLabels": "remove explicit page numbers",
+ "reportMemUsage": "best effort report of memory usage",
"rotate": "rotate pages",
"overlay": {
"file": "source file for overlay",
diff --git a/libqpdf/qpdf/qpdf-config.h.in b/libqpdf/qpdf/qpdf-config.h.in
index 8a22b875..500f55cc 100644
--- a/libqpdf/qpdf/qpdf-config.h.in
+++ b/libqpdf/qpdf/qpdf-config.h.in
@@ -21,6 +21,8 @@
#cmakedefine HAVE_LOCALTIME_R 1
#cmakedefine HAVE_RANDOM 1
#cmakedefine HAVE_TM_GMTOFF 1
+#cmakedefine HAVE_MALLOC_INFO 1
+#cmakedefine HAVE_OPEN_MEMSTREAM 1
/* printf format for long long */
#cmakedefine LL_FMT "${LL_FMT}"
diff --git a/libtests/qtest/qutil/qutil.out b/libtests/qtest/qutil/qutil.out
index 48d22fb9..8d3e6d8e 100644
--- a/libtests/qtest/qutil/qutil.out
+++ b/libtests/qtest/qutil/qutil.out
@@ -134,3 +134,5 @@ D:20210209191925Z
2021-02-09T19:19:25Z
---- is_long_long
done
+---- memory usage
+memory usage okay
diff --git a/libtests/qutil.cc b/libtests/qutil.cc
index 995a7599..82c2dd1a 100644
--- a/libtests/qutil.cc
+++ b/libtests/qutil.cc
@@ -703,6 +703,18 @@ is_long_long_test()
std::cout << "done" << std::endl;
}
+// Checks that QUtil::get_max_memory_usage() reports a larger value
+// after a large array has been allocated. A first reading of 0 means
+// the figure could not be determined, in which case the comparison
+// is skipped; the success line is printed either way.
+void
+memory_usage_test()
+{
+    size_t const before = QUtil::get_max_memory_usage();
+    bool const measurable = (before != 0);
+    if (measurable) {
+        // Keep the allocation alive while the second reading is taken.
+        auto big_block = QUtil::make_shared_array<int>(10 << 20);
+        size_t const after = QUtil::get_max_memory_usage();
+        assert(before < after);
+    }
+    std::cout << "memory usage okay" << std::endl;
+}
+
int
main(int argc, char* argv[])
{
@@ -739,6 +751,8 @@ main(int argc, char* argv[])
timestamp_test();
std::cout << "---- is_long_long" << std::endl;
is_long_long_test();
+ std::cout << "---- memory usage" << std::endl;
+ memory_usage_test();
} catch (std::exception& e) {
std::cout << "unexpected exception: " << e.what() << std::endl;
}
diff --git a/manual/cli.rst b/manual/cli.rst
index 021d1dc7..57682c1a 100644
--- a/manual/cli.rst
+++ b/manual/cli.rst
@@ -3463,6 +3463,16 @@ Related Options
memory and is therefore unsuitable for use with large files. This
is why it's also not on by default.
+.. qpdf:option:: --report-mem-usage
+
+ .. help: best effort report of memory usage
+
+ This is used by qpdf's performance test suite to report the
+ maximum amount of memory used in supported environments.
+
+ This is used by qpdf's performance test suite to report the maximum
+ amount of memory used in supported environments.
+
.. _unicode-passwords:
Unicode Passwords