From 0a54247652e49ce384dcf0d8df078201aa106089 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 31 Aug 2022 12:49:29 -0400 Subject: Add QUtil::get_max_memory_usage for testing --- cSpell.json | 2 ++ include/qpdf/QUtil.hh | 12 ++++++- libqpdf/CMakeLists.txt | 23 +++++++++++++ libqpdf/QUtil.cc | 73 ++++++++++++++++++++++++++++++++++++++++++ libqpdf/qpdf/qpdf-config.h.in | 2 ++ libtests/qtest/qutil/qutil.out | 2 ++ libtests/qutil.cc | 14 ++++++++ 7 files changed, 127 insertions(+), 1 deletion(-) diff --git a/cSpell.json b/cSpell.json index 6251d984..88f7e22d 100644 --- a/cSpell.json +++ b/cSpell.json @@ -131,6 +131,7 @@ "esize", "eval", "extlibdir", + "fclose", "fdict", "ffield", "fghij", @@ -268,6 +269,7 @@ "maxdepth", "maxobjectid", "mdash", + "memstream", "mindepth", "mkdir", "mkinstalldirs", diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh index 41b89da4..96f4f7ed 100644 --- a/include/qpdf/QUtil.hh +++ b/include/qpdf/QUtil.hh @@ -525,7 +525,17 @@ namespace QUtil wchar_t const* const argv[], std::function realmain); #endif // QPDF_NO_WCHAR_T -}; // namespace QUtil + + // Try to return the maximum amount of memory allocated by the + // current process and its threads. Return 0 if unable to + // determine. This is Linux-specific and not implemented to be + // completely reliable. It is used during development for + // performance testing to detect changes that may significantly + // change memory usage. It is not recommended for use for other + // purposes. + QPDF_DLL + size_t get_max_memory_usage(); +}; // namespace QUtil inline bool QUtil::is_hex_digit(char ch) diff --git a/libqpdf/CMakeLists.txt b/libqpdf/CMakeLists.txt index cf807f6d..106292a3 100644 --- a/libqpdf/CMakeLists.txt +++ b/libqpdf/CMakeLists.txt @@ -375,6 +375,29 @@ int main(int argc, char* argv[]) { endif() endfunction() +check_c_source_compiles( +"#include +#include +int main(int argc, char* argv[]) { + malloc_info(0, stdout); + return 0; +}" + HAVE_MALLOC_INFO) + +check_c_source_compiles( +"#include +#include +int main(int argc, char* argv[]) { + char* buf; + size_t size; + FILE* f; + f = open_memstream(&buf, &size); + fclose(f); + free(buf); + return 0; +}" + HAVE_OPEN_MEMSTREAM) + qpdf_check_ll_fmt("%lld" fmt_lld) qpdf_check_ll_fmt("%I64d" fmt_i64d) qpdf_check_ll_fmt("%I64lld" fmt_i64lld) diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc index d565ece0..98a8f318 100644 --- a/libqpdf/QUtil.cc +++ b/libqpdf/QUtil.cc @@ -37,6 +37,9 @@ # include # include #endif +#ifdef HAVE_MALLOC_INFO +# include +#endif // First element is 24 static unsigned short pdf_doc_low_to_unicode[] = { @@ -1968,3 +1971,73 @@ QUtil::call_main_from_wmain( } #endif // QPDF_NO_WCHAR_T + +size_t +QUtil::get_max_memory_usage() +{ +#if defined(HAVE_MALLOC_INFO) && defined(HAVE_OPEN_MEMSTREAM) + static std::regex tag_re("<(/?\\w+)([^>]*?)>"); + static std::regex attr_re("(\\w+)=\"(.*?)\""); + + char* buf; + size_t size; + FILE* f = open_memstream(&buf, &size); + if (f == nullptr) { + return 0; + } + malloc_info(0, f); + fclose(f); + if (QUtil::get_env("QPDF_DEBUG_MEM_USAGE")) { + fprintf(stderr, "%s", buf); + } + + // Warning: this code uses regular expression to extract data from + // an XML string. This is generally a bad idea, but we're going to + // do it anyway because QUtil.hh warns against using this function + // for other than development/testing, and if this function fails + // to generate reasonable output during performance testing, it + // will be noticed. + + // This is my best guess at how to interpret malloc_info. Anyway + // it seems to provide useful information for detecting code + // changes that drastically change memory usage. + size_t result = 0; + try { + std::cregex_iterator m_begin(buf, buf + size, tag_re); + std::cregex_iterator cr_end; + std::sregex_iterator sr_end; + + int in_heap = 0; + for (auto m = m_begin; m != cr_end; ++m) { + std::string tag(m->str(1)); + if (tag == "heap") { + ++in_heap; + } else if (tag == "/heap") { + --in_heap; + } else if (in_heap == 0) { + std::string rest = m->str(2); + std::map attrs; + std::sregex_iterator a_begin(rest.begin(), rest.end(), attr_re); + for (auto m2 = a_begin; m2 != sr_end; ++m2) { + attrs[m2->str(1)] = m2->str(2); + } + if (tag == "total") { + if (attrs.count("size") > 0) { + result += QIntC::to_size( + QUtil::string_to_ull(attrs["size"].c_str())); + } + } else if (tag == "system" && attrs["type"] == "max") { + result += QIntC::to_size( + QUtil::string_to_ull(attrs["size"].c_str())); + } + } + } + } catch (...) { + // ignore -- just return 0 + } + free(buf); + return result; +#else + return 0; +#endif +} diff --git a/libqpdf/qpdf/qpdf-config.h.in b/libqpdf/qpdf/qpdf-config.h.in index 8a22b875..500f55cc 100644 --- a/libqpdf/qpdf/qpdf-config.h.in +++ b/libqpdf/qpdf/qpdf-config.h.in @@ -21,6 +21,8 @@ #cmakedefine HAVE_LOCALTIME_R 1 #cmakedefine HAVE_RANDOM 1 #cmakedefine HAVE_TM_GMTOFF 1 +#cmakedefine HAVE_MALLOC_INFO 1 +#cmakedefine HAVE_OPEN_MEMSTREAM 1 /* printf format for long long */ #cmakedefine LL_FMT "${LL_FMT}" diff --git a/libtests/qtest/qutil/qutil.out b/libtests/qtest/qutil/qutil.out index 48d22fb9..8d3e6d8e 100644 --- a/libtests/qtest/qutil/qutil.out +++ b/libtests/qtest/qutil/qutil.out @@ -134,3 +134,5 @@ D:20210209191925Z 2021-02-09T19:19:25Z ---- is_long_long done +---- memory usage +memory usage okay diff --git a/libtests/qutil.cc b/libtests/qutil.cc index 995a7599..82c2dd1a 100644 --- a/libtests/qutil.cc +++ b/libtests/qutil.cc @@ -703,6 +703,18 @@ is_long_long_test() std::cout << "done" << std::endl; } +void +memory_usage_test() +{ + auto u1 = QUtil::get_max_memory_usage(); + if (u1 > 0) { + auto x = QUtil::make_shared_array(10 << 20); + auto u2 = QUtil::get_max_memory_usage(); + assert(u2 > u1); + } + std::cout << "memory usage okay" << std::endl; +} + int main(int argc, char* argv[]) { @@ -739,6 +751,8 @@ main(int argc, char* argv[]) timestamp_test(); std::cout << "---- is_long_long" << std::endl; is_long_long_test(); + std::cout << "---- memory usage" << std::endl; + memory_usage_test(); } catch (std::exception& e) { std::cout << "unexpected exception: " << e.what() << std::endl; } -- cgit v1.2.3-54-g00ecf From 433f1dae19b63f263af8a3d3cc9b3e328171d9ca Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 31 Aug 2022 13:51:58 -0400 Subject: Add --report-mem-usage option for debugging/testing --- include/qpdf/QPDFJob.hh | 1 + include/qpdf/auto_job_c_main.hh | 1 + job.sums | 14 +++++++------- job.yml | 2 ++ libqpdf/QPDFJob.cc | 11 ++++++++++- libqpdf/QPDFJob_config.cc | 7 +++++++ libqpdf/qpdf/auto_job_help.hh | 3 +++ libqpdf/qpdf/auto_job_init.hh | 1 + libqpdf/qpdf/auto_job_json_init.hh | 3 +++ libqpdf/qpdf/auto_job_schema.hh | 1 + manual/cli.rst | 10 ++++++++++ 11 files changed, 46 insertions(+), 8 deletions(-) diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index 0e4d8a2e..2d4ab0d2 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -711,6 +711,7 @@ class QPDFJob bool json_input; bool json_output; std::string update_from_json; + bool report_mem_usage; }; std::shared_ptr m; }; diff --git a/include/qpdf/auto_job_c_main.hh b/include/qpdf/auto_job_c_main.hh index 90927ded..cc655c23 100644 --- a/include/qpdf/auto_job_c_main.hh +++ b/include/qpdf/auto_job_c_main.hh @@ -33,6 +33,7 @@ QPDF_DLL Config* qdf(); QPDF_DLL Config* rawStreamData(); QPDF_DLL Config* recompressFlate(); QPDF_DLL Config* removePageLabels(); +QPDF_DLL Config* reportMemUsage(); QPDF_DLL Config* requiresPassword(); QPDF_DLL Config* showEncryption(); QPDF_DLL Config* showEncryptionKey(); diff --git a/job.sums b/job.sums index 5ffcd6cd..e194572e 100644 --- a/job.sums +++ b/job.sums @@ -3,15 +3,15 @@ generate_auto_job 9abe2ec994fb98526f5e3c0c199ce2e61a868463cb522a5bc6e9730b655341 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5 -include/qpdf/auto_job_c_main.hh cdba1ae6ea5525a585d10a3dd95b7996d62b17de4211fe658b78d9d463b0f313 +include/qpdf/auto_job_c_main.hh 493b9798f5ff8bbcb07c0238693554d77eefa4ae71ce1a0d466de94e3a7a3966 include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c18911614fe8e568ec include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 -job.yml f9564f18b08a45d17328af43652645771d3498471820c858b8c9013a193e1412 +job.yml a6f22d425980ed960c77c0a4197f46924c14e7943358cd9f0b75811bb1c480ad libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da -libqpdf/qpdf/auto_job_help.hh 53306e4aef8aaca641c0087bc9e064ada1c44a94b826c0bcac7b4eb0c8c41fd5 -libqpdf/qpdf/auto_job_init.hh fd1635a5ad6ba16b7ae008467145560a59a5ecfd10d29c5ef7cd0d8347747cd2 +libqpdf/qpdf/auto_job_help.hh 3e9385a7e0dae993467647466fa30f30baa5968f9270c73ff4e664f5aa415dbe +libqpdf/qpdf/auto_job_init.hh ccb881733849dff5c05721f1aa5c35447cedd415e881c3fef6573901e45be056 libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 -libqpdf/qpdf/auto_job_json_init.hh 59545578a2e47c660ff98516ed53f06638be75eb4658e2a09d32cc08e0cb7268 -libqpdf/qpdf/auto_job_schema.hh 5352ef1be1ad7cc6f4f36dab88f2937d278e6bd3a0e2d46259794dc226c8ba6b +libqpdf/qpdf/auto_job_json_init.hh 7ac8f42fb39eda56144ab62b30152a56e9bb2224d0596eb826b7bc421a78d26b +libqpdf/qpdf/auto_job_schema.hh 17352791b09c3b8a8db766375cce31d70c98b67b44ecc398e2ac78984e34fe90 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 -manual/cli.rst 41ee93f23f46160fe9eaf7c99fd2ab3bd2e0f6792a341a35bdac1a41cb853ed5 +manual/cli.rst e3fa48bb30c981df1f74d474887155cd6a46f9010b91cd1b7b57e582bf3bf877 diff --git a/job.yml b/job.yml index b411a9b0..5c9dac86 100644 --- a/job.yml +++ b/job.yml @@ -127,6 +127,7 @@ options: - recompress-flate - remove-page-labels - replace-input + - report-mem-usage - requires-password - show-encryption - show-encryption-key @@ -413,6 +414,7 @@ json: Pages.password: _range: "page range" remove-page-labels: + report-mem-usage: rotate: overlay: _file: "source file for overlay" diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 15a81854..7bd563aa 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -417,7 +417,8 @@ QPDFJob::Members::Members() : check_is_encrypted(false), check_requires_password(false), json_input(false), - json_output(false) + json_output(false), + report_mem_usage(false) { } @@ -625,6 +626,14 @@ QPDFJob::run() << ": operation succeeded with warnings\n"; } } + if (m->report_mem_usage) { + // Call get_max_memory_usage before generating output. When + // debugging, it's easier if print statements from + // get_max_memory_usage are not interleaved with the output. + auto mem_usage = QUtil::get_max_memory_usage(); + *this->m->log->getWarn() + << "qpdf-max-memory-usage " << mem_usage << "\n"; + } } bool diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc index 8a9c1470..3e148fca 100644 --- a/libqpdf/QPDFJob_config.cc +++ b/libqpdf/QPDFJob_config.cc @@ -502,6 +502,13 @@ QPDFJob::Config::removePageLabels() return this; } +QPDFJob::Config* +QPDFJob::Config::reportMemUsage() +{ + o.m->report_mem_usage = true; + return this; +} + QPDFJob::Config* QPDFJob::Config::requiresPassword() { diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh index 7c3bb266..eb272a04 100644 --- a/libqpdf/qpdf/auto_job_help.hh +++ b/libqpdf/qpdf/auto_job_help.hh @@ -883,6 +883,9 @@ for debugging qpdf. ap.addOptionHelp("--test-json-schema", "testing", "test generated json against schema", R"(This is used by qpdf's test suite to check consistency between the output of qpdf --json and the output of qpdf --json-help. )"); +ap.addOptionHelp("--report-mem-usage", "testing", "best effort report of memory usage", R"(This is used by qpdf's performance test suite to report the +maximum amount of memory used in supported environments. +)"); } static void add_help(QPDFArgParser& ap) { diff --git a/libqpdf/qpdf/auto_job_init.hh b/libqpdf/qpdf/auto_job_init.hh index b90592e0..ad110d16 100644 --- a/libqpdf/qpdf/auto_job_init.hh +++ b/libqpdf/qpdf/auto_job_init.hh @@ -69,6 +69,7 @@ this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();}); this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();}); this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();}); this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput)); +this->ap.addBare("report-mem-usage", [this](){c_main->reportMemUsage();}); this->ap.addBare("requires-password", [this](){c_main->requiresPassword();}); this->ap.addBare("show-encryption", [this](){c_main->showEncryption();}); this->ap.addBare("show-encryption-key", [this](){c_main->showEncryptionKey();}); diff --git a/libqpdf/qpdf/auto_job_json_init.hh b/libqpdf/qpdf/auto_job_json_init.hh index 8f8fb987..1cd69368 100644 --- a/libqpdf/qpdf/auto_job_json_init.hh +++ b/libqpdf/qpdf/auto_job_json_init.hh @@ -409,6 +409,9 @@ popHandler(); // key: pages pushKey("removePageLabels"); addBare([this]() { c_main->removePageLabels(); }); popHandler(); // key: removePageLabels +pushKey("reportMemUsage"); +addBare([this]() { c_main->reportMemUsage(); }); +popHandler(); // key: reportMemUsage pushKey("rotate"); addParameter([this](std::string const& p) { c_main->rotate(p); }); popHandler(); // key: rotate diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh index aa69c192..9272c596 100644 --- a/libqpdf/qpdf/auto_job_schema.hh +++ b/libqpdf/qpdf/auto_job_schema.hh @@ -144,6 +144,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ } ], "removePageLabels": "remove explicit page numbers", + "reportMemUsage": "best effort report of memory usage", "rotate": "rotate pages", "overlay": { "file": "source file for overlay", diff --git a/manual/cli.rst b/manual/cli.rst index 021d1dc7..57682c1a 100644 --- a/manual/cli.rst +++ b/manual/cli.rst @@ -3463,6 +3463,16 @@ Related Options memory and is therefore unsuitable for use with large files. This is why it's also not on by default. +.. qpdf:option:: --report-mem-usage + + .. help: best effort report of memory usage + + This is used by qpdf's performance test suite to report the + maximum amount of memory used in supported environments. + + This is used by qpdf's performance test suite to report the maximum + amount of memory used in supported environments. + .. _unicode-passwords: Unicode Passwords -- cgit v1.2.3-54-g00ecf From 4aac7c325acbf80ed4a6fc121c5a36f2d5515ff1 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 31 Aug 2022 14:10:15 -0400 Subject: Update TODO with reminders about memory testing --- TODO | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/TODO b/TODO index a4628397..f664353e 100644 --- a/TODO +++ b/TODO @@ -4,10 +4,12 @@ Next Before Release: -* Review in order #729, #726, #747 +* Review in order #726 * Make ./performance_check usable by other people by having published files to use for testing. * https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf +* Incorporate --report-mem-usage into performance testing. Make sure + there is some test somewhere that exercises the millions of nulls case. * Evaluate issues tagged with `next` * Stay on top of https://github.com/pikepdf/pikepdf/pull/315 -- cgit v1.2.3-54-g00ecf