* [pve-devel] [PATCH V2 pve-qemu] add patch: add memory allocator detection and use malloc_trim for glibc only
2023-05-16 11:22 [pve-devel] [PATCH-SERIES v2 qemu, qemu-server, manager 0/1] add tcmalloc support Alexandre Derumier
@ 2023-05-16 11:22 ` Alexandre Derumier
2023-05-16 11:22 ` [pve-devel] [PATCH qemu-server 1/1] qemu options: add tuning allocator Alexandre Derumier
2023-05-16 11:22 ` [pve-devel] [PATCH pve-manager 1/1] ui: qemu : add tuning option Alexandre Derumier
2 siblings, 0 replies; 4+ messages in thread
From: Alexandre Derumier @ 2023-05-16 11:22 UTC (permalink / raw)
To: pve-devel
Detect if a different allocator than glibc malloc is linked with LD_PRELOAD,
and call malloc_trim() only for glibc malloc
This patch is mostly copy/paste from haproxy
https://github.com/haproxy/haproxy/blob/master/src/pool.c
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
---
...-and-call-malloc_trim-only-for-glibc.patch | 161 ++++++++++++++++++
debian/patches/series | 1 +
2 files changed, 162 insertions(+)
create mode 100644 debian/patches/pve/0063-detect-allocator-and-call-malloc_trim-only-for-glibc.patch
diff --git a/debian/patches/pve/0063-detect-allocator-and-call-malloc_trim-only-for-glibc.patch b/debian/patches/pve/0063-detect-allocator-and-call-malloc_trim-only-for-glibc.patch
new file mode 100644
index 0000000..586feaa
--- /dev/null
+++ b/debian/patches/pve/0063-detect-allocator-and-call-malloc_trim-only-for-glibc.patch
@@ -0,0 +1,161 @@
+From 058d06a1d714b84ba9db8434c13e7f21d43c40aa Mon Sep 17 00:00:00 2001
+From: Alexandre Derumier <aderumier@odiso.com>
+Date: Wed, 22 Mar 2023 17:51:35 +0100
+Subject: [PATCH] detect allocator and call malloc_trim only for glibc
+
+Detect whether a different allocator is dynamically loaded
+with LD_PRELOAD, and don't call malloc_trim in this case.
+
+mostly copy/paste from haproxy code
+
+https://github.com/haproxy/haproxy/blob/master/src/pool.c
+Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
+---
+ include/qemu/rcu.h | 19 ++++++++++++
+ util/rcu.c | 76 +++++++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 94 insertions(+), 1 deletion(-)
+
+diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
+index 313fc414bc..11cb31eaf7 100644
+--- a/include/qemu/rcu.h
++++ b/include/qemu/rcu.h
+@@ -192,4 +192,23 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock)
+ void rcu_add_force_rcu_notifier(Notifier *n);
+ void rcu_remove_force_rcu_notifier(Notifier *n);
+
++/* This macro may be used to block constant propagation that lets the compiler
++ * detect a possible NULL dereference on a variable resulting from an explicit
++ * assignment in an impossible check. Sometimes a function is called which does
++ * safety checks and returns NULL if safe conditions are not met. The place
++ * where it's called cannot hit this condition and dereferencing the pointer
++ * without first checking it will make the compiler emit a warning about a
++ * "potential null pointer dereference" which is hard to work around. This
++ * macro "washes" the pointer and prevents the compiler from emitting tests
++ * branching to undefined instructions. It may only be used when the developer
++ * is absolutely certain that the conditions are guaranteed and that the
++ * pointer passed in argument cannot be NULL by design.
++ */
++#define ALREADY_CHECKED(p) do { asm("" : "=rm"(p) : "0"(p)); } while (0)
++
++/* same as above but to be used to pass the input value to the output but
++ * without letting the compiler know about its initial properties.
++ */
++#define DISGUISE(v) ({ typeof(v) __v = (v); ALREADY_CHECKED(__v); __v; })
++
+ #endif /* QEMU_RCU_H */
+diff --git a/util/rcu.c b/util/rcu.c
+index b6d6c71cff..5dc0fa5f58 100644
+--- a/util/rcu.c
++++ b/util/rcu.c
+@@ -35,6 +35,13 @@
+ #if defined(CONFIG_MALLOC_TRIM)
+ #include <malloc.h>
+ #endif
++#if (defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 8))
++#define GLIBC_HAVE_MALLOC_TRIM
++#endif
++/* glibc 2.33 provides mallinfo2() that overcomes mallinfo()'s type limitations */
++#if (defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 33))
++#define GLIBC_HAVE_MALLINFO2
++#endif
+
+ /*
+ * Global grace period counter. Bit 0 is always one in rcu_gp_ctr.
+@@ -50,6 +57,9 @@ static int in_drain_call_rcu;
+ static QemuMutex rcu_registry_lock;
+ static QemuMutex rcu_sync_lock;
+
++static int using_default_allocator = 1;
++static int(*my_mallctl)(const char *, void *, size_t *, void *, size_t) = NULL;
++
+ /*
+ * Check whether a quiescent state was crossed between the beginning of
+ * update_counter_and_wait and now.
+@@ -256,7 +266,8 @@ static void *call_rcu_thread(void *opaque)
+ n = qatomic_read(&rcu_call_count);
+ if (n == 0) {
+ #if defined(CONFIG_MALLOC_TRIM)
+- malloc_trim(4 * 1024 * 1024);
++ if (using_default_allocator) /* trim only when glibc malloc is in use */
++ malloc_trim(4 * 1024 * 1024);
+ #endif
+ qemu_event_wait(&rcu_call_ready_event);
+ }
+@@ -445,11 +456,74 @@ static void rcu_init_child(void)
+ }
+ #endif
+
++/* Tries to retrieve the address of the first occurrence of symbol <name>.
++ * NULL is not always an error (a symbol may have that address); it is also
++ * returned when RTLD_DEFAULT is undefined. NOTE(review): build_is_static is
++ * not declared by this patch (haproxy leftover?) - confirm QEMU defines it. */
++static void *get_sym_curr_addr(const char *name)
++{
++ void *ptr = NULL;
++
++#ifdef RTLD_DEFAULT
++ if (!build_is_static)
++ ptr = dlsym(RTLD_DEFAULT, name);
++#endif
++ return ptr;
++}
++
++/* check if we're using the same allocator as the one that provides
++ * malloc_trim() and mallinfo(). The principle is that on glibc, both
++ * malloc_trim() and mallinfo() are provided, and using mallinfo() we
++ * can check if malloc() is performed through glibc or any other one
++ * the executable was linked against (e.g. jemalloc). Prior to this we
++ * have to check whether we're running on jemalloc by verifying if the
++ * mallctl() function is provided. Its pointer will be used later.
++ */
++static void detect_allocator(void)
++{
++#if defined(__ELF__)
++ extern int mallctl(const char *, void *, size_t *, void *, size_t) __attribute__((weak));
++
++ my_mallctl = mallctl;
++#endif
++ if (!my_mallctl)
++ my_mallctl = get_sym_curr_addr("mallctl");
++
++ using_default_allocator = (my_mallctl == NULL);
++
++ if (!my_mallctl) {
++#if defined(GLIBC_HAVE_MALLOC_TRIM)
++
++#ifdef GLIBC_HAVE_MALLINFO2
++ struct mallinfo2 mi1, mi2;
++ void *ptr;
++ mi1 = mallinfo2();
++ ptr = DISGUISE(malloc(1));
++ mi2 = mallinfo2();
++
++#else
++ struct mallinfo mi1, mi2;
++ void *ptr;
++ mi1 = mallinfo();
++ ptr = DISGUISE(malloc(1));
++ mi2 = mallinfo();
++
++#endif
++
++ free(DISGUISE(ptr));
++ using_default_allocator = !!memcmp(&mi1, &mi2, sizeof(mi1));
++#endif
++ }
++}
++
+ static void __attribute__((__constructor__)) rcu_init(void)
+ {
+ smp_mb_global_init();
+ #ifdef CONFIG_POSIX
+ pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_child);
++#endif
++#if defined(CONFIG_MALLOC_TRIM)
++ detect_allocator();
+ #endif
+ rcu_init_complete();
+ }
+--
+2.30.2
+
diff --git a/debian/patches/series b/debian/patches/series
index 4e8ddd6..7715e13 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -94,3 +94,4 @@ pve/0059-vma-create-support-64KiB-unaligned-input-images.patch
pve/0060-vma-create-avoid-triggering-assertion-in-error-case.patch
pve/0061-block-alloc-track-avoid-premature-break.patch
pve/0062-PVE-Backup-allow-passing-max-workers-performance-set.patch
+pve/0063-detect-allocator-and-call-malloc_trim-only-for-glibc.patch
\ No newline at end of file
--
2.30.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* [pve-devel] [PATCH qemu-server 1/1] qemu options: add tuning allocator
2023-05-16 11:22 [pve-devel] [PATCH-SERIES v2 qemu, qemu-server, manager 0/1] add tcmalloc support Alexandre Derumier
2023-05-16 11:22 ` [pve-devel] [PATCH V2 pve-qemu] add patch: add memory allocator detection and use malloc_trim for glibc only Alexandre Derumier
@ 2023-05-16 11:22 ` Alexandre Derumier
2023-05-16 11:22 ` [pve-devel] [PATCH pve-manager 1/1] ui: qemu : add tuning option Alexandre Derumier
2 siblings, 0 replies; 4+ messages in thread
From: Alexandre Derumier @ 2023-05-16 11:22 UTC (permalink / raw)
To: pve-devel
Add a new 'tuning' option with an 'allocator' property.
Available values:
- system: the default, i.e. glibc malloc
- tcmalloc: may improve performance when Ceph librbd is used
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
---
PVE/QemuServer.pm | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 40be44d..761ab48 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -289,6 +289,19 @@ my $meta_info_fmt = {
},
};
+my $tuning_fmt = {
+ allocator => {
+ type => 'string',
+ enum => [ qw(system tcmalloc) ],
+ default => 'system',
+ description => "Override the memory allocator used in QEMU via LD_PRELOAD.",
+ verbose_description => "Override the memory allocator used in QEMU via LD_PRELOAD."
+ ." Using tcmalloc might improve performance if ceph librbd is used."
+ ." NOTE: you must install the libtcmalloc-minimal4 package first!",
+ optional => 1,
+ },
+};
+
my $confdesc = {
onboot => {
optional => 1,
@@ -723,6 +736,12 @@ EODESCR
description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
optional => 1,
},
+ tuning=> {
+ type => 'string',
+ format => $tuning_fmt,
+ description => "Tune some special features.",
+ optional => 1,
+ },
};
my $cicustom_fmt = {
@@ -2192,6 +2211,16 @@ sub parse_rng {
return $res;
}
+sub parse_tuning {
+ my ($value) = @_;
+
+ return if !$value;
+
+ my $res = eval { parse_property_string($tuning_fmt, $value) };
+ warn $@ if $@;
+ return $res;
+}
+
sub parse_meta_info {
my ($value) = @_;
@@ -5909,6 +5938,12 @@ sub vm_start_nolock {
my $run_qemu = sub {
PVE::Tools::run_fork sub {
+
+ my $tuning = $conf->{tuning} ? parse_tuning($conf->{tuning}) : undef;
+ # libtcmalloc-minimal4 (named in the 'allocator' description) ships libtcmalloc_minimal.so.4
+ $ENV{LD_PRELOAD} = "/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4"
+ if $tuning && $tuning->{allocator} eq 'tcmalloc';
+
PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
my $tpmpid;
--
2.30.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* [pve-devel] [PATCH pve-manager 1/1] ui: qemu : add tuning option
2023-05-16 11:22 [pve-devel] [PATCH-SERIES v2 qemu, qemu-server, manager 0/1] add tcmalloc support Alexandre Derumier
2023-05-16 11:22 ` [pve-devel] [PATCH V2 pve-qemu] add patch: add memory allocator detection and use malloc_trim for glibc only Alexandre Derumier
2023-05-16 11:22 ` [pve-devel] [PATCH qemu-server 1/1] qemu options: add tuning allocator Alexandre Derumier
@ 2023-05-16 11:22 ` Alexandre Derumier
2 siblings, 0 replies; 4+ messages in thread
From: Alexandre Derumier @ 2023-05-16 11:22 UTC (permalink / raw)
To: pve-devel
Add a 'Tuning' entry to the VM options panel, with a memory allocator property.
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
---
www/manager6/Makefile | 1 +
www/manager6/Utils.js | 13 +++++++++
www/manager6/form/TuningSelector.js | 41 +++++++++++++++++++++++++++++
www/manager6/qemu/Options.js | 14 ++++++++++
4 files changed, 69 insertions(+)
create mode 100644 www/manager6/form/TuningSelector.js
diff --git a/www/manager6/Makefile b/www/manager6/Makefile
index b73b729a..c29bc87a 100644
--- a/www/manager6/Makefile
+++ b/www/manager6/Makefile
@@ -67,6 +67,7 @@ JSSRC= \
form/StorageSelector.js \
form/TFASelector.js \
form/TokenSelector.js \
+ form/TuningSelector.js \
form/USBSelector.js \
form/UserSelector.js \
form/VLanField.js \
diff --git a/www/manager6/Utils.js b/www/manager6/Utils.js
index 7bf3955a..3c395a5c 100644
--- a/www/manager6/Utils.js
+++ b/www/manager6/Utils.js
@@ -545,6 +545,19 @@ Ext.define('PVE.Utils', {
return output.join(', ');
},
+ render_tuning: function(values) {
+ let props = PVE.Parser.parsePropertyString(values);
+ if (Ext.Object.isEmpty(props)) {
+ return Proxmox.Utils.noneText;
+ }
+
+ let output = [];
+ if (props.allocator === 'tcmalloc') {
+ output.push('Memory allocator: ' + props.allocator);
+ }
+ return output.join(', ');
+ },
+
// fixme: auto-generate this
// for now, please keep in sync with PVE::Tools::kvmkeymaps
kvm_keymaps: {
diff --git a/www/manager6/form/TuningSelector.js b/www/manager6/form/TuningSelector.js
new file mode 100644
index 00000000..d967dc29
--- /dev/null
+++ b/www/manager6/form/TuningSelector.js
@@ -0,0 +1,41 @@
+Ext.define('PVE.form.TuningSelector', {
+ extend: 'Proxmox.panel.InputPanel',
+ alias: 'widget.pveTuningSelector',
+
+ viewModel: {},
+
+ items: [
+ {
+ xtype: 'proxmoxKVComboBox',
+ itemId: 'allocator',
+ name: 'allocator',
+ value: 'system',
+ fieldLabel: 'Memory Allocator',
+ comboItems: [
+ ['system', 'system'],
+ ['tcmalloc', 'tcmalloc'],
+ ],
+ },
+ ],
+
+ onGetValues: function(values) {
+ var ret = {};
+
+ if (values.allocator !== "system") {
+ ret.allocator = values.allocator;
+ }
+
+ if (Ext.Object.isEmpty(ret)) {
+ return { 'delete': 'tuning' };
+ }
+ var tuning_props = PVE.Parser.printPropertyString(ret);
+ return { tuning: tuning_props };
+ },
+
+ setValues: function(values) {
+ if (values.tuning) {
+ var tuning = PVE.Parser.parsePropertyString(values.tuning);
+ this.callParent([tuning]);
+ }
+ },
+});
diff --git a/www/manager6/qemu/Options.js b/www/manager6/qemu/Options.js
index 7b112400..53972d18 100644
--- a/www/manager6/qemu/Options.js
+++ b/www/manager6/qemu/Options.js
@@ -338,6 +338,20 @@ Ext.define('PVE.qemu.Options', {
},
} : undefined,
},
+ tuning: {
+ header: gettext('Tuning'),
+ defaultValue: false,
+ renderer: PVE.Utils.render_tuning,
+ editor: caps.vms['VM.Config.Options'] ? {
+ xtype: 'proxmoxWindowEdit',
+ subject: gettext('Tuning'),
+ onlineHelp: 'chapter_virtual_machines', // FIXME: use 'qm_tuning' once available
+ items: {
+ xtype: 'pveTuningSelector',
+ name: 'tuning',
+ },
+ } : undefined,
+ },
hookscript: {
header: gettext('Hookscript'),
},
--
2.30.2
^ permalink raw reply [flat|nested] 4+ messages in thread