* [pve-devel] [PATCH pve-ha-manager 1/8] add AHP && Topsis Math Helpers
From: Alexandre Derumier @ 2022-04-27 15:33 UTC (permalink / raw)
To: pve-devel
Topsis:
https://www.youtube.com/watch?v=kfcN7MuYVeI
AHP:
https://www.youtube.com/watch?v=J4T70o8gjlk
AHP-Topsis implementation in vm balancing:
https://arxiv.org/pdf/1002.3329.pdf
https://meral.edu.mm/record/4285/files/9069.pdf
Topsis (Technique for Order Preference by Similarity to Ideal Solution)
is a multi-criteria decision-making method that finds the best solution (with a score)
when we need to rank alternatives across multiple values.
simple example: order nodes by lower cpu and higher memory, where the memory factor is more important
$nodes->{node1}->{cpu} = 80;
$nodes->{node1}->{mem} = 100;
$nodes->{node2}->{cpu} = 79;
$nodes->{node2}->{mem} = 99;
$nodes->{node3}->{cpu} = 90;
$nodes->{node3}->{mem} = 102;
The resulting scores are:
node1 score: 0.745400652669653
node2 score: 0.688707427881571
node3 score: 0.311292572118429
Node1 will be chosen, as it has the highest score.
We can of course add more parameters for more complex ranking.
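As a rough sketch, the example above maps onto the helpers added by this patch
as follows ('-' marks a cost criterion where lower is better, '+' a benefit
criterion where higher is better; the weights are the AHP output shown below):

my $weights = { cpu => 0.333333333333333, mem => 0.666666666666667 };
my $order = { cpu => '-', mem => '+' };
my $scores = PVE::HA::Balancer::Topsis::score($nodes, $weights, $order);
# e.g. $scores->{node1}->{score} is the 0.7454... value above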
Topsis needs priority weights between the different parameters.
As it can be quite complex to choose good weights by hand, the AHP (Analytic Hierarchy Process)
method is used to compute the weights from a pair-wise comparison of the priority between all parameters.
simple example: mem is twice as important as cpu
my $preferences = {
mem => {
mem => 1,
cpu => 2,
},
cpu => {
cpu => 1,
},
};
The resulting weights are:
cpu : 0.333333333333333
mem : 0.666666666666667
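For reference, this can be checked by hand: compute_weights first completes the
matrix with the missing reciprocals (cpu vs mem becomes 1/2), giving the pairwise matrix

      mem   cpu
mem   1     2
cpu   0.5   1

then divides each column by its sum (1.5 for mem, 3 for cpu) and averages each row:
mem = (1/1.5 + 2/3) / 2 = 0.667
cpu = (0.5/1.5 + 1/3) / 2 = 0.333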
---
debian/pve-ha-manager.install | 2 +
src/PVE/HA/Balancer/AHP.pm | 120 ++++++++++++++++++++++++++++++++++
src/PVE/HA/Balancer/Makefile | 6 ++
src/PVE/HA/Balancer/Topsis.pm | 115 ++++++++++++++++++++++++++++++++
src/PVE/HA/Makefile | 2 +
5 files changed, 245 insertions(+)
create mode 100644 src/PVE/HA/Balancer/AHP.pm
create mode 100644 src/PVE/HA/Balancer/Makefile
create mode 100644 src/PVE/HA/Balancer/Topsis.pm
diff --git a/debian/pve-ha-manager.install b/debian/pve-ha-manager.install
index 33a5c58..d6979c4 100644
--- a/debian/pve-ha-manager.install
+++ b/debian/pve-ha-manager.install
@@ -19,6 +19,8 @@
/usr/share/perl5/PVE/API2/HA/Status.pm
/usr/share/perl5/PVE/CLI/ha_manager.pm
/usr/share/perl5/PVE/HA/CRM.pm
+/usr/share/perl5/PVE/HA/Balancer/AHP.pm
+/usr/share/perl5/PVE/HA/Balancer/Topsis.pm
/usr/share/perl5/PVE/HA/Config.pm
/usr/share/perl5/PVE/HA/Config.pm
/usr/share/perl5/PVE/HA/Env.pm
diff --git a/src/PVE/HA/Balancer/AHP.pm b/src/PVE/HA/Balancer/AHP.pm
new file mode 100644
index 0000000..a10c8fc
--- /dev/null
+++ b/src/PVE/HA/Balancer/AHP.pm
@@ -0,0 +1,120 @@
+package PVE::HA::Balancer::AHP;
+
+use strict;
+use warnings;
+
+## math helpers
+my $bitwise_matrix = sub {
+ my ($hash) = @_;
+
+ my $bitwise_matrix = {};
+ foreach my $rowkey (keys %$hash) {
+ my $row = $hash->{$rowkey};
+ foreach my $columnkey (keys %$row) {
+ $bitwise_matrix->{$rowkey}->{$columnkey} = $row->{$columnkey};
+ $bitwise_matrix->{$columnkey}->{$rowkey} = 1 / $row->{$columnkey};
+ }
+ }
+ return $bitwise_matrix;
+};
+
+my $compute_column_sum = sub {
+ my ($bitwise_matrix) = @_;
+
+ my $matrix_column_sum = {};
+ foreach my $rowkey (keys %$bitwise_matrix) {
+ my $row = $bitwise_matrix->{$rowkey};
+ foreach my $columnkey (keys %$row) {
+ $matrix_column_sum->{$columnkey} = 0 if !defined($matrix_column_sum->{$columnkey});
+ $matrix_column_sum->{$columnkey} += $row->{$columnkey};
+ }
+ }
+ return $matrix_column_sum;
+};
+
+my $preference_vector = sub {
+ my ($bitwise_matrix) = @_;
+
+ my $matrix_column_sum = &$compute_column_sum($bitwise_matrix);
+
+ my $preference_vector = {};
+ foreach my $rowkey (keys %$bitwise_matrix) {
+ my $row = $bitwise_matrix->{$rowkey};
+ my $row_sum = 0;
+ foreach my $columnkey (keys %$row) {
+ $row_sum += $row->{$columnkey} / $matrix_column_sum->{$columnkey};
+ }
+ $preference_vector->{$rowkey} = $row_sum / (keys %$row);
+ }
+ return $preference_vector;
+};
+
+my $compute_ci = sub {
+ my ($bitwise_matrix, $preference_vector) = @_;
+
+ my $sum = 0;
+ foreach my $rowkey (keys %$bitwise_matrix) {
+ my $row = $bitwise_matrix->{$rowkey};
+ my $weighted_row_sum = 0;
+ foreach my $columnkey (keys %$row) {
+ $weighted_row_sum += ($row->{$columnkey} * $preference_vector->{$columnkey});
+ }
+ $sum += $weighted_row_sum / $preference_vector->{$rowkey};
+ }
+
+ my $criteria_numbers = keys %$bitwise_matrix;
+ my $avg = $sum / $criteria_numbers;
+ my $ci = ($avg - $criteria_numbers) / ($criteria_numbers - 1);
+ return $ci;
+};
+
+my $compute_ri = sub {
+ my ($bitwise_matrix) = @_;
+
+ my $criteria_numbers = keys %$bitwise_matrix;
+
+ my $ri = {
+ 1 => 0,
+ 2 => 0,
+ 3 => 0.58,
+ 4 => 0.9,
+ 5 => 1.12,
+ 6 => 1.24,
+ 7 => 1.32,
+ 8 => 1.41,
+ 9 => 1.45,
+ 10 => 1.49,
+ 11 => 1.51,
+ 12 => 1.53,
+ 13 => 1.56,
+ 14 => 1.57,
+ 15 => 1.59
+ };
+ die "too much criterias" if $criteria_numbers > 15;
+ return $ri->{$criteria_numbers};
+};
+
+my $verify_ci_index = sub {
+ my ($bitwise_matrix, $preference_vector) = @_;
+
+ die "empty matrix" if !$bitwise_matrix || !keys %$bitwise_matrix;
+
+ my $ri = &$compute_ri($bitwise_matrix);
+ return if $ri == 0;
+
+ my $ci = &$compute_ci($bitwise_matrix, $preference_vector);
+ my $ci_index = $ci/$ri;
+
+ warn "bad ahp ci index:$ci_index. please review your preferences" if $ci_index >= 0.1;
+};
+
+sub compute_weights {
+ my ($preferences) = @_;
+
+ my $bitwise_matrix = &$bitwise_matrix($preferences);
+ my $preference_vector = &$preference_vector($bitwise_matrix);
+ &$verify_ci_index($bitwise_matrix, $preference_vector); # optional consistency check
+
+ return $preference_vector;
+}
+1;
diff --git a/src/PVE/HA/Balancer/Makefile b/src/PVE/HA/Balancer/Makefile
new file mode 100644
index 0000000..de4b1b2
--- /dev/null
+++ b/src/PVE/HA/Balancer/Makefile
@@ -0,0 +1,6 @@
+SOURCES=Topsis.pm AHP.pm
+
+.PHONY: install
+install:
+ install -d -m 0755 ${DESTDIR}${PERLDIR}/PVE/HA/Balancer
+ for i in ${SOURCES}; do install -D -m 0644 $$i ${DESTDIR}${PERLDIR}/PVE/HA/Balancer/$$i; done
diff --git a/src/PVE/HA/Balancer/Topsis.pm b/src/PVE/HA/Balancer/Topsis.pm
new file mode 100644
index 0000000..e59e9da
--- /dev/null
+++ b/src/PVE/HA/Balancer/Topsis.pm
@@ -0,0 +1,115 @@
+package PVE::HA::Balancer::Topsis;
+
+use strict;
+use warnings;
+
+#topsis best ordering score
+#https://en.wikipedia.org/wiki/TOPSIS
+#https://www.youtube.com/watch?v=kfcN7MuYVeI
+my $normalize = sub {
+ my ($hash, $weights) = @_;
+
+ my $norms = {};
+ foreach my $key (keys %$hash) {
+ my $row = $hash->{$key};
+ foreach my $column (keys %$row) {
+ next if !defined($weights->{$column});
+ $norms->{$column} = 0 if !defined($norms->{$column});
+ $norms->{$column} += $row->{$column} * $row->{$column};
+ }
+ }
+
+ my $result = {};
+
+ foreach my $key (keys %$hash) {
+ my $row = $hash->{$key};
+ foreach my $column (keys %$row) {
+ next if !defined($weights->{$column});
+ if ($norms->{$column} == 0) {
+ $result->{$key}->{$column} = 0;
+ } else {
+ $result->{$key}->{$column} = $row->{$column} / sqrt($norms->{$column});
+ $result->{$key}->{$column} *= $weights->{$column};
+ }
+ }
+ }
+
+ return $result;
+};
+
+my $best_worst_values = sub {
+ my ($hash, $order) = @_;
+
+ my $result = {};
+
+ foreach my $key (keys %$hash) {
+ my $row = $hash->{$key};
+ foreach my $column (keys %$row) {
+
+ if ($order->{$column} eq '+') {
+ $result->{$column}->{best} = $row->{$column} if !defined($result->{$column}->{best}) || $row->{$column} > $result->{$column}->{best};
+ $result->{$column}->{worst} = $row->{$column} if !defined($result->{$column}->{worst}) || $row->{$column} < $result->{$column}->{worst};
+ } elsif ($order->{$column} eq '-') {
+ $result->{$column}->{best} = $row->{$column} if !defined($result->{$column}->{best}) || $row->{$column} < $result->{$column}->{best};
+ $result->{$column}->{worst} = $row->{$column} if !defined($result->{$column}->{worst}) || $row->{$column} > $result->{$column}->{worst};
+ }
+ }
+ }
+ return $result;
+
+};
+
+my $euclidean_distance = sub {
+ my ($hash, $best_worst_hash) = @_;
+
+ my $result = {};
+
+ foreach my $type ('best', 'worst') {
+
+ foreach my $key (keys %$hash) {
+ my $row = $hash->{$key};
+ foreach my $column (keys %$row) {
+ my $diff = ($row->{$column} - $best_worst_hash->{$column}->{$type});
+ $diff *= $diff;
+ $result->{$key}->{$type} = 0 if !defined($result->{$key}->{$type});
+ $result->{$key}->{$type} += $diff;
+ }
+ $result->{$key}->{$type} = sqrt($result->{$key}->{$type});
+ }
+ }
+
+ return $result;
+};
+
+my $compute_score = sub {
+ my ($hash) = @_;
+
+ my $result = {};
+
+ foreach my $key (keys %$hash) {
+ my $row = $hash->{$key};
+ foreach my $column (keys %$row) {
+ if ($hash->{$key}->{worst} == 0 && $hash->{$key}->{best} == 0) {
+ $result->{$key}->{score} = 0;
+ } else {
+ $result->{$key}->{score} = $hash->{$key}->{worst} / ($hash->{$key}->{worst} + $hash->{$key}->{best});
+ }
+ }
+ }
+ return $result;
+};
+
+sub score {
+ my ($hash, $weights, $bestorder) = @_;
+
+ die "topsis_score : empty hash" if !$hash || !keys %$hash;
+
+ my $normalized_hash = &$normalize($hash, $weights);
+ my $best_worst_hash = &$best_worst_values($normalized_hash, $bestorder);
+ my $euclidean_distances = &$euclidean_distance($normalized_hash, $best_worst_hash);
+ my $scores = &$compute_score($euclidean_distances);
+
+ return $scores;
+}
+
+1;
diff --git a/src/PVE/HA/Makefile b/src/PVE/HA/Makefile
index c366f6c..a548c86 100644
--- a/src/PVE/HA/Makefile
+++ b/src/PVE/HA/Makefile
@@ -9,9 +9,11 @@ install:
for i in ${SOURCES}; do install -D -m 0644 $$i ${DESTDIR}${PERLDIR}/PVE/HA/$$i; done
make -C Resources install
make -C Env install
+ make -C Balancer install
.PHONY: installsim
installsim:
install -d -m 0755 ${DESTDIR}${PERLDIR}/PVE/HA
for i in ${SIM_SOURCES}; do install -D -m 0644 $$i ${DESTDIR}${PERLDIR}/PVE/HA/$$i; done
make -C Sim install
+ make -C Balancer install
--
2.30.2
* [pve-devel] [PATCH pve-ha-manager 2/8] get services && nodes stats
From: Alexandre Derumier @ 2022-04-27 15:33 UTC (permalink / raw)
To: pve-devel
For offline vms in recovery state, we look at the rrd average over the last
20 minutes (excluding spikes with the 95th percentile).
For online vms, we use the last streamed rrd value.
We still need to implement a method to compute the last-minute average for cpu
usage without re-reading the rrd file.
For other metrics, we can use the last value.
For nodes, we use the last streamed rrd value.
(We also need to compute the last-minute average for cpu usage.)
A topsis score is computed for vms in recovery state, ordered by:
- biggest boot memory usage (windows = 100% of memory, linux = last mem used)
- biggest cpu usage
We want to restart the biggest services first, to have a better chance of
finding a node with enough resources.
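For reference, these recovery weights come from the same AHP helper introduced
in patch 1/8; with startmem rated twice as important as totalcpu, this resolves to:

my $preferences = {
    startmem => { startmem => 1, totalcpu => 2 },
    totalcpu => { totalcpu => 1 },
};
my $weights = PVE::HA::Balancer::AHP::compute_weights($preferences);
# $weights->{startmem} = 2/3, $weights->{totalcpu} = 1/3

so boot memory dominates the recovery ordering.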
---
debian/pve-ha-manager.install | 1 +
src/PVE/HA/Balancer/Makefile | 2 +-
src/PVE/HA/Balancer/Stats.pm | 134 ++++++++++++++++++++++++++++++
src/PVE/HA/Env.pm | 28 +++++++
src/PVE/HA/Env/PVE2.pm | 149 ++++++++++++++++++++++++++++++++++
src/PVE/HA/Manager.pm | 5 ++
src/PVE/HA/Sim/TestEnv.pm | 48 ++++++++++-
7 files changed, 365 insertions(+), 2 deletions(-)
create mode 100644 src/PVE/HA/Balancer/Stats.pm
diff --git a/debian/pve-ha-manager.install b/debian/pve-ha-manager.install
index d6979c4..6297997 100644
--- a/debian/pve-ha-manager.install
+++ b/debian/pve-ha-manager.install
@@ -21,6 +21,7 @@
/usr/share/perl5/PVE/HA/CRM.pm
/usr/share/perl5/PVE/HA/Balancer/AHP.pm
/usr/share/perl5/PVE/HA/Balancer/Topsis.pm
+/usr/share/perl5/PVE/HA/Balancer/Stats.pm
/usr/share/perl5/PVE/HA/Config.pm
/usr/share/perl5/PVE/HA/Config.pm
/usr/share/perl5/PVE/HA/Env.pm
diff --git a/src/PVE/HA/Balancer/Makefile b/src/PVE/HA/Balancer/Makefile
index de4b1b2..95ff86c 100644
--- a/src/PVE/HA/Balancer/Makefile
+++ b/src/PVE/HA/Balancer/Makefile
@@ -1,4 +1,4 @@
-SOURCES=Topsis.pm AHP.pm
+SOURCES=Topsis.pm AHP.pm Stats.pm
.PHONY: install
install:
diff --git a/src/PVE/HA/Balancer/Stats.pm b/src/PVE/HA/Balancer/Stats.pm
new file mode 100644
index 0000000..15162ce
--- /dev/null
+++ b/src/PVE/HA/Balancer/Stats.pm
@@ -0,0 +1,134 @@
+package PVE::HA::Balancer::Stats;
+
+use strict;
+use warnings;
+use PVE::HA::Balancer::Topsis;
+use PVE::HA::Balancer::AHP;
+
+my $compute_node_vms_pressure = sub {
+ my ($self, $node, $vm_stats) = @_;
+
+ return if !defined($self->{online_node_stats}->{$node});
+
+ my $node_stats = $self->{online_node_stats}->{$node}->{stats};
+
+ #count total number of vms vcpus on this host
+ $node_stats->{total_vm_vcpus} = 0 if !defined($node_stats->{total_vm_vcpus});
+ $node_stats->{total_vm_vcpus} += $vm_stats->{maxcpu};
+
+ #add biggest vm pressure
+ $node_stats->{max_vm_pressure} = 0 if !defined($node_stats->{max_vm_pressure});
+ $node_stats->{max_vm_pressure} = $vm_stats->{cpu_pressure} if $vm_stats->{cpu_pressure} > $node_stats->{max_vm_pressure};
+};
+
+my $get_service_stats = sub {
+ my ($self, $ss, $sc) = @_;
+
+ my $haenv = $self->{haenv};
+
+ my $recovery_stats = {};
+
+ foreach my $sid (sort keys %$ss) {
+
+ my $cd = $sc->{$sid};
+ my $node = $cd->{node};
+
+ my (undef, $type, $vmid) = $haenv->parse_sid($sid);
+ $ss->{$sid}->{type} = $type;
+ $ss->{$sid}->{vmid} = $vmid;
+
+ my $stats = { cpu => 0, maxcpu => 0, startmem => 0, mem => 0, maxmem => 0, totalcpu => 0, cpu_pressure => 0, recovery_score => 0 };
+ $ss->{$sid}->{stats} = $stats;
+
+ if ($ss->{$sid}->{state} eq 'recovery') {
+ # get vm/ct stats history over the last 20 min (95th percentile)
+ $stats = $haenv->get_vm_offline_rrd_stats($vmid, 95);
+ } elsif ($ss->{$sid}->{state} eq 'started') {
+ #get last stats from cache.
+ $stats = $haenv->get_vm_rrd_stats($vmid);
+ } else {
+ # avoid computing all stats, as we currently only support the recovery and started states for balancing
+ next;
+ }
+
+ my $vmconf = $haenv->read_vm_ct_config($vmid, $type);
+ $ss->{$sid}->{vmconf} = $vmconf;
+
+ $stats->{startmem} = $stats->{mem};
+ # windows vms fill memory with zeros at boot, so mem = maxmem
+ $stats->{startmem} = $stats->{maxmem} if $vmconf && defined($vmconf->{ostype}) && $vmconf->{ostype} eq 'windows';
+
+ # totalcpu = cpu relative to 1 core: 50% of 4 cores = 200% of 1 core
+ $stats->{totalcpu} = $stats->{cpu} * 100 * $stats->{maxcpu};
+
+ $stats->{recovery_score} = 0;
+
+ &$compute_node_vms_pressure($self, $node, $stats);
+
+ $ss->{$sid}->{stats} = $stats;
+ $recovery_stats->{$sid} = $stats if $ss->{$sid}->{state} eq 'recovery';
+ }
+
+ #compute scores for recovery services
+ return if !keys %$recovery_stats;
+
+ my $weights = $self->{balancer}->{topsis}->{services_recovery}->{weights};
+ my $order = $self->{balancer}->{topsis}->{services_recovery}->{order};
+ my $scores = PVE::HA::Balancer::Topsis::score($recovery_stats, $weights, $order);
+
+ foreach my $sid (sort keys %$scores) {
+ $ss->{$sid}->{stats}->{recovery_score} = $scores->{$sid}->{score};
+ }
+};
+
+sub recompute_node_service_stats {
+ my ($self, $ss, $sc) = @_;
+
+ my $online_node_stats = {};
+ my $online_nodes = $self->{ns}->list_online_nodes();
+
+ foreach my $node (@$online_nodes) {
+ my $stats = $self->{haenv}->get_node_rrd_stats($node);
+ $stats->{cpu} = 0 if !defined($stats->{cpu});
+ $stats->{cpu_pressure} = 0 if !defined($stats->{cpu_pressure}); #fixme: implement rrd
+ $stats->{maxcpu} = 0 if !defined($stats->{maxcpu});
+ $stats->{mem} = 0 if !defined($stats->{mem});
+ $stats->{ksm} = 0 if !defined($stats->{ksm}); #fixme: implement rrd
+ $stats->{maxmem} = 0 if !defined($stats->{maxmem});
+ $stats->{totalcpu} = $stats->{cpu} * 100 * $stats->{maxcpu}; #how to handle different cpu model power ? bogomips ?
+ $stats->{total_vm_vcpus} = 0;
+ $stats->{max_vm_pressure} = 0;
+ $online_node_stats->{$node}->{stats} = $stats;
+ }
+
+ $self->{online_node_stats} = $online_node_stats;
+
+ &$get_service_stats($self, $ss, $sc);
+}
+
+sub compute_ahp_recovery_weights {
+ my ($self) = @_;
+
+ #bigger memory/cpu for offline service, better chance to find free space first
+
+ my $bestorder = {
+ startmem => "+",
+ totalcpu => "+",
+ };
+
+ my $preferences = {
+ startmem => {
+ startmem => 1,
+ totalcpu => 2,
+ },
+ totalcpu => {
+ totalcpu => 1,
+ },
+ };
+
+ my $weights = PVE::HA::Balancer::AHP::compute_weights($preferences);
+ $self->{balancer}->{topsis}->{services_recovery}->{weights} = $weights;
+ $self->{balancer}->{topsis}->{services_recovery}->{order} = $bestorder;
+}
+
+1;
diff --git a/src/PVE/HA/Env.pm b/src/PVE/HA/Env.pm
index ac569a9..2ecc186 100644
--- a/src/PVE/HA/Env.pm
+++ b/src/PVE/HA/Env.pm
@@ -269,4 +269,32 @@ sub get_ha_settings {
return $self->{plug}->get_ha_settings();
}
+sub get_node_rrd_stats {
+ my ($self, $node) = @_;
+
+ return $self->{plug}->get_node_rrd_stats($node);
+}
+
+sub get_vm_rrd_stats {
+ my ($self, $vmid, $percentile) = @_;
+
+ return $self->{plug}->get_vm_rrd_stats($vmid, $percentile);
+}
+
+sub get_vm_offline_rrd_stats {
+ my ($self, $vmid, $percentile) = @_;
+
+ return $self->{plug}->get_vm_offline_rrd_stats($vmid, $percentile);
+}
+
+sub read_vm_ct_config {
+ my ($self, $vmid, $type) = @_;
+
+ if ($type eq 'vm') {
+ return $self->{plug}->read_vm_config($vmid);
+ } elsif ($type eq 'ct') {
+ return $self->{plug}->read_ct_config($vmid);
+ }
+}
+
1;
diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm
index 5e0a683..917aa62 100644
--- a/src/PVE/HA/Env/PVE2.pm
+++ b/src/PVE/HA/Env/PVE2.pm
@@ -12,6 +12,11 @@ use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file
use PVE::DataCenterConfig;
use PVE::INotify;
use PVE::RPCEnvironment;
+use PVE::API2Tools;
+use PVE::QemuConfig;
+use PVE::QemuServer;
+use PVE::LXC::Config;
+use RRDs;
use PVE::HA::Tools ':exit_codes';
use PVE::HA::Env;
@@ -459,4 +464,148 @@ sub get_ha_settings {
return $datacenterconfig->{ha};
}
+sub get_node_rrd_stats {
+ my ($self, $node) = @_;
+
+ my $rrd = PVE::Cluster::rrd_dump();
+ my $members = PVE::Cluster::get_members();
+
+ my $stats = PVE::API2Tools::extract_node_stats($node, $members, $rrd);
+
+ # improve me
+ # we could compute the last-minute average for cpu usage;
+ # for other values, we can use the last value
+
+ return $stats;
+}
+
+sub get_vm_rrd_stats {
+ my ($self, $vmid) = @_;
+
+ my $rrd = PVE::Cluster::rrd_dump();
+ my $vmlist = PVE::Cluster::get_vmlist() || {};
+ my $idlist = $vmlist->{ids} || {};
+
+ my $data = $idlist->{$vmid};
+ my $stats = PVE::API2Tools::extract_vm_stats($vmid, $data, $rrd);
+
+ # improve me
+ # we could compute the last-minute average for cpu usage;
+ # for other values, we can use the last value
+
+ return $stats;
+}
+
+sub get_vm_offline_rrd_stats {
+ my ($self, $vmid, $percentile) = @_;
+
+ my $rrdname = "pve2-vm/$vmid";
+ my $rrddir = "/var/lib/rrdcached/db";
+
+ my $rrd = "$rrddir/$rrdname";
+
+ my $cf = "AVERAGE";
+
+ my $reso = 60;
+ my $ctime = $reso*int(time()/$reso);
+
+ # average over the last 20 minutes
+
+ my $req_start = $ctime - $reso*20;
+ my $req_end = $ctime - $reso*1;
+
+ my @args = (
+ "-s" => $req_start,
+ "-e" => $req_end,
+ "-r" => $reso,
+ );
+
+ my $socket = "/var/run/rrdcached.sock";
+ push @args, "--daemon" => "unix:$socket" if -S $socket;
+
+ my ($start, $step, $names, $data) = RRDs::fetch($rrd, $cf, @args);
+
+ my @cpu = ();
+ my @mem = ();
+ my @maxmem = ();
+ my @maxcpu = ();
+
+ #fixme: implement true cgroup host cpu/mem && pressure in rrd
+
+ foreach my $rec (@$data) {
+ my $maxcpu = @$rec[0] || 0;
+ my $cpu = @$rec[1] || 0;
+ my $maxmem = @$rec[2] || 0;
+ my $mem = @$rec[3] || 0;
+ # skip zero values if the vm is down
+ push @cpu, $cpu*$maxcpu if $cpu > 0;
+ push @mem, $mem if $mem > 0;
+ push @maxcpu, $maxcpu if $maxcpu > 0;
+ push @maxmem, $maxmem if $maxmem > 0;
+ }
+
+ my $stats = {};
+
+ $stats->{cpu} = percentile($percentile, \@cpu) || 0;
+ $stats->{mem} = percentile($percentile, \@mem) || 0;
+ $stats->{maxmem} = percentile($percentile, \@maxmem) || 0;
+ $stats->{maxcpu} = percentile($percentile, \@maxcpu) || 0;
+ $stats->{totalcpu} = $stats->{cpu} * $stats->{maxcpu} * 100;
+ $stats->{cpu_pressure} = 0;
+ return $stats;
+}
+
+sub percentile {
+ my ($p, $aref) = @_;
+ my $percentile = int($p * $#{$aref}/100);
+ return (sort { $a <=> $b } @$aref)[$percentile];
+}
+
+sub read_vm_config {
+ my ($self, $vmid) = @_;
+
+ my $conf = undef;
+ my $finalconf = {};
+
+ my $vmlist = PVE::Cluster::get_vmlist();
+ my $node = $vmlist->{ids}->{$vmid}->{node};
+
+ eval { $conf = PVE::QemuConfig->load_config($vmid, $node)};
+ return if !$conf;
+
+ if ( PVE::QemuServer::windows_version($conf->{ostype}) ) {
+ $finalconf->{ostype} = 'windows';
+ } else {
+ $finalconf->{ostype} = $conf->{ostype};
+ }
+
+ PVE::QemuConfig->foreach_volume($conf, sub {
+ my ($ds, $drive) = @_;
+
+ $finalconf->{$ds} = $conf->{$ds};
+ });
+
+ return $finalconf;
+}
+
+sub read_ct_config {
+ my ($self, $vmid) = @_;
+
+ my $conf = undef;
+ my $finalconf = {};
+
+ my $vmlist = PVE::Cluster::get_vmlist();
+ my $node = $vmlist->{ids}->{$vmid}->{node};
+
+ eval { $conf = PVE::LXC::Config->load_config($vmid, $node)};
+ return if !$conf;
+
+ PVE::LXC::Config->foreach_volume($conf, sub {
+ my ($ms, $mountpoint) = @_;
+ $finalconf->{$ms} = $conf->{$ms};
+ });
+
+ return $finalconf;
+}
+
1;
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 2deea57..68b2872 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -7,6 +7,7 @@ use Digest::MD5 qw(md5_base64);
use PVE::Tools;
use PVE::HA::Tools ':exit_codes';
use PVE::HA::NodeStatus;
+use PVE::HA::Balancer::Stats;
sub new {
my ($this, $haenv) = @_;
@@ -26,6 +27,8 @@ sub new {
$self->{ms} = { master_node => $haenv->nodename() };
+ PVE::HA::Balancer::Stats::compute_ahp_recovery_weights($self);
+
return $self;
}
@@ -395,6 +398,8 @@ sub manage {
$self->recompute_online_node_usage();
+ PVE::HA::Balancer::Stats::recompute_node_service_stats($self, $ss, $sc);
+
foreach my $sid (sort keys %$ss) {
my $sd = $ss->{$sid};
my $cd = $sc->{$sid} || { state => 'disabled' };
diff --git a/src/PVE/HA/Sim/TestEnv.pm b/src/PVE/HA/Sim/TestEnv.pm
index b448d72..ee261ef 100644
--- a/src/PVE/HA/Sim/TestEnv.pm
+++ b/src/PVE/HA/Sim/TestEnv.pm
@@ -118,4 +118,50 @@ sub get_max_workers {
return 0;
}
-1;
+sub get_node_rrd_stats {
+ my ($self, $node) = @_;
+
+ my $nodestats = $self->{hardware}->{node_stats};
+ my $stats = $nodestats->{$node};
+
+ return $stats;
+}
+
+sub get_vm_rrd_stats {
+ my ($self, $vmid) = @_;
+
+ my $vmstats = $self->{hardware}->{service_stats};
+ my $stats = $vmstats->{$vmid};
+
+ $stats->{uptime} = $stats->{uptime} || 400;
+ $stats->{cpu} = $stats->{cpu} || 0;
+ $stats->{mem} = $stats->{mem} || 0;
+ $stats->{maxmem} = $stats->{maxmem} || 0;
+ $stats->{maxcpu} = $stats->{maxcpu} || 0;
+ $stats->{totalcpu} = $stats->{cpu} * $stats->{maxcpu} * 100;
+ $stats->{cpu_pressure} = $stats->{cpu_pressure} || 0;
+
+ return $stats;
+}
+
+sub get_vm_offline_rrd_stats {
+ my ($self, $vmid, $percentile) = @_;
+
+ my $stats = $self->get_vm_rrd_stats($vmid);
+
+ return $stats;
+}
+
+sub read_vm_config {
+ my ($self, $vmid) = @_;
+
+ return $self->{hardware}->{vm_config}->{$vmid};
+}
+
+sub read_ct_config {
+ my ($self, $vmid) = @_;
+
+ return $self->{hardware}->{vm_config}->{$vmid};
+}
+
+1;
\ No newline at end of file
--
2.30.2
* [pve-devel] [PATCH pve-ha-manager 3/8] implement ressource aware service recovery
From: Alexandre Derumier @ 2022-04-27 15:33 UTC (permalink / raw)
To: pve-devel
Use a new method to find the destination node for service recovery.
First, we order the services by topsis score.
Then we try to find the best target node.
FILTERING
---------
1)
We check if the node is able to start the vm:
- host has enough cores
- host has enough memory
- storage availability
- not yet implemented: more checks could be added (cpu model compat, bridge availability, anti-affinity, local devices, ...)
2) if the balancing option is enabled, we check the load of the target node
and skip this node if:
- global pressure is too high (some cores are already above 100%, so we can't trust the cpu average)
- a vm on this node already has a high cpu pressure (> 0.75)
- target host cpu usage > 70%
- target host mem usage > 83%, or > 75% if ksm is already big (> 20% of memory in ksm)
(cpu pressure && ksm are not currently available in rrd; this still needs to be implemented)
ORDERING
--------
1a) if balancing is enabled, we compute a topsis score for the nodes with:
- biggest hagroup priority
- lowest cpu pressure
- lowest target cpu
- lowest target mem
Then we return the node with the highest score.
1b) if balancing is disabled, we use the classic ordering:
- hagroup priority
- online_node_usage
- node name
Then we return the first node of the list.
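As a worked example of the load filter (using the target formulas from Nodes.pm
below, with hypothetical numbers): a node with maxcpu 16, maxmem 128 GiB,
96 GiB mem in use and totalcpu 400 (four cores' worth of cpu) that receives a
vm with totalcpu 100 and mem 8 GiB ends up at
target_cpu_pct = (400 + 100) / 16 = 31.25 and
target_mem_pct = (96 + 8) * 100 / 128 = 81.25,
so it passes both the 70% cpu and 83% mem thresholds; if ksm on that node
already covered more than 20% of memory, the mem threshold would drop to 75
and the node would be filtered out.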
---
debian/pve-ha-manager.install | 1 +
src/PVE/HA/Balancer/Makefile | 2 +-
src/PVE/HA/Balancer/Nodes.pm | 217 ++++++++++++++++++++++++++++++++++
src/PVE/HA/Env.pm | 11 ++
src/PVE/HA/Env/PVE2.pm | 52 ++++++++
src/PVE/HA/Manager.pm | 32 +++--
src/PVE/HA/Sim/TestEnv.pm | 20 ++++
7 files changed, 327 insertions(+), 8 deletions(-)
create mode 100644 src/PVE/HA/Balancer/Nodes.pm
diff --git a/debian/pve-ha-manager.install b/debian/pve-ha-manager.install
index 6297997..e083214 100644
--- a/debian/pve-ha-manager.install
+++ b/debian/pve-ha-manager.install
@@ -22,6 +22,7 @@
/usr/share/perl5/PVE/HA/Balancer/AHP.pm
/usr/share/perl5/PVE/HA/Balancer/Topsis.pm
/usr/share/perl5/PVE/HA/Balancer/Stats.pm
+/usr/share/perl5/PVE/HA/Balancer/Nodes.pm
/usr/share/perl5/PVE/HA/Config.pm
/usr/share/perl5/PVE/HA/Config.pm
/usr/share/perl5/PVE/HA/Env.pm
diff --git a/src/PVE/HA/Balancer/Makefile b/src/PVE/HA/Balancer/Makefile
index 95ff86c..92ab8d3 100644
--- a/src/PVE/HA/Balancer/Makefile
+++ b/src/PVE/HA/Balancer/Makefile
@@ -1,4 +1,4 @@
-SOURCES=Topsis.pm AHP.pm Stats.pm
+SOURCES=Topsis.pm AHP.pm Stats.pm Nodes.pm
.PHONY: install
install:
diff --git a/src/PVE/HA/Balancer/Nodes.pm b/src/PVE/HA/Balancer/Nodes.pm
new file mode 100644
index 0000000..a06ed62
--- /dev/null
+++ b/src/PVE/HA/Balancer/Nodes.pm
@@ -0,0 +1,217 @@
+package PVE::HA::Balancer::Nodes;
+
+use strict;
+use warnings;
+use PVE::HA::Balancer::Topsis;
+use PVE::HA::Balancer::AHP;
+
+
+my $compute_node_target_cpu_pct = sub {
+ my ($node_stats, $vm_stats) = @_;
+
+ return 0 if $node_stats->{maxcpu} == 0;
+ return ($node_stats->{totalcpu} + $vm_stats->{totalcpu}) / $node_stats->{maxcpu};
+};
+
+my $compute_node_target_mem_pct = sub {
+ my ($node_stats, $vm_stats) = @_;
+
+ return 0 if $node_stats->{maxmem} == 0;
+ return ($node_stats->{mem} + $vm_stats->{mem}) * 100 / $node_stats->{maxmem};
+};
+
+my $add_prio = sub {
+ my ($self, $sd, $nodename, $group_members_prio) = @_;
+
+ my $vm_stats = $sd->{stats};
+ my $node_stats = $self->{online_node_stats}->{$nodename}->{stats};
+
+ my $node = {};
+ $node->{prio} = $group_members_prio->{$nodename};
+ $node->{affinity} = 0; #fixme, need to implement vm group
+ $node->{online_node_usage} = $self->{online_node_usage}->{$nodename};
+ $node->{name} = $nodename;
+ $node->{cpu_pressure} = 0; #fixme, need to stream rrd graph first
+ $node->{target_cpu_pct} = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
+ $node->{target_mem_pct} = &$compute_node_target_mem_pct($node_stats, $vm_stats);
+
+ return $node;
+};
+
+my $find_target_by_score = sub {
+ my($self, $nodes) = @_;
+
+ return if !keys %$nodes;
+
+ my $weights = $self->{balancer}->{topsis}->{nodes}->{weights};
+ my $order = $self->{balancer}->{topsis}->{nodes}->{order};
+ my $scores = PVE::HA::Balancer::Topsis::score($nodes, $weights, $order);
+
+ my @targets = sort {
+ $scores->{$b}->{score} <=> $scores->{$a}->{score}
+ } keys %$scores;
+
+ return $targets[0];
+};
+
+my $find_target_by_prio = sub {
+ my($self, $nodes) = @_;
+
+ return if !keys %$nodes;
+
+ my @targets = sort {
+ $nodes->{$b}->{prio} <=> $nodes->{$a}->{prio} ||
+ $nodes->{$a}->{online_node_usage} <=> $nodes->{$b}->{online_node_usage} ||
+ $nodes->{$a}->{name} cmp $nodes->{$b}->{name}
+ } keys %$nodes;
+ return $targets[0];
+};
+
+my $check_bridge_availability = sub {
+ my ($vmconf, $node) = @_;
+ #fixme
+ return 1;
+};
+
+my $check_cpumodel_compatibility = sub {
+ my ($vmconf, $node) = @_;
+ #fixme
+ return 1;
+};
+
+my $check_target_load = sub {
+ my ($self, $sd, $node) = @_;
+
+ return 1 if !$self->{balancer}->{enabled};
+
+ my $vm_stats = $sd->{stats};
+ my $node_stats = $self->{online_node_stats}->{$node}->{stats};
+
+ my $max_threshold = { cpu => 70, mem => 83, cpu_pressure => 3, vm_pressure => 0.75 };
+ # if ksm sharing is already huge (20% of total memory), reduce mem threshold to 75%
+ $max_threshold->{mem} = 75 if $node_stats->{ksm} > $node_stats->{maxmem} * 0.2;
+
+ my $target_mem_percent = &$compute_node_target_mem_pct($node_stats, $vm_stats);
+ return if $target_mem_percent > $max_threshold->{mem};
+
+ # don't use the node if global pressure is already too high (some cores are already above 100%, so we can't trust the cpu average)
+ return if $node_stats->{cpu_pressure} > $max_threshold->{cpu_pressure};
+
+ # don't use the node if a vm on it is already overloaded
+ return if $node_stats->{max_vm_pressure} > $max_threshold->{vm_pressure};
+
+ my $target_cpu_percent = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
+ return if $target_cpu_percent > $max_threshold->{cpu};
+
+ return 1;
+};
+
+my $check_hard_constraints = sub {
+ my ($self, $sd, $node, $group_members_prio) = @_;
+
+ my $haenv = $self->{haenv};
+ my $vm_stats = $sd->{stats};
+ my $node_stats = $self->{online_node_stats}->{$node}->{stats};
+ my $vmconf = $sd->{vmconf};
+ # the node needs to have a prio (restricted group)
+ return if !defined($group_members_prio->{$node});
+
+ # the vm can't start if the host has fewer cores
+ return if $node_stats->{maxcpu} < $vm_stats->{maxcpu};
+ # the vm can't start if the node doesn't have enough mem for the vm's max mem
+ return if ($node_stats->{maxmem} - $node_stats->{mem}) < $vm_stats->{maxmem};
+
+ return if !$haenv->check_storage_availability($vmconf, $sd->{type}, $node, $self->{storecfg});
+
+ return if !&$check_bridge_availability($vmconf, $node);
+
+ return if !&$check_cpumodel_compatibility($vmconf, $node);
+
+ return 1;
+};
+
+sub find_target {
+ my($self, $cd, $sd, $group_members_prio) = @_;
+
+ my $online_nodes = $self->{online_node_stats};
+
+ my $target_nodes = {};
+
+ foreach my $node (keys %$online_nodes) {
+
+ #### FILTERING NODES WITH HARD CONSTRAINTS (vm can't be started)
+ next if !&$check_hard_constraints($self, $sd, $node, $group_members_prio);
+
+ ### FILTERING overloaded nodes
+ next if !&$check_target_load($self, $sd, $node);
+
+ #### compute the different prios
+ $target_nodes->{$node} = &$add_prio($self, $sd, $node, $group_members_prio);
+ }
+
+ # if resource-aware balancing is enabled, order by score
+ if ($self->{balancer}->{enabled}) {
+ return &$find_target_by_score($self, $target_nodes);
+ } else {
+ return &$find_target_by_prio($self, $target_nodes);
+ }
+}
+
+
+sub compute_ahp_weights {
+ my ($self) = @_;
+
+ #"+" -> benefit -> bigger better
+ #"-" -> cost ---> lower better
+ my $bestorder = {
+ prio => "+",
+ affinity => "-",
+ cpu_pressure => "-",
+ target_cpu_pct => "-",
+ target_mem_pct => "-",
+ };
+
+ #1 : equal importance (two activities contribute equally to the objective)
+ #3 : Moderate importance of one factor over another (experience and judgment slightly favor one activity over another)
+ #5 : Strong or essential importance (experience and judgment strongly favor one activity over another)
+ #7 : Very strong importance (an activity is strongly favored and its dominance is demonstrated in practice)
+ #9 : Extreme importance (the evidence favoring one activity over another is the highest possible order of affirmation)
+
+
+ # same weight for cpu && mem, well-balanced node prio
+ my $preferences = {
+ prio => {
+ prio => 1,
+ affinity => 3,
+ cpu_pressure => 5,
+ target_cpu_pct => 9,
+ target_mem_pct => 9,
+ },
+ affinity => {
+ affinity => 1,
+ cpu_pressure => 3,
+ target_cpu_pct => 7,
+ target_mem_pct => 7,
+ },
+ cpu_pressure => {
+ cpu_pressure => 1,
+ target_cpu_pct => 5,
+ target_mem_pct => 7,
+ },
+ target_cpu_pct => {
+ target_cpu_pct => 1,
+ target_mem_pct => 1,
+ },
+ target_mem_pct => {
+ target_mem_pct => 1,
+ },
+ };
+
+ my $weights = PVE::HA::Balancer::AHP::compute_weights($preferences);
+
+ $self->{balancer}->{topsis}->{nodes}->{weights} = $weights;
+ $self->{balancer}->{topsis}->{nodes}->{order} = $bestorder;
+}
+
+
+1;
diff --git a/src/PVE/HA/Env.pm b/src/PVE/HA/Env.pm
index 2ecc186..757c5e0 100644
--- a/src/PVE/HA/Env.pm
+++ b/src/PVE/HA/Env.pm
@@ -297,4 +297,15 @@ sub read_vm_ct_config {
}
}
+sub read_storecfg {
+ my ($self) = @_;
+
+ return $self->{plug}->read_storecfg();
+}
+
+sub check_storage_availability {
+ my ($self, $vmconf, $type, $node, $storecfg) = @_;
+
+ return $self->{plug}->check_storage_availability($vmconf, $type, $node, $storecfg);
+}
1;
diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm
index 917aa62..ee97559 100644
--- a/src/PVE/HA/Env/PVE2.pm
+++ b/src/PVE/HA/Env/PVE2.pm
@@ -16,6 +16,7 @@ use PVE::API2Tools;
use PVE::QemuConfig;
use PVE::QemuServer;
use PVE::LXC::Config;
+use PVE::Storage;
use RRDs;
use PVE::HA::Tools ':exit_codes';
@@ -608,4 +609,55 @@ sub read_ct_config {
return $finalconf;
}
+sub read_storecfg {
+ my ($self) = @_;
+
+ return PVE::Storage::config();
+}
+
+sub check_storage_availability {
+ my ($self, $vmconf, $type, $node, $storecfg) = @_;
+
+ if ($type eq 'vm') {
+ eval { PVE::QemuServer::check_storage_availability($storecfg, $vmconf, $node) };
+ return if $@;
+ } elsif ($type eq 'ct') {
+ eval { check_lxc_storage_availability($storecfg, $vmconf, $node) };
+ return if $@;
+ }
+ return 1;
+}
+
+
+
+## copied from PVE::LXC::Migrate; should be added as PVE::LXC::check_storage_availability, like in qemu-server
+sub check_lxc_storage_availability {
+ my ($storecfg, $conf, $node) = @_;
+
+ PVE::LXC::Config->foreach_volume_full($conf, { include_unused => 1 }, sub {
+ my ($ms, $mountpoint) = @_;
+
+ my $volid = $mountpoint->{volume};
+ my $type = $mountpoint->{type};
+
+ # skip dev/bind mps when shared
+ if ($type ne 'volume') {
+ if ($mountpoint->{shared}) {
+ return;
+ } else {
+ die "cannot migrate local $type mount point '$ms'\n";
+ }
+ }
+
+ my ($storage, $volname) = $volid ? PVE::Storage::parse_volume_id($volid, 1) : ();
+ die "can't determine assigned storage for mount point '$ms'\n" if !$storage;
+
+ # check if storage is available on both nodes
+ my $scfg = PVE::Storage::storage_check_enabled($storecfg, $storage);
+ PVE::Storage::storage_check_enabled($storecfg, $storage, $node);
+
+ die "content type 'rootdir' is not available on storage '$storage'\n"
+ if !$scfg->{content}->{rootdir};
+ });
+}
1;
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 68b2872..e021d60 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -8,6 +8,7 @@ use PVE::Tools;
use PVE::HA::Tools ':exit_codes';
use PVE::HA::NodeStatus;
use PVE::HA::Balancer::Stats;
+use PVE::HA::Balancer::Nodes;
sub new {
my ($this, $haenv) = @_;
@@ -28,6 +29,7 @@ sub new {
$self->{ms} = { master_node => $haenv->nodename() };
PVE::HA::Balancer::Stats::compute_ahp_recovery_weights($self);
+ PVE::HA::Balancer::Nodes::compute_ahp_weights($self);
return $self;
}
@@ -399,8 +401,14 @@ sub manage {
$self->recompute_online_node_usage();
PVE::HA::Balancer::Stats::recompute_node_service_stats($self, $ss, $sc);
+ $self->{storecfg} = $haenv->read_storecfg();
+
+ foreach my $sid (
+ sort {
+ $ss->{$b}->{stats}->{recovery_score} <=> $ss->{$a}->{stats}->{recovery_score} ||
+ $ss->{$a}->{type} cmp $ss->{$b}->{type}}
+ keys %$ss) {
- foreach my $sid (sort keys %$ss) {
my $sd = $ss->{$sid};
my $cd = $sc->{$sid} || { state => 'disabled' };
@@ -816,12 +824,7 @@ sub next_state_recovery {
$self->recompute_online_node_usage(); # we want the most current node state
- my $recovery_node = select_service_node(
- $self->{groups},
- $self->{online_node_usage},
- $cd,
- $sd->{node},
- );
+ my $recovery_node = $self->find_node_target($cd, $sd);
if ($recovery_node) {
my $msg = "recover service '$sid' from fenced node '$fenced_node' to node '$recovery_node'";
@@ -836,6 +839,11 @@ sub next_state_recovery {
$haenv->steal_service($sid, $sd->{node}, $recovery_node);
$self->{online_node_usage}->{$recovery_node}++;
+ # add vm cpu/mem to the current node stats (an estimate based on the last 20 min of vm stats)
+ my $node_stats = $self->{online_node_stats}->{$recovery_node}->{stats};
+ $node_stats->{totalcpu} += $sd->{stats}->{totalcpu};
+ $node_stats->{mem} += $sd->{stats}->{mem};
+
# NOTE: $sd *is normally read-only*, fencing is the exception
$cd->{node} = $sd->{node} = $recovery_node;
my $new_state = ($cd->{state} eq 'started') ? 'started' : 'request_stop';
@@ -853,4 +861,14 @@ sub next_state_recovery {
}
}
+sub find_node_target {
+ my($self, $cd, $sd) = @_;
+
+ my $online_nodes = $self->{online_node_stats};
+ my $groups = $self->{groups};
+ my $hagroup = get_service_group($groups, $online_nodes, $cd);
+ my ($pri_groups, $group_members_prio) = get_node_priority_groups($hagroup, $online_nodes);
+ return PVE::HA::Balancer::Nodes::find_target($self, $cd, $sd, $group_members_prio);
+}
+
1;
diff --git a/src/PVE/HA/Sim/TestEnv.pm b/src/PVE/HA/Sim/TestEnv.pm
index ee261ef..8c86c84 100644
--- a/src/PVE/HA/Sim/TestEnv.pm
+++ b/src/PVE/HA/Sim/TestEnv.pm
@@ -164,4 +164,24 @@ sub read_ct_config {
return $self->{hardware}->{vm_config}->{$vmid};
}
+sub read_storecfg {
+ my ($self) = @_;
+
+ return $self->{hardware}->{storecfg};
+}
+
+sub check_storage_availability {
+ my ($self, $vmconf, $type, $node, $storecfg) = @_;
+
+ return 1 if !$vmconf;
+
+ my $vm_storage = $vmconf->{storage};
+ return 1 if !defined($vm_storage);
+
+ foreach my $storeid (keys %$storecfg) {
+ next if !defined($storecfg->{$storeid}->{nodes}->{$node});
+ return 1 if $vm_storage eq $storeid;
+ }
+}
+
1;
\ No newline at end of file
--
2.30.2
* [pve-devel] [PATCH pve-ha-manager 4/8] add vm loadbalancing
From: Alexandre Derumier @ 2022-04-27 15:33 UTC (permalink / raw)
To: pve-devel
This is a vm-centric load balancer, with some inspiration from the
vmware DRS 2.0 scheduler.
https://blogs.vmware.com/vsphere/2020/05/vsphere-7-a-closer-look-at-the-vm-drs-score.html
It looks at badly performing vms and gives each such vm a cluster-wide
topsis score.
For each vm (CTs are skipped, as we can't live-migrate them),
we keep the vms with:
- high cpu pressure (> 0.75)
- high pseudo memory pressure.
We could use the true vm memory pressure, but by then it's already too late:
the host is already swapping.
Pseudo memory pressure is the host memory over the threshold, as a percentage of the vm's memory
(the mem threshold is 85% of host memory, or 80% if ksm is already big).
- not yet implemented: vm affinity could be checked here
Then we compute a topsis score with:
- biggest affinity
- biggest cpu pressure
- biggest memory pseudo pressure
- lowest cpu usage
- lowest mem
and we try to migrate the highest-scored vm to the best target node (same as for service recovery).
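As a worked example of the pseudo memory pressure (using the formula from
Services.pm below, with hypothetical numbers): on a host with 128 GiB maxmem
the threshold is 0.85 * 128 = 108.8 GiB; with 112 GiB in use, a vm using
8 GiB would bring the host back under the threshold if evicted
(112 - 8 = 104), so it gets mem_pseudo_pressure = (112 - 108.8) / 8 = 0.4.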
---
debian/pve-ha-manager.install | 1 +
src/PVE/HA/Balancer/Makefile | 2 +-
src/PVE/HA/Balancer/Services.pm | 187 ++++++++++++++++++++++++++++++++
src/PVE/HA/Manager.pm | 47 +++++++-
4 files changed, 235 insertions(+), 2 deletions(-)
create mode 100644 src/PVE/HA/Balancer/Services.pm
diff --git a/debian/pve-ha-manager.install b/debian/pve-ha-manager.install
index e083214..3bc7cc8 100644
--- a/debian/pve-ha-manager.install
+++ b/debian/pve-ha-manager.install
@@ -23,6 +23,7 @@
/usr/share/perl5/PVE/HA/Balancer/Topsis.pm
/usr/share/perl5/PVE/HA/Balancer/Stats.pm
/usr/share/perl5/PVE/HA/Balancer/Nodes.pm
+/usr/share/perl5/PVE/HA/Balancer/Services.pm
/usr/share/perl5/PVE/HA/Config.pm
/usr/share/perl5/PVE/HA/Config.pm
/usr/share/perl5/PVE/HA/Env.pm
diff --git a/src/PVE/HA/Balancer/Makefile b/src/PVE/HA/Balancer/Makefile
index 92ab8d3..ec1823d 100644
--- a/src/PVE/HA/Balancer/Makefile
+++ b/src/PVE/HA/Balancer/Makefile
@@ -1,4 +1,4 @@
-SOURCES=Topsis.pm AHP.pm Stats.pm Nodes.pm
+SOURCES=Topsis.pm AHP.pm Stats.pm Nodes.pm Services.pm
.PHONY: install
install:
diff --git a/src/PVE/HA/Balancer/Services.pm b/src/PVE/HA/Balancer/Services.pm
new file mode 100644
index 0000000..6cce6a7
--- /dev/null
+++ b/src/PVE/HA/Balancer/Services.pm
@@ -0,0 +1,187 @@
+package PVE::HA::Balancer::Services;
+
+use strict;
+use warnings;
+use PVE::HA::Balancer::Topsis;
+use PVE::HA::Balancer::AHP;
+
+my $check_anti_affinity = sub {
+ my ($vmid, $node, $vm_stats) = @_;
+
+ #implement me
+
+ return undef;
+};
+
+my $check_cpu_pressure = sub {
+ my ($vm_stats) = @_;
+
+ my $eviction_threshold = 0.75;
+ return 1 if $vm_stats->{cpu_pressure} > $eviction_threshold;
+};
+
+my $check_pseudo_mem_pressure = sub {
+ my ($node_stats, $vm_stats) = @_;
+
+ my $eviction_threshold = 85;
+
+ #if ksm is already big, lowering threshold
+ $eviction_threshold = 80 if $node_stats->{ksm} > ($node_stats->{maxmem} * 0.2);
+ my $node_mem_threshold = $node_stats->{maxmem} * $eviction_threshold/100;
+
+ if ($node_stats->{mem} > $node_mem_threshold) {
+ # if removing the vm fixes the threshold,
+ # we compute a pseudo pressure (host mem over the threshold as a percentage of vm mem)
+
+ if (($node_stats->{mem} - $vm_stats->{mem}) < $node_mem_threshold) {
+ $vm_stats->{mem_pseudo_pressure} = ($node_stats->{mem} - $node_mem_threshold) / $vm_stats->{mem};
+ }
+ # other vms are still added (for the case where no single vm can bring memory under the threshold);
+ # in that case, we simply order by vm used memory
+ return 1;
+ }
+};
+
+my $get_bad_vms = sub {
+ my ($self, $ss, $sc, $online_nodes) = @_;
+
+ my $bad_vms = {};
+
+ foreach my $sid (keys %$ss) {
+
+ my $cd = $sc->{$sid};
+ my $sd = $ss->{$sid};
+
+ next if !$cd;
+ # can't live migrate ct
+ next if $cd->{type} ne 'vm';
+ # only migrate started state
+ next if $cd->{state} ne 'started';
+
+ # don't migrate the same sid multiple times
+ next if $self->{balancer}->{last_migrate_sid} && $self->{balancer}->{last_migrate_sid} eq $sid;
+
+ my $node = $cd->{node};
+ # skip if the node is not online or not responding
+ next if !defined($online_nodes->{$node});
+
+ my $node_stats = $online_nodes->{$node}->{stats};
+ my $vm_stats = $sd->{stats};
+
+ # skip the vm if it was recently started or migrated
+ next if !defined($vm_stats->{uptime}) || $vm_stats->{uptime} < 300;
+
+ # fixme : skip if local disk
+ # &$check_vm_disks_local($storecfg, $vmconf, $vmid);
+ # fixme : skip if local ressources
+ #PVE::QemuServer::check_local_resources($vmconf, 1);
+
+
+ $vm_stats->{affinity} = 0;
+ $vm_stats->{mem_pseudo_pressure} = 0;
+
+ my $add_vm = undef;
+
+ $add_vm = 1 if &$check_anti_affinity($sid, $node, $vm_stats);
+ $add_vm = 1 if &$check_cpu_pressure($vm_stats);
+ $add_vm = 1 if &$check_pseudo_mem_pressure($node_stats, $vm_stats);
+ next if !$add_vm;
+
+ my $prio = {
+ affinity => $vm_stats->{affinity},
+ mem_pseudo_pressure => $vm_stats->{mem_pseudo_pressure},
+ mem => $vm_stats->{mem},
+ totalcpu => $vm_stats->{totalcpu},
+ cpu_pressure => $vm_stats->{cpu_pressure},
+ };
+
+ $bad_vms->{$sid} = $prio;
+ }
+ return $bad_vms if keys %$bad_vms;
+};
+
+my $get_score = sub {
+ my ($self, $vms) = @_;
+
+ my $weights = $self->{balancer}->{topsis}->{bad_vms}->{weights};
+ my $order = $self->{balancer}->{topsis}->{bad_vms}->{order};
+ my $scores = PVE::HA::Balancer::Topsis::score($vms, $weights, $order);
+
+ return $scores;
+};
+
+sub get_vm_targetnode {
+ my($self, $ss, $sc) = @_;
+
+ my $online_nodes = $self->{online_node_stats};
+
+ my $bad_vms = &$get_bad_vms($self, $ss, $sc, $online_nodes);
+ return if !$bad_vms;
+
+ my $vm_scores = &$get_score($self, $bad_vms);
+
+ foreach my $sid (
+ sort {
+ $vm_scores->{$b}->{score} <=> $vm_scores->{$a}->{score}
+ } keys %$vm_scores) {
+
+ my $cd = $sc->{$sid};
+ my $sd = $ss->{$sid};
+
+ my $node = $self->find_node_target($cd , $sd);
+ next if !$node;
+
+ # register last sid we tried to migrate, to not try to balance it in loop
+ $self->{balancer}->{last_migrate_sid} = $sid;
+
+ return ($sid, $node);
+ }
+}
+
+sub compute_ahp_weights {
+ my ($self) = @_;
+
+ my $bestorder = {
+ affinity => "+",
+ cpu_pressure => "+",
+ totalcpu => "-",
+ mem => "-",
+ mem_pseudo_pressure => "+",
+ };
+
+ my $preferences = {
+ affinity => {
+ affinity => 1,
+ cpu_pressure => 9,
+ mem_pseudo_pressure => 9,
+ mem => 9,
+ totalcpu => 9,
+ },
+ cpu_pressure => {
+ cpu_pressure => 1,
+ mem_pseudo_pressure => 3,
+ mem => 3,
+ totalcpu => 5,
+ },
+ mem_pseudo_pressure => {
+ mem_pseudo_pressure => 1,
+ mem => 3,
+ totalcpu => 3,
+ },
+ mem => {
+ mem => 1,
+ totalcpu => 2,
+ },
+ totalcpu => {
+ totalcpu => 1,
+ },
+ };
+
+ my $weights = PVE::HA::Balancer::AHP::compute_weights($preferences);
+
+ $self->{balancer}->{topsis}->{bad_vms}->{weights} = $weights;
+ $self->{balancer}->{topsis}->{bad_vms}->{order} = $bestorder;
+}
+
+
+1;
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index e021d60..6fa866a 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -9,6 +9,7 @@ use PVE::HA::Tools ':exit_codes';
use PVE::HA::NodeStatus;
use PVE::HA::Balancer::Stats;
use PVE::HA::Balancer::Nodes;
+use PVE::HA::Balancer::Services;
sub new {
my ($this, $haenv) = @_;
@@ -30,6 +31,7 @@ sub new {
PVE::HA::Balancer::Stats::compute_ahp_recovery_weights($self);
PVE::HA::Balancer::Nodes::compute_ahp_weights($self);
+ PVE::HA::Balancer::Services::compute_ahp_weights($self);
return $self;
}
@@ -251,6 +253,9 @@ my $change_service_state = sub {
$text_state = " ($text_state)" if $text_state;
$haenv->log('info', "service '$sid': state changed from '${old_state}'" .
" to '${new_state}'$text_state");
+
+ $self->{balancer}->{last_state_change} = $haenv->get_time();
+
};
# clean up a possible bad state from a recovered service to allow its start
@@ -402,6 +407,7 @@ sub manage {
PVE::HA::Balancer::Stats::recompute_node_service_stats($self, $ss, $sc);
$self->{storecfg} = $haenv->read_storecfg();
+ $self->balancer_status();
foreach my $sid (
sort {
@@ -493,10 +499,12 @@ sub manage {
$haenv->log('notice', "node '$node' in fence state but no services to-fence! admin interference?!");
$repeat = 1 if $ns->fence_node($node);
}
-
last if !$repeat;
}
+ # if all service states are stable, do load balancing
+ $self->loadbalance($ss, $sc);
+
$self->flush_master_status();
}
@@ -871,4 +879,41 @@ sub find_node_target {
return PVE::HA::Balancer::Nodes::find_target($self, $cd, $sd, $group_members_prio);
}
+sub loadbalance {
+ my($self, $ss, $sc) = @_;
+
+ return if !$self->{balancer}->{enabled};
+
+ my $haenv = $self->{haenv};
+
+ my $now = $haenv->get_time();
+
+ #check only once per minute
+ $self->{balancer}->{lastcheck} = $now if !defined($self->{balancer}->{lastcheck});
+ my $delta_check = $now - $self->{balancer}->{lastcheck};
+ return if $delta_check < 60;
+ $self->{balancer}->{lastcheck} = $now;
+
+ #check only 1 minute after last state change
+ $self->{balancer}->{last_state_change} = $now if !defined($self->{balancer}->{last_state_change});
+ my $delta_state_change = $now - $self->{balancer}->{last_state_change};
+ return if !$delta_state_change || $delta_state_change < 60;
+
+ my ($sid, $node) = PVE::HA::Balancer::Services::get_vm_targetnode($self, $ss, $sc);
+
+ return if !$sid || !$node;
+
+ # do migration
+ my $sd = $ss->{$sid};
+ $self->{haenv}->log('info', "balancer - migrate service '$sid' to node '$node' (running)");
+ &$change_service_state($self, $sid, 'migrate', node => $sd->{node}, target => $node);
+}
+
+sub balancer_status {
+ my($self) = @_;
+
+ my $dc_ha_cfg = $self->{haenv}->get_ha_settings();
+ $self->{balancer}->{enabled} = $dc_ha_cfg->{balancer};
+}
+
1;
--
2.30.2
* [pve-devel] [PATCH pve-ha-manager 5/8] sim : hardware: add read stats
From: Alexandre Derumier @ 2022-04-27 15:33 UTC (permalink / raw)
To: pve-devel
---
src/PVE/HA/Sim/Hardware.pm | 150 +++++++++++++++++++++++++++++++++++++
1 file changed, 150 insertions(+)
diff --git a/src/PVE/HA/Sim/Hardware.pm b/src/PVE/HA/Sim/Hardware.pm
index 96a4064..3c3622b 100644
--- a/src/PVE/HA/Sim/Hardware.pm
+++ b/src/PVE/HA/Sim/Hardware.pm
@@ -110,6 +110,46 @@ sub read_service_config {
return $conf;
}
+sub read_service_stats {
+ my ($self) = @_;
+
+ my $filename = "$self->{statusdir}/service_stats";
+ my $conf = PVE::HA::Tools::read_json_from_file($filename);
+ return $conf;
+}
+
+sub read_vm_config {
+ my ($self) = @_;
+
+ my $filename = "$self->{statusdir}/vm_config";
+ my $conf = PVE::HA::Tools::read_json_from_file($filename);
+ return $conf;
+}
+
+sub read_node_stats {
+ my ($self) = @_;
+
+ my $filename = "$self->{statusdir}/node_stats";
+ my $conf = PVE::HA::Tools::read_json_from_file($filename);
+ return $conf;
+}
+
+sub read_node_config {
+ my ($self) = @_;
+
+ my $filename = "$self->{statusdir}/node_config";
+ my $conf = PVE::HA::Tools::read_json_from_file($filename);
+ return $conf;
+}
+
+sub read_storecfg {
+ my ($self) = @_;
+
+ my $filename = "$self->{statusdir}/storecfg";
+ my $conf = PVE::HA::Tools::read_json_from_file($filename);
+ return $conf;
+}
+
sub update_service_config {
my ($self, $sid, $param) = @_;
@@ -133,6 +173,51 @@ sub write_service_config {
return PVE::HA::Tools::write_json_to_file($filename, $conf);
}
+sub write_vm_config {
+ my ($self, $conf) = @_;
+
+ $self->{vm_config} = $conf;
+
+ my $filename = "$self->{statusdir}/vm_config";
+ return PVE::HA::Tools::write_json_to_file($filename, $conf);
+}
+
+sub write_service_stats {
+ my ($self, $conf) = @_;
+
+ $self->{service_stats} = $conf;
+
+ my $filename = "$self->{statusdir}/service_stats";
+ return PVE::HA::Tools::write_json_to_file($filename, $conf);
+}
+
+sub write_node_config {
+ my ($self, $conf) = @_;
+
+ $self->{node_config} = $conf;
+
+ my $filename = "$self->{statusdir}/node_config";
+ return PVE::HA::Tools::write_json_to_file($filename, $conf);
+}
+
+sub write_node_stats {
+ my ($self, $conf) = @_;
+
+ $self->{node_stats} = $conf;
+
+ my $filename = "$self->{statusdir}/node_stats";
+ return PVE::HA::Tools::write_json_to_file($filename, $conf);
+}
+
+sub write_storecfg {
+ my ($self, $conf) = @_;
+
+ $self->{storecfg} = $conf;
+
+ my $filename = "$self->{statusdir}/storecfg";
+ return PVE::HA::Tools::write_json_to_file($filename, $conf);
+}
+
sub read_fence_config {
my ($self) = @_;
@@ -384,6 +469,66 @@ sub new {
$self->write_service_config($conf);
}
+ if (-f "$testdir/service_stats") {
+ copy("$testdir/service_stats", "$statusdir/service_stats");
+ } else {
+ my $conf = {
+ '101' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0, cpu_pressure => 0 },
+ '102' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0, cpu_pressure => 0 },
+ '103' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0, cpu_pressure => 0 },
+ '104' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0, cpu_pressure => 0 },
+ '105' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0, cpu_pressure => 0 },
+ '106' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0, cpu_pressure => 0 },
+ };
+ $self->write_service_stats($conf);
+ }
+
+ if (-f "$testdir/vm_config") {
+ copy("$testdir/vm_config", "$statusdir/vm_config");
+ } else {
+ my $conf = {
+ '101' => { ostype => 'l26', storage => 'local' },
+ '102' => { ostype => 'l26', storage => 'local' },
+ '103' => { ostype => 'l26', storage => 'local' },
+ '104' => { ostype => 'l26', storage => 'local' },
+ '105' => { ostype => 'l26', storage => 'local' },
+ '106' => { ostype => 'l26', storage => 'local' },
+ };
+ $self->write_vm_config($conf);
+ }
+
+ if (-f "$testdir/node_config") {
+ copy("$testdir/node_config", "$statusdir/node_config");
+ } else {
+ my $conf = {
+ 'node1' => { cpumodel => '' },
+ 'node2' => { cpumodel => '' },
+ 'node3' => { cpumodel => '' },
+ };
+ $self->write_node_config($conf);
+ }
+
+ if (-f "$testdir/node_stats") {
+ copy("$testdir/node_stats", "$statusdir/node_stats");
+ } else {
+ my $conf = {
+ 'node1' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0, ksm => 0, cpu_pressure => 0 },
+ 'node2' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0, ksm => 0, cpu_pressure => 0 },
+ 'node3' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0, ksm => 0, cpu_pressure => 0 },
+ };
+ $self->write_node_stats($conf);
+ }
+
+ if (-f "$testdir/storecfg") {
+ copy("$testdir/storecfg", "$statusdir/storecfg");
+ } else {
+ my $conf = {
+ 'local' => { nodes => { node1 => 1, node2 => 1, node3 => 1 } },
+ 'local-lvm' => { nodes => { node1 => 1, node2 => 1, node3 => 1 } },
+ };
+ $self->write_storecfg($conf);
+ }
+
if (-f "$testdir/hardware_status") {
copy("$testdir/hardware_status", "$statusdir/hardware_status") ||
die "Copy failed: $!\n";
@@ -417,6 +562,11 @@ sub new {
}
$self->{service_config} = $self->read_service_config();
+ $self->{vm_config} = $self->read_vm_config();
+ $self->{service_stats} = $self->read_service_stats();
+ $self->{node_stats} = $self->read_node_stats();
+ $self->{node_config} = $self->read_node_config();
+ $self->{storecfg} = $self->read_storecfg();
return $self;
}
--
2.30.2
* [pve-devel] [PATCH pve-ha-manager 6/8] add resources group
From: Alexandre Derumier @ 2022-04-27 15:33 UTC (permalink / raw)
To: pve-devel
---
debian/pve-ha-manager.install | 1 +
src/PVE/HA/Config.pm | 22 +++++++++
src/PVE/HA/Env.pm | 6 +++
src/PVE/HA/Env/PVE2.pm | 6 +++
src/PVE/HA/Makefile | 2 +-
src/PVE/HA/Manager.pm | 1 +
src/PVE/HA/ResourcesGroups.pm | 90 +++++++++++++++++++++++++++++++++++
src/PVE/HA/Sim/Env.pm | 8 ++++
src/PVE/HA/Sim/Hardware.pm | 15 ++++++
src/PVE/HA/Tools.pm | 6 +++
10 files changed, 156 insertions(+), 1 deletion(-)
create mode 100644 src/PVE/HA/ResourcesGroups.pm
diff --git a/debian/pve-ha-manager.install b/debian/pve-ha-manager.install
index 3bc7cc8..a4486eb 100644
--- a/debian/pve-ha-manager.install
+++ b/debian/pve-ha-manager.install
@@ -35,6 +35,7 @@
/usr/share/perl5/PVE/HA/Manager.pm
/usr/share/perl5/PVE/HA/NodeStatus.pm
/usr/share/perl5/PVE/HA/Resources.pm
+/usr/share/perl5/PVE/HA/ResourcesGroups.pm
/usr/share/perl5/PVE/HA/Resources/PVECT.pm
/usr/share/perl5/PVE/HA/Resources/PVEVM.pm
/usr/share/perl5/PVE/HA/Tools.pm
diff --git a/src/PVE/HA/Config.pm b/src/PVE/HA/Config.pm
index 993b456..f9805d2 100644
--- a/src/PVE/HA/Config.pm
+++ b/src/PVE/HA/Config.pm
@@ -6,11 +6,13 @@ use JSON;
use PVE::HA::Tools;
use PVE::HA::Groups;
+use PVE::HA::ResourcesGroups;
use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
use PVE::HA::Resources;
my $manager_status_filename = "ha/manager_status";
my $ha_groups_config = "ha/groups.cfg";
+my $ha_resources_groups_config = "ha/resources_groups.cfg";
my $ha_resources_config = "ha/resources.cfg";
my $crm_commands_filename = "ha/crm_commands";
my $ha_fence_config = "ha/fence.cfg";
@@ -21,6 +23,9 @@ cfs_register_file($crm_commands_filename,
cfs_register_file($ha_groups_config,
sub { PVE::HA::Groups->parse_config(@_); },
sub { PVE::HA::Groups->write_config(@_); });
+cfs_register_file($ha_resources_groups_config,
+ sub { PVE::HA::ResourcesGroups->parse_config(@_); },
+ sub { PVE::HA::ResourcesGroups->write_config(@_); });
cfs_register_file($ha_resources_config,
sub { PVE::HA::Resources->parse_config(@_); },
sub { PVE::HA::Resources->write_config(@_); });
@@ -78,6 +83,12 @@ sub parse_groups_config {
return PVE::HA::Groups->parse_config($filename, $raw);
}
+sub parse_resources_groups_config {
+ my ($filename, $raw) = @_;
+
+ return PVE::HA::ResourcesGroups->parse_config($filename, $raw);
+}
+
sub parse_resources_config {
my ($filename, $raw) = @_;
@@ -200,6 +211,17 @@ sub write_group_config {
cfs_write_file($ha_groups_config, $cfg);
}
+sub read_resources_groups_config {
+
+ return cfs_read_file($ha_resources_groups_config);
+}
+
+sub write_resources_groups_config {
+ my ($cfg) = @_;
+
+ cfs_write_file($ha_resources_groups_config, $cfg);
+}
+
sub write_resources_config {
my ($cfg) = @_;
diff --git a/src/PVE/HA/Env.pm b/src/PVE/HA/Env.pm
index 757c5e0..0c917e5 100644
--- a/src/PVE/HA/Env.pm
+++ b/src/PVE/HA/Env.pm
@@ -130,6 +130,12 @@ sub read_group_config {
return $self->{plug}->read_group_config();
}
+sub read_resources_groups_config {
+ my ($self) = @_;
+
+ return $self->{plug}->read_resources_groups_config();
+}
+
# this should return a hash containing info
# what nodes are members and online.
sub get_node_info {
diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm
index ee97559..6e0a55f 100644
--- a/src/PVE/HA/Env/PVE2.pm
+++ b/src/PVE/HA/Env/PVE2.pm
@@ -189,6 +189,12 @@ sub read_group_config {
return PVE::HA::Config::read_group_config();
}
+sub read_resources_groups_config {
+ my ($self) = @_;
+
+ return PVE::HA::Config::read_resources_groups_config();
+}
+
# this should return a hash containing info
# what nodes are members and online.
sub get_node_info {
diff --git a/src/PVE/HA/Makefile b/src/PVE/HA/Makefile
index a548c86..bafc7c0 100644
--- a/src/PVE/HA/Makefile
+++ b/src/PVE/HA/Makefile
@@ -1,5 +1,5 @@
SIM_SOURCES=CRM.pm Env.pm Groups.pm Resources.pm LRM.pm Manager.pm \
- NodeStatus.pm Tools.pm FenceConfig.pm Fence.pm
+ NodeStatus.pm Tools.pm FenceConfig.pm Fence.pm ResourcesGroups.pm
SOURCES=${SIM_SOURCES} Config.pm
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 6fa866a..4e318bd 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -371,6 +371,7 @@ sub manage {
my $sc = $haenv->read_service_config();
$self->{groups} = $haenv->read_group_config(); # update
+ $self->{resources_groups} = $haenv->read_resources_groups_config();
# compute new service status
diff --git a/src/PVE/HA/ResourcesGroups.pm b/src/PVE/HA/ResourcesGroups.pm
new file mode 100644
index 0000000..10f97a6
--- /dev/null
+++ b/src/PVE/HA/ResourcesGroups.pm
@@ -0,0 +1,90 @@
+package PVE::HA::ResourcesGroups;
+
+use strict;
+use warnings;
+
+use PVE::JSONSchema qw(get_standard_option);
+use PVE::SectionConfig;
+use PVE::HA::Tools;
+
+use base qw(PVE::SectionConfig);
+
+my $defaultData = {
+ propertyList => {
+ type => {
+ description => "Group type.",
+ optional => 1,
+ },
+ group => get_standard_option('pve-ha-group-id',
+ { completion => \&PVE::HA::Tools::complete_group }),
+ resources => get_standard_option('pve-ha-resource-id-list'),
+ affinity => {
+ description => "group or separate vms on same host",
+ type => 'string',
+ optional => 1,
+ enum => ['group', 'separate'],
+ },
+ },
+};
+
+sub type {
+ return 'resourcegroup';
+}
+
+sub options {
+ return {
+ resources => { optional => 0 },
+ affinity => { optional => 1 },
+ };
+}
+
+sub private {
+ return $defaultData;
+}
+
+sub decode_value {
+ my ($class, $type, $key, $value) = @_;
+
+ if ($key eq 'resources') {
+ my $res = {};
+
+ foreach my $resource (PVE::Tools::split_list($value)) {
+ if (PVE::HA::Tools::pve_verify_ha_resource_id($resource)) {
+ $res->{$resource} = 1;
+ }
+ }
+
+ return $res;
+ }
+
+ return $value;
+}
+
+sub encode_value {
+ my ($class, $type, $key, $value) = @_;
+
+ if ($key eq 'resources') {
+ return join(',', keys(%$value));
+ }
+
+ return $value;
+}
+
+sub parse_section_header {
+ my ($class, $line) = @_;
+
+ if ($line =~ m/^(\S+):\s*(\S+)\s*$/) {
+ my ($type, $group) = (lc($1), $2);
+ my $errmsg = undef; # set if you want to skip whole section
+ eval { PVE::JSONSchema::pve_verify_configid($group); };
+ $errmsg = $@ if $@;
+ my $config = {}; # to return additional attributes
+ return ($type, $group, $errmsg, $config);
+ }
+ return undef;
+}
+
+__PACKAGE__->register();
+__PACKAGE__->init();
+
+1;
diff --git a/src/PVE/HA/Sim/Env.pm b/src/PVE/HA/Sim/Env.pm
index b286708..c3c541b 100644
--- a/src/PVE/HA/Sim/Env.pm
+++ b/src/PVE/HA/Sim/Env.pm
@@ -253,6 +253,14 @@ sub read_group_config {
return $self->{hardware}->read_group_config();
}
+sub read_resources_groups_config {
+ my ($self) = @_;
+
+ $assert_cfs_can_rw->($self);
+
+ return $self->{hardware}->read_resources_groups_config();
+}
+
# this is normally only allowed by the master to recover a _fenced_ service
sub steal_service {
my ($self, $sid, $current_node, $new_node) = @_;
diff --git a/src/PVE/HA/Sim/Hardware.pm b/src/PVE/HA/Sim/Hardware.pm
index 3c3622b..70c2031 100644
--- a/src/PVE/HA/Sim/Hardware.pm
+++ b/src/PVE/HA/Sim/Hardware.pm
@@ -18,6 +18,7 @@ use POSIX qw(strftime EINTR);
use PVE::HA::FenceConfig;
use PVE::HA::Groups;
+use PVE::HA::ResourcesGroups;
my $watchdog_timeout = 60;
@@ -397,6 +398,16 @@ sub read_group_config {
return PVE::HA::Groups->parse_config($filename, $raw);
}
+sub read_resources_groups_config {
+ my ($self) = @_;
+
+ my $filename = "$self->{statusdir}/resources_groups";
+ my $raw = '';
+ $raw = PVE::Tools::file_get_contents($filename) if -f $filename;
+
+ return PVE::HA::ResourcesGroups->parse_config($filename, $raw);
+}
+
sub read_service_status {
my ($self, $node) = @_;
@@ -455,6 +466,10 @@ sub new {
PVE::Tools::file_set_contents("$statusdir/groups", $default_group_config);
}
+ if (-f "$testdir/resources_groups") {
+ copy("$testdir/resources_groups", "$statusdir/resources_groups");
+ }
+
if (-f "$testdir/service_config") {
copy("$testdir/service_config", "$statusdir/service_config");
} else {
diff --git a/src/PVE/HA/Tools.pm b/src/PVE/HA/Tools.pm
index 1a88351..5b9823a 100644
--- a/src/PVE/HA/Tools.pm
+++ b/src/PVE/HA/Tools.pm
@@ -44,6 +44,12 @@ PVE::JSONSchema::register_standard_option('pve-ha-resource-id', {
type => 'string', format => 'pve-ha-resource-id',
});
+PVE::JSONSchema::register_standard_option('pve-ha-resource-id-list', {
+ description => "List of resources",
+ type => 'string', format => 'pve-ha-resource-id-list',
+ typetext => '<resource>{,<resource>}*',
+});
+
PVE::JSONSchema::register_format('pve-ha-resource-or-vm-id', \&pve_verify_ha_resource_or_vm_id);
sub pve_verify_ha_resource_or_vm_id {
my ($sid, $noerr) = @_;
--
2.30.2
* [pve-devel] [PATCH pve-ha-manager 7/8] add affinity
From: Alexandre Derumier @ 2022-04-27 15:33 UTC (permalink / raw)
To: pve-devel
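This replaces the check_anti_affinity() placeholder with a real affinity
score. For a service and a candidate node, compute_affinity() walks all
resources groups the service belongs to and counts the other members
that would violate the group's rule: 'separate' members already running
on the candidate node, or 'group' members running on another node. Lower
is better; a candidate node with a nonzero score is skipped when that
score would be greater than or equal to the service's current one, so a
migration can only improve affinity. A nonzero score also marks a vm as
a balancing candidate in get_bad_vms().
A small worked example, assuming the configuration sketched in the
previous patch (vm:101 and vm:103 in one group with affinity 'group',
vm:101 on node1), when placing vm:103:
	# candidate node1: vm:101 runs here -> 0 violations -> score 0
	# candidate node2: vm:101 runs elsewhere -> score 1
	# candidate node3: vm:101 runs elsewhere -> score 1
	# => node1 wins, grouping vm:103 with vm:101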
---
src/PVE/HA/Balancer/Nodes.pm | 36 +++++++++++++++--------------
src/PVE/HA/Balancer/Services.pm | 40 +++++++++++++++++++++++----------
src/PVE/HA/Manager.pm | 24 ++++++++++++++++----
3 files changed, 67 insertions(+), 33 deletions(-)
diff --git a/src/PVE/HA/Balancer/Nodes.pm b/src/PVE/HA/Balancer/Nodes.pm
index a06ed62..bce4c38 100644
--- a/src/PVE/HA/Balancer/Nodes.pm
+++ b/src/PVE/HA/Balancer/Nodes.pm
@@ -4,7 +4,7 @@ use strict;
use warnings;
use PVE::HA::Balancer::Topsis;
use PVE::HA::Balancer::AHP;
-
+use PVE::HA::Balancer::Services;
my $compute_node_target_cpu_pct = sub{
my ($node_stats, $vm_stats) = @_;
@@ -21,19 +21,16 @@ my $compute_node_target_mem_pct = sub {
};
my $add_prio = sub {
- my ($self, $sd, $nodename, $group_members_prio) = @_;
-
- my $vm_stats = $sd->{stats};
- my $node_stats = $self->{online_node_stats}->{$nodename}->{stats};
+ my ($self, $nodename, $group_members_prio, $target_stats) = @_;
my $node = {};
$node->{prio} = $group_members_prio->{$nodename};
- $node->{affinity} = 0; #fixme, need to implement vm group
+ $node->{affinity} = $target_stats->{affinity} || 0;
$node->{online_node_usage} = $self->{online_node_usage}->{$nodename};
$node->{name} = $nodename;
$node->{cpu_pressure} = 0; #fixme, need to stream rrd graph first
- $node->{target_cpu_pct} = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
- $node->{target_mem_pct} = &$compute_node_target_mem_pct($node_stats, $vm_stats);
+ $node->{target_cpu_pct} = $target_stats->{cpu_pct} || 0;
+ $node->{target_mem_pct} = $target_stats->{mem_pct} || 0;
return $node;
};
@@ -80,7 +77,7 @@ my $check_cpumodel_compatibility = sub {
};
my $check_target_load = sub {
- my ($self, $sd, $node) = @_;
+ my ($self, $sid, $sd, $node, $target_stats) = @_;
return 1 if !$self->{balancer}->{enabled};
@@ -91,8 +88,8 @@ my $check_target_load = sub {
# if ksm sharing is already huge (20% of total memory), reduce mem threshold to 75%
$max_threshold->{mem} = 75 if $node_stats->{ksm} > $node_stats->{maxmem} * 0.2;
- my $target_mem_percent = &$compute_node_target_mem_pct($node_stats, $vm_stats);
- return if $target_mem_percent > $max_threshold->{mem};
+ $target_stats->{mem_pct} = &$compute_node_target_mem_pct($node_stats, $vm_stats);
+ return if $target_stats->{mem_pct} > $max_threshold->{mem};
#don't use node if already too much global pressure (some cores are already more than 100%, so we can't trust cpu average)
return if $node_stats->{cpu_pressure} > $max_threshold->{cpu_pressure};
@@ -100,8 +97,12 @@ my $check_target_load = sub {
#don't use node if a vm is already overloaded on this node
return if $node_stats->{max_vm_pressure} > $max_threshold->{vm_pressure};
- my $target_cpu_percent = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
- return if $target_cpu_percent > $max_threshold->{cpu};
+ $target_stats->{cpu_pct} = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
+ return if $target_stats->{cpu_pct} > $max_threshold->{cpu};
+
+ # don't use this node if its affinity score is greater than or equal to the current one
+ $target_stats->{affinity} = PVE::HA::Balancer::Services::compute_affinity($self, $sid, $node);
+ return if defined($vm_stats->{affinity}) && $target_stats->{affinity} != 0 && $target_stats->{affinity} >= $vm_stats->{affinity};
return 1;
};
@@ -131,7 +132,7 @@ my $check_hard_constraints = sub {
};
sub find_target {
- my($self, $cd, $sd, $group_members_prio) = @_;
+ my($self, $sid, $cd, $sd, $group_members_prio) = @_;
my $online_nodes = $self->{online_node_stats};
@@ -142,11 +143,12 @@ sub find_target {
#### FILTERING NODES WITH HARD CONSTRAINTS (vm can't be started)
next if !&$check_hard_constraints($self, $sd, $node, $group_members_prio);
- ### FILTERING too much loaded nodes
- next if !&$check_target_load($self,$sd, $node);
+ ### FILTERING overloaded nodes and computing target stats
+ my $target_stats = {};
+ next if !&$check_target_load($self, $sid, $sd, $node, $target_stats);
#### compute differents prio
- $target_nodes->{$node} = &$add_prio($self, $sd, $node, $group_members_prio);
+ $target_nodes->{$node} = &$add_prio($self, $node, $group_members_prio, $target_stats);
}
# if ressource aware is enabled, order by score
diff --git a/src/PVE/HA/Balancer/Services.pm b/src/PVE/HA/Balancer/Services.pm
index 6cce6a7..d095b67 100644
--- a/src/PVE/HA/Balancer/Services.pm
+++ b/src/PVE/HA/Balancer/Services.pm
@@ -5,13 +5,30 @@ use warnings;
use PVE::HA::Balancer::Topsis;
use PVE::HA::Balancer::AHP;
-my $check_anti_affinity = sub {
- my ($vmid, $node, $vm_stats) = @_;
-
- #implement me
+sub compute_affinity {
+ my ($self, $sid, $node) = @_;
+
+ my $groups_resources = $self->{groups_resources};
+ my $resources_groups = $self->{resources_groups};
+ my $ss = $self->{ss};
+
+ my $affinity_score = 0;
+
+ my $resource_groups = $resources_groups->{$sid};
+ foreach my $groupid (keys %$resource_groups) {
+ my $affinity = $groups_resources->{$groupid}->{affinity};
+ next if !$affinity;
+ my $resources = $groups_resources->{$groupid}->{resources};
+ foreach my $othersid (keys %$resources) {
+ next if $othersid eq $sid;
+ my $other_service_node = $ss->{$othersid}->{node};
+ $affinity_score++ if ($affinity eq 'separate' && $other_service_node eq $node) ||
+ ($affinity eq 'group' && $other_service_node ne $node);
+ }
+ }
- return undef;
-};
+ return $affinity_score;
+}
my $check_cpu_pressure = sub {
my ($vm_stats) = @_;
@@ -43,7 +60,7 @@ my $check_pseudo_mem_pressure = sub {
};
my $get_bad_vms = sub {
- my($ss, $sc, $online_nodes) = @_;
+ my($self, $ss, $sc, $online_nodes) = @_;
my $bad_vms = {};
@@ -67,7 +84,6 @@ my $get_bad_vms = sub {
my $node_stats = $online_nodes->{$node}->{stats};
my $vm_stats = $sd->{stats};
-
# skip vm is recently started or migrated
next if !defined($vm_stats->{uptime}) || $vm_stats->{uptime} < 300;
@@ -77,12 +93,12 @@ my $get_bad_vms = sub {
#PVE::QemuServer::check_local_resources($vmconf, 1);
- $vm_stats->{affinity} = 0;
$vm_stats->{mem_pseudo_pressure} = 0;
+ $vm_stats->{affinity} = compute_affinity($self, $sid, $node);
my $add_vm = undef;
- $add_vm = 1 if &$check_anti_affinity($sid, $node, $vm_stats);
+ $add_vm = 1 if $vm_stats->{affinity};
$add_vm = 1 if &$check_cpu_pressure($vm_stats);
$add_vm = 1 if &$check_pseudo_mem_pressure($node_stats, $vm_stats);
next if !$add_vm;
@@ -115,7 +131,7 @@ sub get_vm_targetnode {
my $online_nodes = $self->{online_node_stats};
- my $bad_vms = &$get_bad_vms($ss, $sc, $online_nodes);
+ my $bad_vms = &$get_bad_vms($self, $ss, $sc, $online_nodes);
return if !$bad_vms;
my $vm_scores = &$get_score($self, $bad_vms);
@@ -128,7 +144,7 @@ sub get_vm_targetnode {
my $cd = $sc->{$sid};
my $sd = $ss->{$sid};
- my $node = $self->find_node_target($cd , $sd);
+ my $node = $self->find_node_target($sid, $cd , $sd);
next if !$node;
# register last sid we tried to migrate, to not try to balance it in loop
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 4e318bd..03b0520 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -371,7 +371,7 @@ sub manage {
my $sc = $haenv->read_service_config();
$self->{groups} = $haenv->read_group_config(); # update
- $self->{resources_groups} = $haenv->read_resources_groups_config();
+ $self->get_resources_groups();
# compute new service status
@@ -833,7 +833,7 @@ sub next_state_recovery {
$self->recompute_online_node_usage(); # we want the most current node state
- my $recovery_node = $self->find_node_target($cd , $sd);
+ my $recovery_node = $self->find_node_target($sid, $cd , $sd);
if ($recovery_node) {
my $msg = "recover service '$sid' from fenced node '$fenced_node' to node '$recovery_node'";
@@ -871,13 +871,13 @@ sub next_state_recovery {
}
sub find_node_target {
- my($self, $cd, $sd) = @_;
+ my($self, $sid, $cd, $sd) = @_;
my $online_nodes = $self->{online_node_stats};
my $groups = $self->{groups};
my $hagroup = get_service_group($groups, $online_nodes, $cd);
my ($pri_groups, $group_members_prio) = get_node_priority_groups($hagroup, $online_nodes);
- return PVE::HA::Balancer::Nodes::find_target($self, $cd, $sd, $group_members_prio);
+ return PVE::HA::Balancer::Nodes::find_target($self, $sid, $cd, $sd, $group_members_prio);
}
sub loadbalance {
@@ -917,4 +917,20 @@ sub balancer_status {
$self->{balancer}->{enabled} = $dc_ha_cfg->{balancer};
}
+sub get_resources_groups {
+ my ($self) = @_;
+
+ my $resources_groups_config = $self->{haenv}->read_resources_groups_config();
+ my $groups_resources = $resources_groups_config->{ids};
+ my $resources_groups = {};
+ foreach my $groupid (keys %$groups_resources) {
+ my $resources = $groups_resources->{$groupid}->{resources};
+ foreach my $sid (keys %$resources) {
+ $resources_groups->{$sid}->{$groupid} = 1;
+ }
+ }
+ $self->{resources_groups} = $resources_groups;
+ $self->{groups_resources} = $groups_resources;
+}
+
1;
--
2.30.2
* [pve-devel] [PATCH pve-ha-manager 8/8] add tests
To: pve-devel
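This adds simulator tests covering the new placement logic:
- test-basic0-balance-affinity: the balancer migrates vm:101 to node3 to
  group it with vm:103 (affinity 'group').
- test-basic0-balance-cpupressure: the balancer migrates vm:102, which is
  under cpu pressure, to the least loaded node.
- test-basic0-recovery-affinity: after fencing, the service is recovered
  to the node where the other 'group' member runs.
- test-basic0-recovery-antifinity: after fencing, recovery avoids the
  node where the other 'separate' member runs.
- test-basic00-recovery-storage: recovery only considers nodes where the
  service's storage is available.
Except for the storage test, each test enables the balancer through a
per-test datacenter.cfg, for example:
{
    "ha": {
        "balancer": "1"
    }
}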
---
src/test/test-basic0-balance-affinity/README | 1 +
src/test/test-basic0-balance-affinity/cmdlist | 3 ++
.../datacenter.cfg | 5 ++
.../hardware_status | 5 ++
.../test-basic0-balance-affinity/log.expect | 40 ++++++++++++++
.../manager_status | 1 +
.../test-basic0-balance-affinity/node_stats | 5 ++
.../resources_groups | 3 ++
.../service_config | 5 ++
.../service_stats | 5 ++
.../test-basic0-balance-cpupressure/README | 1 +
.../test-basic0-balance-cpupressure/cmdlist | 3 ++
.../datacenter.cfg | 5 ++
.../hardware_status | 5 ++
.../log.expect | 40 ++++++++++++++
.../manager_status | 1 +
.../node_stats | 5 ++
.../service_config | 5 ++
.../service_stats | 5 ++
src/test/test-basic0-recovery-affinity/README | 1 +
.../test-basic0-recovery-affinity/cmdlist | 4 ++
.../datacenter.cfg | 5 ++
.../hardware_status | 5 ++
.../test-basic0-recovery-affinity/log.expect | 53 +++++++++++++++++++
.../manager_status | 1 +
.../test-basic0-recovery-affinity/node_stats | 5 ++
.../resources_groups | 3 ++
.../service_config | 5 ++
.../service_stats | 5 ++
.../test-basic0-recovery-antifinity/README | 1 +
.../test-basic0-recovery-antifinity/cmdlist | 4 ++
.../datacenter.cfg | 5 ++
.../hardware_status | 5 ++
.../log.expect | 53 +++++++++++++++++++
.../manager_status | 1 +
.../node_stats | 5 ++
.../resources_groups | 3 ++
.../service_config | 5 ++
.../service_stats | 5 ++
src/test/test-basic00-recovery-storage/README | 1 +
.../test-basic00-recovery-storage/cmdlist | 4 ++
.../hardware_status | 5 ++
.../test-basic00-recovery-storage/log.expect | 52 ++++++++++++++++++
.../manager_status | 1 +
.../service_config | 5 ++
.../test-basic00-recovery-storage/storecfg | 3 ++
.../test-basic00-recovery-storage/vm_config | 5 ++
47 files changed, 393 insertions(+)
create mode 100644 src/test/test-basic0-balance-affinity/README
create mode 100644 src/test/test-basic0-balance-affinity/cmdlist
create mode 100644 src/test/test-basic0-balance-affinity/datacenter.cfg
create mode 100644 src/test/test-basic0-balance-affinity/hardware_status
create mode 100644 src/test/test-basic0-balance-affinity/log.expect
create mode 100644 src/test/test-basic0-balance-affinity/manager_status
create mode 100644 src/test/test-basic0-balance-affinity/node_stats
create mode 100644 src/test/test-basic0-balance-affinity/resources_groups
create mode 100644 src/test/test-basic0-balance-affinity/service_config
create mode 100644 src/test/test-basic0-balance-affinity/service_stats
create mode 100644 src/test/test-basic0-balance-cpupressure/README
create mode 100644 src/test/test-basic0-balance-cpupressure/cmdlist
create mode 100644 src/test/test-basic0-balance-cpupressure/datacenter.cfg
create mode 100644 src/test/test-basic0-balance-cpupressure/hardware_status
create mode 100644 src/test/test-basic0-balance-cpupressure/log.expect
create mode 100644 src/test/test-basic0-balance-cpupressure/manager_status
create mode 100644 src/test/test-basic0-balance-cpupressure/node_stats
create mode 100644 src/test/test-basic0-balance-cpupressure/service_config
create mode 100644 src/test/test-basic0-balance-cpupressure/service_stats
create mode 100644 src/test/test-basic0-recovery-affinity/README
create mode 100644 src/test/test-basic0-recovery-affinity/cmdlist
create mode 100644 src/test/test-basic0-recovery-affinity/datacenter.cfg
create mode 100644 src/test/test-basic0-recovery-affinity/hardware_status
create mode 100644 src/test/test-basic0-recovery-affinity/log.expect
create mode 100644 src/test/test-basic0-recovery-affinity/manager_status
create mode 100644 src/test/test-basic0-recovery-affinity/node_stats
create mode 100644 src/test/test-basic0-recovery-affinity/resources_groups
create mode 100644 src/test/test-basic0-recovery-affinity/service_config
create mode 100644 src/test/test-basic0-recovery-affinity/service_stats
create mode 100644 src/test/test-basic0-recovery-antifinity/README
create mode 100644 src/test/test-basic0-recovery-antifinity/cmdlist
create mode 100644 src/test/test-basic0-recovery-antifinity/datacenter.cfg
create mode 100644 src/test/test-basic0-recovery-antifinity/hardware_status
create mode 100644 src/test/test-basic0-recovery-antifinity/log.expect
create mode 100644 src/test/test-basic0-recovery-antifinity/manager_status
create mode 100644 src/test/test-basic0-recovery-antifinity/node_stats
create mode 100644 src/test/test-basic0-recovery-antifinity/resources_groups
create mode 100644 src/test/test-basic0-recovery-antifinity/service_config
create mode 100644 src/test/test-basic0-recovery-antifinity/service_stats
create mode 100644 src/test/test-basic00-recovery-storage/README
create mode 100644 src/test/test-basic00-recovery-storage/cmdlist
create mode 100644 src/test/test-basic00-recovery-storage/hardware_status
create mode 100644 src/test/test-basic00-recovery-storage/log.expect
create mode 100644 src/test/test-basic00-recovery-storage/manager_status
create mode 100644 src/test/test-basic00-recovery-storage/service_config
create mode 100644 src/test/test-basic00-recovery-storage/storecfg
create mode 100644 src/test/test-basic00-recovery-storage/vm_config
diff --git a/src/test/test-basic0-balance-affinity/README b/src/test/test-basic0-balance-affinity/README
new file mode 100644
index 0000000..223c9dc
--- /dev/null
+++ b/src/test/test-basic0-balance-affinity/README
@@ -0,0 +1 @@
+Test that the balancer migrates vm:101 to node3, grouping it with vm:103 (affinity 'group').
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-affinity/cmdlist b/src/test/test-basic0-balance-affinity/cmdlist
new file mode 100644
index 0000000..3bfad44
--- /dev/null
+++ b/src/test/test-basic0-balance-affinity/cmdlist
@@ -0,0 +1,3 @@
+[
+ [ "power node1 on", "power node2 on", "power node3 on"]
+]
diff --git a/src/test/test-basic0-balance-affinity/datacenter.cfg b/src/test/test-basic0-balance-affinity/datacenter.cfg
new file mode 100644
index 0000000..3de5c2a
--- /dev/null
+++ b/src/test/test-basic0-balance-affinity/datacenter.cfg
@@ -0,0 +1,5 @@
+{
+ "ha": {
+ "balancer": "1"
+ }
+}
diff --git a/src/test/test-basic0-balance-affinity/hardware_status b/src/test/test-basic0-balance-affinity/hardware_status
new file mode 100644
index 0000000..119b81c
--- /dev/null
+++ b/src/test/test-basic0-balance-affinity/hardware_status
@@ -0,0 +1,5 @@
+{
+ "node1": { "power": "off", "network": "off" },
+ "node2": { "power": "off", "network": "off" },
+ "node3": { "power": "off", "network": "off" }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-affinity/log.expect b/src/test/test-basic0-balance-affinity/log.expect
new file mode 100644
index 0000000..3f02d55
--- /dev/null
+++ b/src/test/test-basic0-balance-affinity/log.expect
@@ -0,0 +1,40 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'vm:101' on node 'node1'
+info 20 node1/crm: adding new service 'vm:102' on node 'node2'
+info 20 node1/crm: adding new service 'vm:103' on node 'node3'
+info 21 node1/lrm: got lock 'ha_agent_node1_lock'
+info 21 node1/lrm: status change wait_for_agent_lock => active
+info 21 node1/lrm: starting service vm:101
+info 21 node1/lrm: service status vm:101 started
+info 22 node2/crm: status change wait_for_quorum => slave
+info 23 node2/lrm: got lock 'ha_agent_node2_lock'
+info 23 node2/lrm: status change wait_for_agent_lock => active
+info 23 node2/lrm: starting service vm:102
+info 23 node2/lrm: service status vm:102 started
+info 24 node3/crm: status change wait_for_quorum => slave
+info 25 node3/lrm: got lock 'ha_agent_node3_lock'
+info 25 node3/lrm: status change wait_for_agent_lock => active
+info 25 node3/lrm: starting service vm:103
+info 25 node3/lrm: service status vm:103 started
+info 140 node1/crm: balancer - migrate service 'vm:101' to node 'node3' (running)
+info 140 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node1, target = node3)
+info 141 node1/lrm: service vm:101 - start migrate to node 'node3'
+info 141 node1/lrm: service vm:101 - end migrate to node 'node3'
+info 160 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node3)
+info 165 node3/lrm: starting service vm:101
+info 165 node3/lrm: service status vm:101 started
+info 620 hardware: exit simulation - done
diff --git a/src/test/test-basic0-balance-affinity/manager_status b/src/test/test-basic0-balance-affinity/manager_status
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/src/test/test-basic0-balance-affinity/manager_status
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-affinity/node_stats b/src/test/test-basic0-balance-affinity/node_stats
new file mode 100644
index 0000000..967da45
--- /dev/null
+++ b/src/test/test-basic0-balance-affinity/node_stats
@@ -0,0 +1,5 @@
+{
+ "node1": { "cpu": 0.2, "maxcpu": 32,"mem": 20737418239,"maxmem": 107374182400 },
+ "node2": { "cpu": 0.1, "maxcpu": 32,"mem": 22737418240,"maxmem": 107374182400 },
+ "node3": { "cpu": 0.1, "maxcpu": 32,"mem": 10737418240,"maxmem": 107374182400 }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-affinity/resources_groups b/src/test/test-basic0-balance-affinity/resources_groups
new file mode 100644
index 0000000..7f1927d
--- /dev/null
+++ b/src/test/test-basic0-balance-affinity/resources_groups
@@ -0,0 +1,3 @@
+resourcegroup: group1
+ resources vm:101,vm:103
+ affinity group
diff --git a/src/test/test-basic0-balance-affinity/service_config b/src/test/test-basic0-balance-affinity/service_config
new file mode 100644
index 0000000..c202a34
--- /dev/null
+++ b/src/test/test-basic0-balance-affinity/service_config
@@ -0,0 +1,5 @@
+{
+ "vm:101": { "node": "node1", "state": "enabled" },
+ "vm:102": { "node": "node2", "state": "enabled" },
+ "vm:103": { "node": "node3", "state": "enabled" }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-affinity/service_stats b/src/test/test-basic0-balance-affinity/service_stats
new file mode 100644
index 0000000..2806fed
--- /dev/null
+++ b/src/test/test-basic0-balance-affinity/service_stats
@@ -0,0 +1,5 @@
+{
+ "101": { "cpu": 0.5, "maxcpu": 16,"mem": 107374182,"maxmem": 1073741824, "cpu_pressure": 0 },
+ "102": { "cpu": 0.5, "maxcpu": 16,"mem": 1073741824,"maxmem": 1073741824, "cpu_pressure": 0 },
+ "103": { "cpu": 0.5, "maxcpu": 16,"mem": 1073741824,"maxmem": 1073741824, "cpu_pressure": 0 }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-cpupressure/README b/src/test/test-basic0-balance-cpupressure/README
new file mode 100644
index 0000000..223c9dc
--- /dev/null
+++ b/src/test/test-basic0-balance-cpupressure/README
@@ -0,0 +1 @@
+Test that the balancer migrates vm:102, which is under cpu pressure, to the least loaded node.
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-cpupressure/cmdlist b/src/test/test-basic0-balance-cpupressure/cmdlist
new file mode 100644
index 0000000..3bfad44
--- /dev/null
+++ b/src/test/test-basic0-balance-cpupressure/cmdlist
@@ -0,0 +1,3 @@
+[
+ [ "power node1 on", "power node2 on", "power node3 on"]
+]
diff --git a/src/test/test-basic0-balance-cpupressure/datacenter.cfg b/src/test/test-basic0-balance-cpupressure/datacenter.cfg
new file mode 100644
index 0000000..3de5c2a
--- /dev/null
+++ b/src/test/test-basic0-balance-cpupressure/datacenter.cfg
@@ -0,0 +1,5 @@
+{
+ "ha": {
+ "balancer": "1"
+ }
+}
diff --git a/src/test/test-basic0-balance-cpupressure/hardware_status b/src/test/test-basic0-balance-cpupressure/hardware_status
new file mode 100644
index 0000000..119b81c
--- /dev/null
+++ b/src/test/test-basic0-balance-cpupressure/hardware_status
@@ -0,0 +1,5 @@
+{
+ "node1": { "power": "off", "network": "off" },
+ "node2": { "power": "off", "network": "off" },
+ "node3": { "power": "off", "network": "off" }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-cpupressure/log.expect b/src/test/test-basic0-balance-cpupressure/log.expect
new file mode 100644
index 0000000..0087761
--- /dev/null
+++ b/src/test/test-basic0-balance-cpupressure/log.expect
@@ -0,0 +1,40 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'vm:101' on node 'node1'
+info 20 node1/crm: adding new service 'vm:102' on node 'node2'
+info 20 node1/crm: adding new service 'vm:103' on node 'node3'
+info 21 node1/lrm: got lock 'ha_agent_node1_lock'
+info 21 node1/lrm: status change wait_for_agent_lock => active
+info 21 node1/lrm: starting service vm:101
+info 21 node1/lrm: service status vm:101 started
+info 22 node2/crm: status change wait_for_quorum => slave
+info 23 node2/lrm: got lock 'ha_agent_node2_lock'
+info 23 node2/lrm: status change wait_for_agent_lock => active
+info 23 node2/lrm: starting service vm:102
+info 23 node2/lrm: service status vm:102 started
+info 24 node3/crm: status change wait_for_quorum => slave
+info 25 node3/lrm: got lock 'ha_agent_node3_lock'
+info 25 node3/lrm: status change wait_for_agent_lock => active
+info 25 node3/lrm: starting service vm:103
+info 25 node3/lrm: service status vm:103 started
+info 140 node1/crm: balancer - migrate service 'vm:102' to node 'node1' (running)
+info 140 node1/crm: service 'vm:102': state changed from 'started' to 'migrate' (node = node2, target = node1)
+info 143 node2/lrm: service vm:102 - start migrate to node 'node1'
+info 143 node2/lrm: service vm:102 - end migrate to node 'node1'
+info 160 node1/crm: service 'vm:102': state changed from 'migrate' to 'started' (node = node1)
+info 161 node1/lrm: starting service vm:102
+info 161 node1/lrm: service status vm:102 started
+info 620 hardware: exit simulation - done
diff --git a/src/test/test-basic0-balance-cpupressure/manager_status b/src/test/test-basic0-balance-cpupressure/manager_status
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/src/test/test-basic0-balance-cpupressure/manager_status
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-cpupressure/node_stats b/src/test/test-basic0-balance-cpupressure/node_stats
new file mode 100644
index 0000000..285da50
--- /dev/null
+++ b/src/test/test-basic0-balance-cpupressure/node_stats
@@ -0,0 +1,5 @@
+{
+ "node1": { "cpu": 0.1, "maxcpu": 32,"mem": 20737418239,"maxmem": 107374182400 },
+ "node2": { "cpu": 0.3, "maxcpu": 32,"mem": 22737418240,"maxmem": 107374182400 },
+ "node3": { "cpu": 0.7, "maxcpu": 32,"mem": 10737418240,"maxmem": 107374182400 }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-cpupressure/service_config b/src/test/test-basic0-balance-cpupressure/service_config
new file mode 100644
index 0000000..c202a34
--- /dev/null
+++ b/src/test/test-basic0-balance-cpupressure/service_config
@@ -0,0 +1,5 @@
+{
+ "vm:101": { "node": "node1", "state": "enabled" },
+ "vm:102": { "node": "node2", "state": "enabled" },
+ "vm:103": { "node": "node3", "state": "enabled" }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-balance-cpupressure/service_stats b/src/test/test-basic0-balance-cpupressure/service_stats
new file mode 100644
index 0000000..babdb01
--- /dev/null
+++ b/src/test/test-basic0-balance-cpupressure/service_stats
@@ -0,0 +1,5 @@
+{
+ "101": { "cpu": 0.5, "maxcpu": 16,"mem": 107374182,"maxmem": 1073741824, "cpu_pressure": 0 },
+ "102": { "cpu": 0.5, "maxcpu": 16,"mem": 1073741824,"maxmem": 1073741824, "cpu_pressure": 10 },
+ "103": { "cpu": 0.5, "maxcpu": 16,"mem": 1073741824,"maxmem": 1073741824, "cpu_pressure": 0 }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-affinity/README b/src/test/test-basic0-recovery-affinity/README
new file mode 100644
index 0000000..223c9dc
--- /dev/null
+++ b/src/test/test-basic0-recovery-affinity/README
@@ -0,0 +1 @@
+Test recovery after single node network failure, preferring the node where the other member of the 'group' affinity group runs.
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-affinity/cmdlist b/src/test/test-basic0-recovery-affinity/cmdlist
new file mode 100644
index 0000000..eee0e40
--- /dev/null
+++ b/src/test/test-basic0-recovery-affinity/cmdlist
@@ -0,0 +1,4 @@
+[
+ [ "power node1 on", "power node2 on", "power node3 on"],
+ [ "network node3 off" ]
+]
diff --git a/src/test/test-basic0-recovery-affinity/datacenter.cfg b/src/test/test-basic0-recovery-affinity/datacenter.cfg
new file mode 100644
index 0000000..3de5c2a
--- /dev/null
+++ b/src/test/test-basic0-recovery-affinity/datacenter.cfg
@@ -0,0 +1,5 @@
+{
+ "ha": {
+ "balancer": "1"
+ }
+}
diff --git a/src/test/test-basic0-recovery-affinity/hardware_status b/src/test/test-basic0-recovery-affinity/hardware_status
new file mode 100644
index 0000000..119b81c
--- /dev/null
+++ b/src/test/test-basic0-recovery-affinity/hardware_status
@@ -0,0 +1,5 @@
+{
+ "node1": { "power": "off", "network": "off" },
+ "node2": { "power": "off", "network": "off" },
+ "node3": { "power": "off", "network": "off" }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-affinity/log.expect b/src/test/test-basic0-recovery-affinity/log.expect
new file mode 100644
index 0000000..31ded22
--- /dev/null
+++ b/src/test/test-basic0-recovery-affinity/log.expect
@@ -0,0 +1,53 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'vm:101' on node 'node1'
+info 20 node1/crm: adding new service 'vm:102' on node 'node2'
+info 20 node1/crm: adding new service 'vm:103' on node 'node3'
+info 21 node1/lrm: got lock 'ha_agent_node1_lock'
+info 21 node1/lrm: status change wait_for_agent_lock => active
+info 21 node1/lrm: starting service vm:101
+info 21 node1/lrm: service status vm:101 started
+info 22 node2/crm: status change wait_for_quorum => slave
+info 23 node2/lrm: got lock 'ha_agent_node2_lock'
+info 23 node2/lrm: status change wait_for_agent_lock => active
+info 23 node2/lrm: starting service vm:102
+info 23 node2/lrm: service status vm:102 started
+info 24 node3/crm: status change wait_for_quorum => slave
+info 25 node3/lrm: got lock 'ha_agent_node3_lock'
+info 25 node3/lrm: status change wait_for_agent_lock => active
+info 25 node3/lrm: starting service vm:103
+info 25 node3/lrm: service status vm:103 started
+info 120 cmdlist: execute network node3 off
+info 120 node1/crm: node 'node3': state changed from 'online' => 'unknown'
+info 124 node3/crm: status change slave => wait_for_quorum
+info 125 node3/lrm: status change active => lost_agent_lock
+info 160 node1/crm: service 'vm:103': state changed from 'started' to 'fence'
+info 160 node1/crm: node 'node3': state changed from 'unknown' => 'fence'
+emai 160 node1/crm: FENCE: Try to fence node 'node3'
+info 166 watchdog: execute power node3 off
+info 165 node3/crm: killed by poweroff
+info 166 node3/lrm: killed by poweroff
+info 166 hardware: server 'node3' stopped by poweroff (watchdog)
+info 240 node1/crm: got lock 'ha_agent_node3_lock'
+info 240 node1/crm: fencing: acknowledged - got agent lock for node 'node3'
+info 240 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+emai 240 node1/crm: SUCCEED: fencing: acknowledged - got agent lock for node 'node3'
+info 240 node1/crm: service 'vm:103': state changed from 'fence' to 'recovery'
+info 240 node1/crm: recover service 'vm:103' from fenced node 'node3' to node 'node1'
+info 240 node1/crm: service 'vm:103': state changed from 'recovery' to 'started' (node = node1)
+info 241 node1/lrm: starting service vm:103
+info 241 node1/lrm: service status vm:103 started
+info 720 hardware: exit simulation - done
diff --git a/src/test/test-basic0-recovery-affinity/manager_status b/src/test/test-basic0-recovery-affinity/manager_status
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/src/test/test-basic0-recovery-affinity/manager_status
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-affinity/node_stats b/src/test/test-basic0-recovery-affinity/node_stats
new file mode 100644
index 0000000..967da45
--- /dev/null
+++ b/src/test/test-basic0-recovery-affinity/node_stats
@@ -0,0 +1,5 @@
+{
+ "node1": { "cpu": 0.2, "maxcpu": 32,"mem": 20737418239,"maxmem": 107374182400 },
+ "node2": { "cpu": 0.1, "maxcpu": 32,"mem": 22737418240,"maxmem": 107374182400 },
+ "node3": { "cpu": 0.1, "maxcpu": 32,"mem": 10737418240,"maxmem": 107374182400 }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-affinity/resources_groups b/src/test/test-basic0-recovery-affinity/resources_groups
new file mode 100644
index 0000000..7f1927d
--- /dev/null
+++ b/src/test/test-basic0-recovery-affinity/resources_groups
@@ -0,0 +1,3 @@
+resourcegroup: group1
+ resources vm:101,vm:103
+ affinity group
diff --git a/src/test/test-basic0-recovery-affinity/service_config b/src/test/test-basic0-recovery-affinity/service_config
new file mode 100644
index 0000000..c202a34
--- /dev/null
+++ b/src/test/test-basic0-recovery-affinity/service_config
@@ -0,0 +1,5 @@
+{
+ "vm:101": { "node": "node1", "state": "enabled" },
+ "vm:102": { "node": "node2", "state": "enabled" },
+ "vm:103": { "node": "node3", "state": "enabled" }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-affinity/service_stats b/src/test/test-basic0-recovery-affinity/service_stats
new file mode 100644
index 0000000..46ed27e
--- /dev/null
+++ b/src/test/test-basic0-recovery-affinity/service_stats
@@ -0,0 +1,5 @@
+{
+ "101": { "cpu": 0.5, "maxcpu": 16,"mem": 1073741824,"maxmem": 1073741824, "cpu_pressure": 0 },
+ "102": { "cpu": 0.5, "maxcpu": 16,"mem": 1073741824,"maxmem": 1073741824, "cpu_pressure": 0 },
+ "103": { "cpu": 0.5, "maxcpu": 16,"mem": 1073741824,"maxmem": 1073741824, "cpu_pressure": 0 }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-antifinity/README b/src/test/test-basic0-recovery-antifinity/README
new file mode 100644
index 0000000..223c9dc
--- /dev/null
+++ b/src/test/test-basic0-recovery-antifinity/README
@@ -0,0 +1 @@
+Test recovery after single node network failure, avoiding the node where the other member of the 'separate' affinity group runs.
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-antifinity/cmdlist b/src/test/test-basic0-recovery-antifinity/cmdlist
new file mode 100644
index 0000000..eee0e40
--- /dev/null
+++ b/src/test/test-basic0-recovery-antifinity/cmdlist
@@ -0,0 +1,4 @@
+[
+ [ "power node1 on", "power node2 on", "power node3 on"],
+ [ "network node3 off" ]
+]
diff --git a/src/test/test-basic0-recovery-antifinity/datacenter.cfg b/src/test/test-basic0-recovery-antifinity/datacenter.cfg
new file mode 100644
index 0000000..3de5c2a
--- /dev/null
+++ b/src/test/test-basic0-recovery-antifinity/datacenter.cfg
@@ -0,0 +1,5 @@
+{
+ "ha": {
+ "balancer": "1"
+ }
+}
diff --git a/src/test/test-basic0-recovery-antifinity/hardware_status b/src/test/test-basic0-recovery-antifinity/hardware_status
new file mode 100644
index 0000000..119b81c
--- /dev/null
+++ b/src/test/test-basic0-recovery-antifinity/hardware_status
@@ -0,0 +1,5 @@
+{
+ "node1": { "power": "off", "network": "off" },
+ "node2": { "power": "off", "network": "off" },
+ "node3": { "power": "off", "network": "off" }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-antifinity/log.expect b/src/test/test-basic0-recovery-antifinity/log.expect
new file mode 100644
index 0000000..31ded22
--- /dev/null
+++ b/src/test/test-basic0-recovery-antifinity/log.expect
@@ -0,0 +1,53 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'vm:101' on node 'node1'
+info 20 node1/crm: adding new service 'vm:102' on node 'node2'
+info 20 node1/crm: adding new service 'vm:103' on node 'node3'
+info 21 node1/lrm: got lock 'ha_agent_node1_lock'
+info 21 node1/lrm: status change wait_for_agent_lock => active
+info 21 node1/lrm: starting service vm:101
+info 21 node1/lrm: service status vm:101 started
+info 22 node2/crm: status change wait_for_quorum => slave
+info 23 node2/lrm: got lock 'ha_agent_node2_lock'
+info 23 node2/lrm: status change wait_for_agent_lock => active
+info 23 node2/lrm: starting service vm:102
+info 23 node2/lrm: service status vm:102 started
+info 24 node3/crm: status change wait_for_quorum => slave
+info 25 node3/lrm: got lock 'ha_agent_node3_lock'
+info 25 node3/lrm: status change wait_for_agent_lock => active
+info 25 node3/lrm: starting service vm:103
+info 25 node3/lrm: service status vm:103 started
+info 120 cmdlist: execute network node3 off
+info 120 node1/crm: node 'node3': state changed from 'online' => 'unknown'
+info 124 node3/crm: status change slave => wait_for_quorum
+info 125 node3/lrm: status change active => lost_agent_lock
+info 160 node1/crm: service 'vm:103': state changed from 'started' to 'fence'
+info 160 node1/crm: node 'node3': state changed from 'unknown' => 'fence'
+emai 160 node1/crm: FENCE: Try to fence node 'node3'
+info 166 watchdog: execute power node3 off
+info 165 node3/crm: killed by poweroff
+info 166 node3/lrm: killed by poweroff
+info 166 hardware: server 'node3' stopped by poweroff (watchdog)
+info 240 node1/crm: got lock 'ha_agent_node3_lock'
+info 240 node1/crm: fencing: acknowledged - got agent lock for node 'node3'
+info 240 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+emai 240 node1/crm: SUCCEED: fencing: acknowledged - got agent lock for node 'node3'
+info 240 node1/crm: service 'vm:103': state changed from 'fence' to 'recovery'
+info 240 node1/crm: recover service 'vm:103' from fenced node 'node3' to node 'node1'
+info 240 node1/crm: service 'vm:103': state changed from 'recovery' to 'started' (node = node1)
+info 241 node1/lrm: starting service vm:103
+info 241 node1/lrm: service status vm:103 started
+info 720 hardware: exit simulation - done
diff --git a/src/test/test-basic0-recovery-antifinity/manager_status b/src/test/test-basic0-recovery-antifinity/manager_status
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/src/test/test-basic0-recovery-antifinity/manager_status
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-antifinity/node_stats b/src/test/test-basic0-recovery-antifinity/node_stats
new file mode 100644
index 0000000..967da45
--- /dev/null
+++ b/src/test/test-basic0-recovery-antifinity/node_stats
@@ -0,0 +1,5 @@
+{
+ "node1": { "cpu": 0.2, "maxcpu": 32,"mem": 20737418239,"maxmem": 107374182400 },
+ "node2": { "cpu": 0.1, "maxcpu": 32,"mem": 22737418240,"maxmem": 107374182400 },
+ "node3": { "cpu": 0.1, "maxcpu": 32,"mem": 10737418240,"maxmem": 107374182400 }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-antifinity/resources_groups b/src/test/test-basic0-recovery-antifinity/resources_groups
new file mode 100644
index 0000000..af3b211
--- /dev/null
+++ b/src/test/test-basic0-recovery-antifinity/resources_groups
@@ -0,0 +1,3 @@
+resourcegroup: group1
+ resources vm:102,vm:103
+ affinity separate
diff --git a/src/test/test-basic0-recovery-antifinity/service_config b/src/test/test-basic0-recovery-antifinity/service_config
new file mode 100644
index 0000000..c202a34
--- /dev/null
+++ b/src/test/test-basic0-recovery-antifinity/service_config
@@ -0,0 +1,5 @@
+{
+ "vm:101": { "node": "node1", "state": "enabled" },
+ "vm:102": { "node": "node2", "state": "enabled" },
+ "vm:103": { "node": "node3", "state": "enabled" }
+}
\ No newline at end of file
diff --git a/src/test/test-basic0-recovery-antifinity/service_stats b/src/test/test-basic0-recovery-antifinity/service_stats
new file mode 100644
index 0000000..46ed27e
--- /dev/null
+++ b/src/test/test-basic0-recovery-antifinity/service_stats
@@ -0,0 +1,5 @@
+{
+ "101": { "cpu": 0.5, "maxcpu": 16,"mem": 1073741824,"maxmem": 1073741824, "cpu_pressure": 0 },
+ "102": { "cpu": 0.5, "maxcpu": 16,"mem": 1073741824,"maxmem": 1073741824, "cpu_pressure": 0 },
+ "103": { "cpu": 0.5, "maxcpu": 16,"mem": 1073741824,"maxmem": 1073741824, "cpu_pressure": 0 }
+}
\ No newline at end of file
diff --git a/src/test/test-basic00-recovery-storage/README b/src/test/test-basic00-recovery-storage/README
new file mode 100644
index 0000000..223c9dc
--- /dev/null
+++ b/src/test/test-basic00-recovery-storage/README
@@ -0,0 +1 @@
+Test recovery after single node network failure, restricted to nodes where the service's storage is available.
\ No newline at end of file
diff --git a/src/test/test-basic00-recovery-storage/cmdlist b/src/test/test-basic00-recovery-storage/cmdlist
new file mode 100644
index 0000000..eee0e40
--- /dev/null
+++ b/src/test/test-basic00-recovery-storage/cmdlist
@@ -0,0 +1,4 @@
+[
+ [ "power node1 on", "power node2 on", "power node3 on"],
+ [ "network node3 off" ]
+]
diff --git a/src/test/test-basic00-recovery-storage/hardware_status b/src/test/test-basic00-recovery-storage/hardware_status
new file mode 100644
index 0000000..119b81c
--- /dev/null
+++ b/src/test/test-basic00-recovery-storage/hardware_status
@@ -0,0 +1,5 @@
+{
+ "node1": { "power": "off", "network": "off" },
+ "node2": { "power": "off", "network": "off" },
+ "node3": { "power": "off", "network": "off" }
+}
\ No newline at end of file
diff --git a/src/test/test-basic00-recovery-storage/log.expect b/src/test/test-basic00-recovery-storage/log.expect
new file mode 100644
index 0000000..3302fcc
--- /dev/null
+++ b/src/test/test-basic00-recovery-storage/log.expect
@@ -0,0 +1,52 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'vm:101' on node 'node1'
+info 20 node1/crm: adding new service 'vm:102' on node 'node2'
+info 20 node1/crm: adding new service 'vm:103' on node 'node3'
+info 21 node1/lrm: got lock 'ha_agent_node1_lock'
+info 21 node1/lrm: status change wait_for_agent_lock => active
+info 21 node1/lrm: starting service vm:101
+info 21 node1/lrm: service status vm:101 started
+info 22 node2/crm: status change wait_for_quorum => slave
+info 23 node2/lrm: got lock 'ha_agent_node2_lock'
+info 23 node2/lrm: status change wait_for_agent_lock => active
+info 24 node3/crm: status change wait_for_quorum => slave
+info 25 node3/lrm: got lock 'ha_agent_node3_lock'
+info 25 node3/lrm: status change wait_for_agent_lock => active
+info 25 node3/lrm: starting service vm:103
+info 25 node3/lrm: service status vm:103 started
+info 40 node1/crm: service 'vm:102': state changed from 'request_stop' to 'stopped'
+info 120 cmdlist: execute network node3 off
+info 120 node1/crm: node 'node3': state changed from 'online' => 'unknown'
+info 124 node3/crm: status change slave => wait_for_quorum
+info 125 node3/lrm: status change active => lost_agent_lock
+info 160 node1/crm: service 'vm:103': state changed from 'started' to 'fence'
+info 160 node1/crm: node 'node3': state changed from 'unknown' => 'fence'
+emai 160 node1/crm: FENCE: Try to fence node 'node3'
+info 166 watchdog: execute power node3 off
+info 165 node3/crm: killed by poweroff
+info 166 node3/lrm: killed by poweroff
+info 166 hardware: server 'node3' stopped by poweroff (watchdog)
+info 240 node1/crm: got lock 'ha_agent_node3_lock'
+info 240 node1/crm: fencing: acknowledged - got agent lock for node 'node3'
+info 240 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+emai 240 node1/crm: SUCCEED: fencing: acknowledged - got agent lock for node 'node3'
+info 240 node1/crm: service 'vm:103': state changed from 'fence' to 'recovery'
+info 240 node1/crm: recover service 'vm:103' from fenced node 'node3' to node 'node1'
+info 240 node1/crm: service 'vm:103': state changed from 'recovery' to 'started' (node = node1)
+info 241 node1/lrm: starting service vm:103
+info 241 node1/lrm: service status vm:103 started
+info 720 hardware: exit simulation - done
diff --git a/src/test/test-basic00-recovery-storage/manager_status b/src/test/test-basic00-recovery-storage/manager_status
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/src/test/test-basic00-recovery-storage/manager_status
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/src/test/test-basic00-recovery-storage/service_config b/src/test/test-basic00-recovery-storage/service_config
new file mode 100644
index 0000000..0e05ab4
--- /dev/null
+++ b/src/test/test-basic00-recovery-storage/service_config
@@ -0,0 +1,5 @@
+{
+ "vm:101": { "node": "node1", "state": "enabled" },
+ "vm:102": { "node": "node2" },
+ "vm:103": { "node": "node3", "state": "enabled" }
+}
\ No newline at end of file
diff --git a/src/test/test-basic00-recovery-storage/storecfg b/src/test/test-basic00-recovery-storage/storecfg
new file mode 100644
index 0000000..9cd641c
--- /dev/null
+++ b/src/test/test-basic00-recovery-storage/storecfg
@@ -0,0 +1,3 @@
+{
+ "local": { "nodes": { "node1": 1, "node3": 1 } }
+}
\ No newline at end of file
diff --git a/src/test/test-basic00-recovery-storage/vm_config b/src/test/test-basic00-recovery-storage/vm_config
new file mode 100644
index 0000000..d6f2d7d
--- /dev/null
+++ b/src/test/test-basic00-recovery-storage/vm_config
@@ -0,0 +1,5 @@
+{
+ "101": { "storage": "local" },
+ "102": { "storage": "local" },
+ "103": { "storage": "local" }
+}
\ No newline at end of file
--
2.30.2