* [pve-devel] [PATCH v2 zsync 2/3] introduce and use read_file helper
2020-12-17 14:17 [pve-devel] [PATCH v2 zsync 1/3] remove unused function write_cron Fabian Ebner
@ 2020-12-17 14:17 ` Fabian Ebner
2020-12-18 16:44 ` Thomas Lamprecht
2020-12-17 14:17 ` [pve-devel] [PATCH v2 zsync 3/3] fix #2821: only abort if there really is a waiting/syncing job Fabian Ebner
2020-12-18 16:43 ` [pve-devel] applied-series: Re: [PATCH v2 zsync 1/3] remove unused function write_cron Thomas Lamprecht
2 siblings, 1 reply; 5+ messages in thread
From: Fabian Ebner @ 2020-12-17 14:17 UTC (permalink / raw)
To: pve-devel
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
New in v2
Is there a special reason why the input file handle was kept open until
after renaming (in update_cron and update_state)?
pve-zsync | 53 ++++++++++++++++++++---------------------------------
1 file changed, 20 insertions(+), 33 deletions(-)
diff --git a/pve-zsync b/pve-zsync
index 881b9c8..76e12ce 100755
--- a/pve-zsync
+++ b/pve-zsync
@@ -81,6 +81,19 @@ sub check_bin {
die "unable to find command '$bin'\n";
}
+sub read_file {
+ my ($filename, $one_line_only) = @_;
+
+ my $fh = IO::File->new($filename, "r")
+ or die "Could not open file ${filename}: $!\n";
+
+ my $text = $one_line_only ? <$fh> : [ <$fh> ];
+
+ close($fh);
+
+ return $text;
+}
+
sub cut_target_width {
my ($path, $maxlen) = @_;
$path =~ s@/+@/@g;
@@ -202,14 +215,9 @@ sub read_cron {
return undef;
}
- my $fh = IO::File->new("< $CRONJOBS");
- die "Could not open file $CRONJOBS: $!\n" if !$fh;
-
- my @text = <$fh>;
-
- close($fh);
+ my $text = read_file($CRONJOBS, 0);
- return encode_cron(@text);
+ return encode_cron(@{$text});
}
sub parse_argv {
@@ -329,28 +337,14 @@ sub read_state {
return undef;
}
- my $fh = IO::File->new("< $STATE");
- die "Could not open file $STATE: $!\n" if !$fh;
-
- my $text = <$fh>;
- my $states = decode_json($text);
-
- close($fh);
-
- return $states;
+ my $text = read_file($STATE, 1);
+ return decode_json($text);
}
sub update_state {
my ($job) = @_;
- my $text;
- my $in_fh;
- eval {
-
- $in_fh = IO::File->new("< $STATE");
- die "Could not open file $STATE: $!\n" if !$in_fh;
- $text = <$in_fh>;
- };
+ my $text = eval { read_file($STATE, 1); };
my $out_fh = IO::File->new("> $STATE.new");
die "Could not open file ${STATE}.new: $!\n" if !$out_fh;
@@ -382,9 +376,6 @@ sub update_state {
close($out_fh);
rename "$STATE.new", $STATE;
- eval {
- close($in_fh);
- };
return $states;
}
@@ -399,12 +390,9 @@ sub update_cron {
my $header = "SHELL=/bin/sh\n";
$header .= "PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin\n\n";
- my $fh = IO::File->new("< $CRONJOBS");
- die "Could not open file $CRONJOBS: $!\n" if !$fh;
-
- my @test = <$fh>;
+ my $current = read_file($CRONJOBS, 0);
- while (my $line = shift(@test)) {
+ foreach my $line (@{$current}) {
chomp($line);
if ($line =~ m/source $job->{source} .*name $job->{name} /) {
$updated = 1;
@@ -433,7 +421,6 @@ sub update_cron {
close ($new_fh);
die "can't move $CRONJOBS.new: $!\n" if !rename "${CRONJOBS}.new", $CRONJOBS;
- close ($fh);
}
sub format_job {
--
2.20.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [pve-devel] [PATCH v2 zsync 3/3] fix #2821: only abort if there really is a waiting/syncing job
2020-12-17 14:17 [pve-devel] [PATCH v2 zsync 1/3] remove unused function write_cron Fabian Ebner
2020-12-17 14:17 ` [pve-devel] [PATCH v2 zsync 2/3] introduce and use read_file helper Fabian Ebner
@ 2020-12-17 14:17 ` Fabian Ebner
2020-12-18 16:43 ` [pve-devel] applied-series: Re: [PATCH v2 zsync 1/3] remove unused function write_cron Thomas Lamprecht
2 siblings, 0 replies; 5+ messages in thread
From: Fabian Ebner @ 2020-12-17 14:17 UTC (permalink / raw)
To: pve-devel
by remembering the process via PID+start time+boot ID and checking for that
information in the new instance. If the old instance can't be found, the new
one will continue and register itself in the state.
After updating the pve-zsync package, if there is a waiting instance running the
old version, one more might be created, because there is no instance_id yet. But
the new instance will set the instance_id, which any later instance will see.
More importantly, if the state is wrongly 'waiting' or 'syncing', i.e.
because an instance was terminated before finishing, we don't abort anymore and
recover from the wrong state, thus fixing the bug.
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
Changes from v1:
* use file reads instead of spawning 'ps'
* use PID+boot specific start time+boot ID instead of PID+absolute start time
* collapse get_process_start_time() and get_instance_id() into one function
* query the ID of the current instance on startup
pve-zsync | 40 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/pve-zsync b/pve-zsync
index 76e12ce..5c95955 100755
--- a/pve-zsync
+++ b/pve-zsync
@@ -55,6 +55,8 @@ my $TARGETRE = qr!^(?:($HOSTRE):)?(\d+|(?:[\w\-_]+)(/.+)?)$!;
my $DISK_KEY_RE = qr/^(?:(?:(?:virtio|ide|scsi|sata|efidisk|mp)\d+)|rootfs): /;
+my $INSTANCE_ID = get_instance_id($$);
+
my $command = $ARGV[0];
if (defined($command) && $command ne 'help' && $command ne 'printpod') {
@@ -274,6 +276,7 @@ sub add_state_to_job {
$job->{state} = $state->{state};
$job->{lsync} = $state->{lsync};
$job->{vm_type} = $state->{vm_type};
+ $job->{instance_id} = $state->{instance_id};
for (my $i = 0; $state->{"snap$i"}; $i++) {
$job->{"snap$i"} = $state->{"snap$i"};
@@ -359,6 +362,7 @@ sub update_state {
if ($job->{state} ne "del") {
$state->{state} = $job->{state};
$state->{lsync} = $job->{lsync};
+ $state->{instance_id} = $job->{instance_id};
$state->{vm_type} = $job->{vm_type};
for (my $i = 0; $job->{"snap$i"} ; $i++) {
@@ -571,6 +575,33 @@ sub destroy_job {
});
}
+sub get_instance_id {
+ my ($pid) = @_;
+
+ my $stat = read_file("/proc/$pid/stat", 1)
+ or die "unable to read process stats\n";
+ my $boot_id = read_file("/proc/sys/kernel/random/boot_id", 1)
+ or die "unable to read boot ID\n";
+
+ my $stats = [ split(/\s+/, $stat) ];
+ my $starttime = $stats->[21];
+ chomp($boot_id);
+
+ return "${pid}:${starttime}:${boot_id}";
+}
+
+sub instance_exists {
+ my ($instance_id) = @_;
+
+ if (defined($instance_id) && $instance_id =~ m/^([1-9][0-9]*):/) {
+ my $pid = $1;
+ my $actual_id = eval { get_instance_id($pid); };
+ return defined($actual_id) && $actual_id eq $instance_id;
+ }
+
+ return 0;
+}
+
sub sync {
my ($param) = @_;
@@ -580,11 +611,16 @@ sub sync {
eval { $job = get_job($param) };
if ($job) {
- if (defined($job->{state}) && ($job->{state} eq "syncing" || $job->{state} eq "waiting")) {
+ my $state = $job->{state} // 'ok';
+ $state = 'ok' if !instance_exists($job->{instance_id});
+
+ if ($state eq "syncing" || $state eq "waiting") {
die "Job --source $param->{source} --name $param->{name} is already scheduled to sync\n";
}
$job->{state} = "waiting";
+ $job->{instance_id} = $INSTANCE_ID;
+
update_state($job);
}
});
@@ -658,6 +694,7 @@ sub sync {
eval { $job = get_job($param); };
if ($job) {
$job->{state} = "error";
+ delete $job->{instance_id};
update_state($job);
}
});
@@ -674,6 +711,7 @@ sub sync {
$job->{state} = "ok";
}
$job->{lsync} = $date;
+ delete $job->{instance_id};
update_state($job);
}
});
--
2.20.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [pve-devel] applied-series: Re: [PATCH v2 zsync 1/3] remove unused function write_cron
2020-12-17 14:17 [pve-devel] [PATCH v2 zsync 1/3] remove unused function write_cron Fabian Ebner
2020-12-17 14:17 ` [pve-devel] [PATCH v2 zsync 2/3] introduce and use read_file helper Fabian Ebner
2020-12-17 14:17 ` [pve-devel] [PATCH v2 zsync 3/3] fix #2821: only abort if there really is a waiting/syncing job Fabian Ebner
@ 2020-12-18 16:43 ` Thomas Lamprecht
2 siblings, 0 replies; 5+ messages in thread
From: Thomas Lamprecht @ 2020-12-18 16:43 UTC (permalink / raw)
To: Proxmox VE development discussion, Fabian Ebner
On 17/12/2020 15:17, Fabian Ebner wrote:
> Commit 76b2c677f7a2fd81a990533b317374d168d1d918 replaced it with
> update_cron.
>
> Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
> ---
>
> New in v2 and not related to the rest of the series.
>
> pve-zsync | 36 ------------------------------------
> 1 file changed, 36 deletions(-)
>
>
applied series, thanks!
^ permalink raw reply [flat|nested] 5+ messages in thread