From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 5A2CA7BAC9 for ; Tue, 2 Nov 2021 14:53:08 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 47E1D8907 for ; Tue, 2 Nov 2021 14:52:38 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 278CE88F7 for ; Tue, 2 Nov 2021 14:52:36 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id D7E7946023 for ; Tue, 2 Nov 2021 14:52:35 +0100 (CET) From: Fabian Ebner To: pve-devel@lists.proxmox.com, Dominik Csapak References: <20211007082727.1385888-1-d.csapak@proxmox.com> <20211007082727.1385888-6-d.csapak@proxmox.com> Message-ID: <189b3128-bbb3-f5d3-c164-ffdf3318e543@proxmox.com> Date: Tue, 2 Nov 2021 14:52:29 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.14.0 MIME-Version: 1.0 In-Reply-To: <20211007082727.1385888-6-d.csapak@proxmox.com> Content-Type: text/plain; charset=utf-8; format=flowed Content-Language: en-US Content-Transfer-Encoding: 7bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.762 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment NICE_REPLY_A -1.14 Looks like a legit reply (A) PROLO_LEO1 0.1 Meta Catches all Leo drug variations so far SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pve-devel] [PATCH manager 4/7] add PVE/Jobs to handle VZDump jobs X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2021 13:53:08 -0000 General style nit: for new code, for is preferred over foreach Am 07.10.21 um 10:27 schrieb Dominik Csapak: > this adds a SectionConfig handling for jobs (only 'vzdump' for now) that > represents a job that will be handled by pvescheduler and a basic > 'job-state' handling for reading/writing state json files > > this has some intersections with pvesrs state handling, but does not > use a single state file for all jobs, but seperate ones, like we > do it in the backup-server. > > Signed-off-by: Dominik Csapak > --- > PVE/Jobs.pm | 210 +++++++++++++++++++++++++++++++++++++++++++++ > PVE/Jobs/Makefile | 16 ++++ > PVE/Jobs/Plugin.pm | 61 +++++++++++++ > PVE/Jobs/VZDump.pm | 54 ++++++++++++ > PVE/Makefile | 3 +- > 5 files changed, 343 insertions(+), 1 deletion(-) > create mode 100644 PVE/Jobs.pm > create mode 100644 PVE/Jobs/Makefile > create mode 100644 PVE/Jobs/Plugin.pm > create mode 100644 PVE/Jobs/VZDump.pm > > diff --git a/PVE/Jobs.pm b/PVE/Jobs.pm > new file mode 100644 > index 00000000..f17676bb > --- /dev/null > +++ b/PVE/Jobs.pm > @@ -0,0 +1,210 @@ > +package PVE::Jobs; > + > +use strict; > +use warnings; > +use JSON; > + > +use PVE::Cluster qw(cfs_read_file); > +use PVE::Jobs::Plugin; > +use PVE::Jobs::VZDump; > +use PVE::Tools; > + > +PVE::Jobs::VZDump->register(); > +PVE::Jobs::Plugin->init(); > + > +my $state_dir = "/var/lib/pve-manager/jobs"; > +my $lock_dir = "/var/lock/pve-manager"; > + > +my $get_state_file = sub { > + my ($jobid, $type) = @_; > + return "$state_dir/$type-$jobid.json"; > +}; > + > +my $default_state = { > + state => 'created', > + time => 0, > +}; > + > +# lockless, since we use file_set_contents, which is atomic s/file_set_contents/file_get_contents/ > +sub read_job_state { > + my ($jobid, $type) = @_; > + my $path = $get_state_file->($jobid, $type); > + return $default_state if ! -e $path; > + > + my $raw = PVE::Tools::file_get_contents($path); > + > + return $default_state if $raw eq ''; > + > + # untaint $raw > + if ($raw =~ m/^(\{.*\})$/) { > + return decode_json($1); > + } > + > + die "invalid json data in '$path'\n"; > +} > + > +sub lock_job_state { > + my ($jobid, $type, $sub) = @_; > + > + my $filename = "$lock_dir/$type-$jobid.lck"; Should new locks use .lock? > + > + my $res = PVE::Tools::lock_file($filename, 10, $sub); > + die $@ if $@; > + > + return $res; > +} > + > +# returns the state and checks if the job was 'started' and is now finished > +sub get_job_state { It feels a bit weird that this function does two things. Maybe it's worth separating the "check if finished" part? > + my ($jobid, $type) = @_; > + > + # first check unlocked to save time, > + my $state = read_job_state($jobid, $type); > + if ($state->{state} ne 'started') { > + return $state; # nothing to check > + } > + > + return lock_job_state($jobid, $type, sub { > + my $state = read_job_state($jobid, $type); > + > + if ($state->{state} ne 'started') { > + return $state; # nothing to check > + } > + > + my ($task, $filename) = PVE::Tools::upid_decode($state->{upid}, 1); > + die "unable to parse worker upid\n" if !$task; > + die "no such task\n" if ! -f $filename; > + > + my $pstart = PVE::ProcFSTools::read_proc_starttime($task->{pid}); > + if ($pstart && $pstart == $task->{pstart}) { > + return $state; # still running > + } > + > + my $new_state = { > + state => 'stopped', > + msg => PVE::Tools::upid_read_status($state->{upid}), > + upid => $state->{upid} > + }; > + > + my $path = $get_state_file->($jobid, $type); > + PVE::Tools::file_set_contents($path, encode_json($new_state)); > + return $new_state; > + }); > +} > + > +# must be called when the job is first created > +sub create_job { > + my ($jobid, $type) = @_; > + > + lock_job_state($jobid, $type, sub { > + my $state = read_job_state($jobid, $type); > + > + if ($state->{state} ne 'created') { > + die "job state already exists\n"; > + } > + > + $state->{time} = time(); > + > + my $path = $get_state_file->($jobid, $type); > + PVE::Tools::file_set_contents($path, encode_json($state)); > + }); > +} > + > +# to be called when the job is removed > +sub remove_job { > + my ($jobid, $type) = @_; > + my $path = $get_state_file->($jobid, $type); > + unlink $path; > +} From what I can tell, this can be called while the job might be active and then suddenly the state file is gone. Is that a problem? > + > +# will be called when the job was started according to schedule > +sub started_job { > + my ($jobid, $type, $upid) = @_; > + lock_job_state($jobid, $type, sub { > + my $state = { > + state => 'started', > + upid => $upid, > + }; > + > + my $path = $get_state_file->($jobid, $type); > + PVE::Tools::file_set_contents($path, encode_json($state)); > + }); > +} > + > +# will be called when the job schedule is updated > +sub updated_job_schedule { > + my ($jobid, $type) = @_; > + lock_job_state($jobid, $type, sub { > + my $old_state = read_job_state($jobid, $type); > + > + if ($old_state->{state} eq 'started') { > + return; # do not update timestamp on running jobs > + } > + > + $old_state->{updated} = time(); > + > + my $path = $get_state_file->($jobid, $type); > + PVE::Tools::file_set_contents($path, encode_json($old_state)); > + }); > +} > + > +sub get_last_runtime { > + my ($jobid, $type) = @_; > + > + my $state = read_job_state($jobid, $type); > + > + if (defined($state->{updated})) { > + return $state->{updated}; > + } I'm confused. Why does 'updated' overwrite the last runtime? Should it return 0 like for a new job? Is there a problem returning the normal last runtime for an updated job? > + > + if (my $upid = $state->{upid}) { > + my ($task) = PVE::Tools::upid_decode($upid, 1); > + die "unable to parse worker upid\n" if !$task; > + return $task->{starttime}; > + } > + > + return $state->{time} // 0; > +} > + > +sub run_jobs { > + my $jobs_cfg = cfs_read_file('jobs.cfg'); > + my $nodename = PVE::INotify::nodename(); > + > + foreach my $id (sort keys %{$jobs_cfg->{ids}}) { > + my $cfg = $jobs_cfg->{ids}->{$id}; > + my $type = $cfg->{type}; > + my $schedule = delete $cfg->{schedule}; Why delete? > + > + # only schedule local jobs > + next if defined($cfg->{node}) && $cfg->{node} eq $nodename; Shouldn't this be 'ne'? > + > + # only schedule enabled jobs > + next if defined($cfg->{enabled}) && !$cfg->{enabled}; > + > + my $last_run = get_last_runtime($id, $type); > + my $calspec = PVE::CalendarEvent::parse_calendar_event($schedule); > + my $next_sync = PVE::CalendarEvent::compute_next_event($calspec, $last_run) // 0; > + > + if (time() >= $next_sync) { > + # only warn on errors, so that all jobs can run > + my $state = get_job_state($id, $type); # to update the state > + > + next if $state->{state} eq 'started'; # still running > + > + my $plugin = PVE::Jobs::Plugin->lookup($type); > + > + my $upid = eval { $plugin->run($cfg) }; > + warn $@ if $@; > + if ($upid) { > + started_job($id, $type, $upid); > + } > + } > + } > +} > + > +sub setup_dirs { > + mkdir $state_dir; > + mkdir $lock_dir; > +} > + > +1; > diff --git a/PVE/Jobs/Makefile b/PVE/Jobs/Makefile > new file mode 100644 > index 00000000..6023c3ba > --- /dev/null > +++ b/PVE/Jobs/Makefile > @@ -0,0 +1,16 @@ > +include ../../defines.mk > + > +PERLSOURCE = \ > + Plugin.pm\ > + VZDump.pm > + > +all: > + > +.PHONY: clean > +clean: > + rm -rf *~ > + > +.PHONY: install > +install: ${PERLSOURCE} > + install -d ${PERLLIBDIR}/PVE/Jobs > + install -m 0644 ${PERLSOURCE} ${PERLLIBDIR}/PVE/Jobs > diff --git a/PVE/Jobs/Plugin.pm b/PVE/Jobs/Plugin.pm > new file mode 100644 > index 00000000..69c31cf2 > --- /dev/null > +++ b/PVE/Jobs/Plugin.pm > @@ -0,0 +1,61 @@ > +package PVE::Jobs::Plugin; > + > +use strict; > +use warnings; > + > +use PVE::Cluster qw(cfs_register_file); > + > +use base qw(PVE::SectionConfig); > + > +cfs_register_file('jobs.cfg', > + sub { __PACKAGE__->parse_config(@_); }, > + sub { __PACKAGE__->write_config(@_); }); > + > +my $defaultData = { > + propertyList => { > + type => { description => "Section type." }, > + id => { > + description => "The ID of the VZDump job.", > + type => 'string', > + format => 'pve-configid', > + }, > + enabled => { > + description => "Determines if the job is enabled.", > + type => 'boolean', > + default => 1, > + optional => 1, > + }, > + schedule => { > + description => "Backup schedule. The format is a subset of `systemd` calendar events.", > + type => 'string', format => 'pve-calendar-event', > + maxLength => 128, > + }, > + }, > +}; > + > +sub private { > + return $defaultData; > +} > + > +sub parse_config { > + my ($class, $filename, $raw) = @_; > + > + my $cfg = $class->SUPER::parse_config($filename, $raw); > + > + foreach my $id (sort keys %{$cfg->{ids}}) { > + my $data = $cfg->{ids}->{$id}; > + > + $data->{id} = $id; > + $data->{enabled} //= 1; > + } > + > + return $cfg; > +} > + > +sub run { > + my ($class, $cfg) = @_; > + # implement in subclass > + die "not implemented"; > +} > + > +1; > diff --git a/PVE/Jobs/VZDump.pm b/PVE/Jobs/VZDump.pm > new file mode 100644 > index 00000000..043b7ace > --- /dev/null > +++ b/PVE/Jobs/VZDump.pm > @@ -0,0 +1,54 @@ > +package PVE::Jobs::VZDump; > + > +use strict; > +use warnings; > + > +use PVE::INotify; > +use PVE::VZDump::Common; > +use PVE::API2::VZDump; > +use PVE::Cluster; > + > +use base qw(PVE::Jobs::Plugin); > + > +sub type { > + return 'vzdump'; > +} > + > +my $props = PVE::VZDump::Common::json_config_properties(); > + > +sub properties { > + return $props; > +} > + > +sub options { > + my $options = { > + enabled => { optional => 1 }, > + schedule => {}, > + }; > + foreach my $opt (keys %$props) { > + if ($props->{$opt}->{optional}) { > + $options->{$opt} = { optional => 1 }; > + } else { > + $options->{$opt} = {}; > + } > + } > + > + return $options; > +} > + > +sub run { > + my ($class, $conf) = @_; > + > + # remove all non vzdump related options > + foreach my $opt (keys %$conf) { > + delete $conf->{$opt} if !defined($props->{$opt}); > + } > + > + $conf->{quiet} = 1; # do not write to stdout/stderr > + > + PVE::Cluster::cfs_update(); # refresh vmlist > + > + return PVE::API2::VZDump->vzdump($conf); > +} > + > +1; > diff --git a/PVE/Makefile b/PVE/Makefile > index 0071fab1..48b85d33 100644 > --- a/PVE/Makefile > +++ b/PVE/Makefile > @@ -1,6 +1,6 @@ > include ../defines.mk > > -SUBDIRS=API2 Status CLI Service Ceph > +SUBDIRS=API2 Status CLI Service Ceph Jobs > > PERLSOURCE = \ > API2.pm \ > @@ -11,6 +11,7 @@ PERLSOURCE = \ > CertHelpers.pm \ > ExtMetric.pm \ > HTTPServer.pm \ > + Jobs.pm \ > NodeConfig.pm \ > Report.pm \ > VZDump.pm >