From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id E5B777BAF1 for ; Tue, 2 Nov 2021 15:34:13 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id C4581A424 for ; Tue, 2 Nov 2021 15:33:43 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 9DC0FA419 for ; Tue, 2 Nov 2021 15:33:42 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 6A9B146014; Tue, 2 Nov 2021 15:33:42 +0100 (CET) Message-ID: Date: Tue, 2 Nov 2021 15:33:40 +0100 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:94.0) Gecko/20100101 Thunderbird/94.0 Content-Language: en-US To: Fabian Ebner , pve-devel@lists.proxmox.com References: <20211007082727.1385888-1-d.csapak@proxmox.com> <20211007082727.1385888-6-d.csapak@proxmox.com> <189b3128-bbb3-f5d3-c164-ffdf3318e543@proxmox.com> From: Dominik Csapak In-Reply-To: <189b3128-bbb3-f5d3-c164-ffdf3318e543@proxmox.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.826 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment NICE_REPLY_A -1.14 Looks like a legit reply (A) SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pve-devel] [PATCH manager 4/7] add PVE/Jobs to handle VZDump jobs X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 02 Nov 2021 14:34:13 -0000 On 11/2/21 14:52, Fabian Ebner wrote: [snip] >> + >> +sub lock_job_state { >> +    my ($jobid, $type, $sub) = @_; >> + >> +    my $filename = "$lock_dir/$type-$jobid.lck"; > > Should new locks use .lock? yes, thanks, most of such things are copied... > >> + >> +    my $res = PVE::Tools::lock_file($filename, 10, $sub); >> +    die $@ if $@; >> + >> +    return $res; >> +} >> + >> +# returns the state and checks if the job was 'started' and is now >> finished >> +sub get_job_state { > > It feels a bit weird that this function does two things. Maybe it's > worth separating the "check if finished" part? yeah probably, but i'd still want to do them under a single lock so get_and_check_job_state would be better? > >> +    my ($jobid, $type) = @_; >> + >> +    # first check unlocked to save time, >> +    my $state = read_job_state($jobid, $type); >> +    if ($state->{state} ne 'started') { >> +    return $state; # nothing to check >> +    } >> + >> +    return lock_job_state($jobid, $type, sub { >> +    my $state = read_job_state($jobid, $type); >> + >> +    if ($state->{state} ne 'started') { >> +        return $state; # nothing to check >> +    } >> + >> +    my ($task, $filename) = PVE::Tools::upid_decode($state->{upid}, 1); >> +    die "unable to parse worker upid\n" if !$task; >> +    die "no such task\n" if ! -f $filename; >> + >> +    my $pstart = PVE::ProcFSTools::read_proc_starttime($task->{pid}); >> +    if ($pstart && $pstart == $task->{pstart}) { >> +        return $state; # still running >> +    } >> + >> +    my $new_state = { >> +        state => 'stopped', >> +        msg => PVE::Tools::upid_read_status($state->{upid}), >> +        upid => $state->{upid} >> +    }; >> + >> +    my $path = $get_state_file->($jobid, $type); >> +    PVE::Tools::file_set_contents($path, encode_json($new_state)); >> +    return $new_state; >> +    }); >> +} >> + >> +# must be called when the job is first created >> +sub create_job { >> +    my ($jobid, $type) = @_; >> + >> +    lock_job_state($jobid, $type, sub { >> +    my $state = read_job_state($jobid, $type); >> + >> +    if ($state->{state} ne 'created') { >> +        die "job state already exists\n"; >> +    } >> + >> +    $state->{time} = time(); >> + >> +    my $path = $get_state_file->($jobid, $type); >> +    PVE::Tools::file_set_contents($path, encode_json($state)); >> +    }); >> +} >> + >> +# to be called when the job is removed >> +sub remove_job { >> +    my ($jobid, $type) = @_; >> +    my $path = $get_state_file->($jobid, $type); >> +    unlink $path; >> +} > > From what I can tell, this can be called while the job might be active > and then suddenly the state file is gone. Is that a problem? you're right, the finished job would be writing to the file again the solution would imho be to check if the statefile still exists on finishing. if not, do not write (because the jobs does not exist anymore) or we could forbid removing a job while its still running.. > >> + >> +# will be called when the job was started according to schedule >> +sub started_job { >> +    my ($jobid, $type, $upid) = @_; >> +    lock_job_state($jobid, $type, sub { >> +    my $state = { >> +        state => 'started', >> +        upid => $upid, >> +    }; >> + >> +    my $path = $get_state_file->($jobid, $type); >> +    PVE::Tools::file_set_contents($path, encode_json($state)); >> +    }); >> +} >> + >> +# will be called when the job schedule is updated >> +sub updated_job_schedule { >> +    my ($jobid, $type) = @_; >> +    lock_job_state($jobid, $type, sub { >> +    my $old_state = read_job_state($jobid, $type); >> + >> +    if ($old_state->{state} eq 'started') { >> +        return; # do not update timestamp on running jobs >> +    } >> + >> +    $old_state->{updated} = time(); >> + >> +    my $path = $get_state_file->($jobid, $type); >> +    PVE::Tools::file_set_contents($path, encode_json($old_state)); >> +    }); >> +} >> + >> +sub get_last_runtime { >> +    my ($jobid, $type) = @_; >> + >> +    my $state = read_job_state($jobid, $type); >> + >> +    if (defined($state->{updated})) { >> +    return $state->{updated}; >> +    } > > I'm confused. Why does 'updated' overwrite the last runtime? Should it > return 0 like for a new job? Is there a problem returning the normal > last runtime for an updated job? for the next run we have to consider the updated time (new jobs get the current time, not 0) because if i change from e.g. 10:00 to 8:00 and it is currently 9:00, it would be unexpected that the job starts instantly, instead of the next time its 10:00 (we do the same in pbs for all kinds of jobs) > >> + >> +    if (my $upid = $state->{upid}) { >> +    my ($task) = PVE::Tools::upid_decode($upid, 1); >> +    die "unable to parse worker upid\n" if !$task; >> +    return $task->{starttime}; >> +    } >> + >> +    return $state->{time} // 0; >> +} >> + >> +sub run_jobs { >> +    my $jobs_cfg = cfs_read_file('jobs.cfg'); >> +    my $nodename = PVE::INotify::nodename(); >> + >> +    foreach my $id (sort keys %{$jobs_cfg->{ids}}) { >> +    my $cfg = $jobs_cfg->{ids}->{$id}; >> +    my $type = $cfg->{type}; >> +    my $schedule = delete $cfg->{schedule}; > > Why delete? good question, it's maybe leftover of some intermittent code change. i'll check and change/comment on it in a v2 it's probably because the vzdump code does not understand the 'schedule' parameter and it's more of a 'meta' information > >> + >> +    # only schedule local jobs >> +    next if defined($cfg->{node}) && $cfg->{node} eq $nodename; > > Shouldn't this be 'ne'? you are absolutely right ;) > >> + >> +    # only schedule enabled jobs >> +    next if defined($cfg->{enabled}) && !$cfg->{enabled}; >> + >> +    my $last_run = get_last_runtime($id, $type); >> +    my $calspec = PVE::CalendarEvent::parse_calendar_event($schedule); >> +    my $next_sync = PVE::CalendarEvent::compute_next_event($calspec, >> $last_run) // 0; >> + >> +    if (time() >= $next_sync) { >> +        # only warn on errors, so that all jobs can run >> +        my $state = get_job_state($id, $type); # to update the state >> + >> +        next if $state->{state} eq 'started'; # still running >> + >> +        my $plugin = PVE::Jobs::Plugin->lookup($type); >> + >> +        my $upid = eval { $plugin->run($cfg) }; >> +        warn $@ if $@; >> +        if ($upid) { >> +        started_job($id, $type, $upid); >> +        } >> +    } >> +    } >> +} >> + >> +sub setup_dirs { >> +    mkdir $state_dir; >> +    mkdir $lock_dir; >> +} >> + >> +1; >> diff --git a/PVE/Jobs/Makefile b/PVE/Jobs/Makefile >> new file mode 100644 >> index 00000000..6023c3ba >> --- /dev/null >> +++ b/PVE/Jobs/Makefile >> @@ -0,0 +1,16 @@ >> +include ../../defines.mk >> + >> +PERLSOURCE =   \ >> +    Plugin.pm\ >> +    VZDump.pm >> + >> +all: >> + >> +.PHONY: clean >> +clean: >> +    rm -rf *~ >> + >> +.PHONY: install >> +install: ${PERLSOURCE} >> +    install -d ${PERLLIBDIR}/PVE/Jobs >> +    install -m 0644 ${PERLSOURCE} ${PERLLIBDIR}/PVE/Jobs >> diff --git a/PVE/Jobs/Plugin.pm b/PVE/Jobs/Plugin.pm >> new file mode 100644 >> index 00000000..69c31cf2 >> --- /dev/null >> +++ b/PVE/Jobs/Plugin.pm >> @@ -0,0 +1,61 @@ >> +package PVE::Jobs::Plugin; >> + >> +use strict; >> +use warnings; >> + >> +use PVE::Cluster qw(cfs_register_file); >> + >> +use base qw(PVE::SectionConfig); >> + >> +cfs_register_file('jobs.cfg', >> +          sub { __PACKAGE__->parse_config(@_); }, >> +          sub { __PACKAGE__->write_config(@_); }); >> + >> +my $defaultData = { >> +    propertyList => { >> +    type => { description => "Section type." }, >> +    id => { >> +        description => "The ID of the VZDump job.", >> +        type => 'string', >> +        format => 'pve-configid', >> +    }, >> +    enabled => { >> +        description => "Determines if the job is enabled.", >> +        type => 'boolean', >> +        default => 1, >> +        optional => 1, >> +    }, >> +    schedule => { >> +        description => "Backup schedule. The format is a subset of >> `systemd` calendar events.", >> +        type => 'string', format => 'pve-calendar-event', >> +        maxLength => 128, >> +    }, >> +    }, >> +}; >> + >> +sub private { >> +    return $defaultData; >> +} >> + >> +sub parse_config { >> +    my ($class, $filename, $raw) = @_; >> + >> +    my $cfg = $class->SUPER::parse_config($filename, $raw); >> + >> +    foreach my $id (sort keys %{$cfg->{ids}}) { >> +    my $data = $cfg->{ids}->{$id}; >> + >> +    $data->{id} = $id; >> +    $data->{enabled}  //= 1; >> +   } >> + >> +   return $cfg; >> +} >> + >> +sub run { >> +    my ($class, $cfg) = @_; >> +    # implement in subclass >> +    die "not implemented"; >> +} >> + >> +1; >> diff --git a/PVE/Jobs/VZDump.pm b/PVE/Jobs/VZDump.pm >> new file mode 100644 >> index 00000000..043b7ace >> --- /dev/null >> +++ b/PVE/Jobs/VZDump.pm >> @@ -0,0 +1,54 @@ >> +package PVE::Jobs::VZDump; >> + >> +use strict; >> +use warnings; >> + >> +use PVE::INotify; >> +use PVE::VZDump::Common; >> +use PVE::API2::VZDump; >> +use PVE::Cluster; >> + >> +use base qw(PVE::Jobs::Plugin); >> + >> +sub type { >> +    return 'vzdump'; >> +} >> + >> +my $props = PVE::VZDump::Common::json_config_properties(); >> + >> +sub properties { >> +    return $props; >> +} >> + >> +sub options { >> +    my $options = { >> +    enabled => { optional => 1 }, >> +    schedule => {}, >> +    }; >> +    foreach my $opt (keys %$props) { >> +    if ($props->{$opt}->{optional}) { >> +        $options->{$opt} = { optional => 1 }; >> +    } else { >> +        $options->{$opt} = {}; >> +    } >> +    } >> + >> +    return $options; >> +} >> + >> +sub run { >> +    my ($class, $conf) = @_; >> + >> +    # remove all non vzdump related options >> +    foreach my $opt (keys %$conf) { >> +    delete $conf->{$opt} if !defined($props->{$opt}); >> +    } >> + >> +    $conf->{quiet} = 1; # do not write to stdout/stderr >> + >> +    PVE::Cluster::cfs_update(); # refresh vmlist >> + >> +    return PVE::API2::VZDump->vzdump($conf); >> +} >> + >> +1; >> diff --git a/PVE/Makefile b/PVE/Makefile >> index 0071fab1..48b85d33 100644 >> --- a/PVE/Makefile >> +++ b/PVE/Makefile >> @@ -1,6 +1,6 @@ >>   include ../defines.mk >> -SUBDIRS=API2 Status CLI Service Ceph >> +SUBDIRS=API2 Status CLI Service Ceph Jobs >>   PERLSOURCE =             \ >>       API2.pm            \ >> @@ -11,6 +11,7 @@ PERLSOURCE =             \ >>       CertHelpers.pm        \ >>       ExtMetric.pm        \ >>       HTTPServer.pm        \ >> +    Jobs.pm            \ >>       NodeConfig.pm        \ >>       Report.pm        \ >>       VZDump.pm >>