From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <pve-devel-bounces@lists.proxmox.com> Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id BE9361FF172 for <inbox@lore.proxmox.com>; Tue, 1 Apr 2025 15:50:54 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id BAD5D33BA0; Tue, 1 Apr 2025 15:50:42 +0200 (CEST) Date: Tue, 1 Apr 2025 15:50:37 +0200 (CEST) From: =?UTF-8?Q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com> To: Proxmox VE development discussion <pve-devel@lists.proxmox.com> Message-ID: <1614620193.3974.1743515437162@webmail.proxmox.com> In-Reply-To: <mailman.943.1741688960.293.pve-devel@lists.proxmox.com> References: <20250311102905.2680524-1-alexandre.derumier@groupe-cyllene.com> <mailman.943.1741688960.293.pve-devel@lists.proxmox.com> MIME-Version: 1.0 X-Priority: 3 Importance: Normal X-Mailer: Open-Xchange Mailer v7.10.6-Rev75 X-Originating-Client: open-xchange-appsuite X-SPAM-LEVEL: Spam detection results: 0 AWL 0.046 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pve-devel] [PATCH v4 pve-storage 1/5] qcow2: add external snapshot support X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com> List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe> List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/> List-Post: <mailto:pve-devel@lists.proxmox.com> List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help> 
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe> Reply-To: Proxmox VE development discussion <pve-devel@lists.proxmox.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pve-devel-bounces@lists.proxmox.com Sender: "pve-devel" <pve-devel-bounces@lists.proxmox.com> > Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am 11.03.2025 11:28 CET geschrieben: some sort of description here would be great ;) > --- > src/PVE/Storage.pm | 4 +- > src/PVE/Storage/DirPlugin.pm | 1 + > src/PVE/Storage/Plugin.pm | 232 +++++++++++++++++++++++++++++------ > 3 files changed, 196 insertions(+), 41 deletions(-) > > diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm > index 3b4f041..79e5c3a 100755 > --- a/src/PVE/Storage.pm > +++ b/src/PVE/Storage.pm > @@ -1002,7 +1002,7 @@ sub unmap_volume { > } > > sub vdisk_alloc { > - my ($cfg, $storeid, $vmid, $fmt, $name, $size) = @_; > + my ($cfg, $storeid, $vmid, $fmt, $name, $size, $backing) = @_; > > die "no storage ID specified\n" if !$storeid; > > @@ -1025,7 +1025,7 @@ sub vdisk_alloc { > # lock shared storage > return $plugin->cluster_lock_storage($storeid, $scfg->{shared}, undef, sub { > my $old_umask = umask(umask|0037); > - my $volname = eval { $plugin->alloc_image($storeid, $scfg, $vmid, $fmt, $name, $size) }; > + my $volname = eval { $plugin->alloc_image($storeid, $scfg, $vmid, $fmt, $name, $size, $backing) }; > my $err = $@; > umask $old_umask; > die $err if $err; > diff --git a/src/PVE/Storage/DirPlugin.pm b/src/PVE/Storage/DirPlugin.pm > index fb23e0a..1cd7ac3 100644 > --- a/src/PVE/Storage/DirPlugin.pm > +++ b/src/PVE/Storage/DirPlugin.pm > @@ -81,6 +81,7 @@ sub options { > is_mountpoint => { optional => 1 }, > bwlimit => { optional => 1 }, > preallocation => { optional => 1 }, > + snapext => { optional => 1 }, > }; > } > > diff --git a/src/PVE/Storage/Plugin.pm 
b/src/PVE/Storage/Plugin.pm > index 65cf43f..d7f485f 100644 > --- a/src/PVE/Storage/Plugin.pm > +++ b/src/PVE/Storage/Plugin.pm > @@ -216,6 +216,11 @@ my $defaultData = { > maximum => 65535, > optional => 1, > }, > + 'snapext' => { > + type => 'boolean', > + description => 'enable external snapshot.', > + optional => 1, > + }, > }, > }; > > @@ -716,7 +721,11 @@ sub filesystem_path { > > my $dir = $class->get_subdir($scfg, $vtype); > > - $dir .= "/$vmid" if $vtype eq 'images'; > + if ($scfg->{snapext} && $snapname) { > + $name = $class->get_snap_volname($volname, $snapname); > + } else { > + $dir .= "/$vmid" if $vtype eq 'images'; > + } this is a bit weird, as it mixes volnames (with the `$vmid/` prefix) and names (without), it's only called twice in this patch, and this here already has $volname parsed, so could we maybe let get_snap_volname take and return the $name part without the dir? > > my $path = "$dir/$name"; > > @@ -873,7 +882,7 @@ sub clone_image { > } > > sub alloc_image { > - my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_; > + my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size, $backing) = @_; this extends the storage API, so it should actually do that.. and probably $backing should not be an arbitrary path, but something that is resolved locally? > > my $imagedir = $class->get_subdir($scfg, 'images'); > $imagedir .= "/$vmid"; > @@ -901,17 +910,11 @@ sub alloc_image { > umask $old_umask; > die $err if $err; > } else { > - my $cmd = ['/usr/bin/qemu-img', 'create']; > - > - my $prealloc_opt = preallocation_cmd_option($scfg, $fmt); > - push @$cmd, '-o', $prealloc_opt if defined($prealloc_opt); > > - push @$cmd, '-f', $fmt, $path, "${size}K"; > - > - eval { run_command($cmd, errmsg => "unable to create image"); }; > + eval { qemu_img_create($scfg, $fmt, $size, $path, $backing) }; > if ($@) { > unlink $path; > - rmdir $imagedir; > + rmdir $imagedir if !$backing; don't think this is needed, rmdir will fail if the dir isn't empty anyway.. 
> die "$@"; > } > } > @@ -955,6 +958,50 @@ sub free_image { > # TODO taken from PVE/QemuServer/Drive.pm, avoiding duplication would be nice > my @checked_qemu_img_formats = qw(raw cow qcow qcow2 qed vmdk cloop); > > +sub qemu_img_create { > + my ($scfg, $fmt, $size, $path, $backing) = @_; > + > + my $cmd = ['/usr/bin/qemu-img', 'create']; > + > + my $options = []; > + > + if($backing) { > + push @$cmd, '-b', $backing, '-F', 'qcow2'; > + push @$options, 'extended_l2=on','cluster_size=128k'; > + }; > + push @$options, preallocation_cmd_option($scfg, $fmt); > + push @$cmd, '-o', join(',', @$options) if @$options > 0; > + push @$cmd, '-f', $fmt, $path; > + push @$cmd, "${size}K" if !$backing; is this because it will automatically take the size from the backing image? > + > + run_command($cmd, errmsg => "unable to create image"); > +} > + > +sub qemu_img_info { > + my ($filename, $file_format, $timeout, $follow_backing_files) = @_; > + > + my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename]; > + push $cmd->@*, '-f', $file_format if $file_format; > + push $cmd->@*, '--backing-chain' if $follow_backing_files; > + > + my $json = ''; > + my $err_output = ''; > + eval { > + run_command($cmd, > + timeout => $timeout, > + outfunc => sub { $json .= shift }, > + errfunc => sub { $err_output .= shift . 
"\n"}, > + ); > + }; > + warn $@ if $@; > + if ($err_output) { > + # if qemu did not output anything to stdout we die with stderr as an error > + die $err_output if !$json; > + # otherwise we warn about it and try to parse the json > + warn $err_output; > + } > + return $json; > +} > # set $untrusted if the file in question might be malicious since it isn't > # created by our stack > # this makes certain checks fatal, and adds extra checks for known problems like > @@ -1018,25 +1065,9 @@ sub file_size_info { > warn "file_size_info: '$filename': falling back to 'raw' from unknown format '$file_format'\n"; > $file_format = 'raw'; > } > - my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename]; > - push $cmd->@*, '-f', $file_format if $file_format; > > - my $json = ''; > - my $err_output = ''; > - eval { > - run_command($cmd, > - timeout => $timeout, > - outfunc => sub { $json .= shift }, > - errfunc => sub { $err_output .= shift . "\n"}, > - ); > - }; > - warn $@ if $@; > - if ($err_output) { > - # if qemu did not output anything to stdout we die with stderr as an error > - die $err_output if !$json; > - # otherwise we warn about it and try to parse the json > - warn $err_output; > - } > + my $json = qemu_img_info($filename, $file_format, $timeout); > + > if (!$json) { > die "failed to query file information with qemu-img\n" if $untrusted; > # skip decoding if there was no output, e.g. if there was a timeout. > @@ -1162,11 +1193,29 @@ sub volume_snapshot { > > die "can't snapshot this image format\n" if $volname !~ m/\.(qcow2|qed)$/; > > - my $path = $class->filesystem_path($scfg, $volname); > + if($scfg->{snapext}) { > + > + my $path = $class->path($scfg, $volname, $storeid); > + my $snappath = $class->path($scfg, $volname, $storeid, $snap); > + #rename current volume to snap volume > + die "snapshot volume $snappath already exist\n" if -e $snappath; > + rename($path, $snappath) if -e $path; this is still looking weird.. 
I don't think it makes sense interface wise to allow snapshotting a volume that doesn't even exist.. > + > + my ($vtype, $name, $vmid, undef, undef, $isBase, $format) = > + $class->parse_volname($volname); > + > + $class->alloc_image($storeid, $scfg, $vmid, 'qcow2', $name, undef, $snappath); > + if ($@) { > + eval { $class->free_image($storeid, $scfg, $volname, 0) }; > + warn $@ if $@; missing cleanup - this should undo the rename from above > + } > > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path]; > + } else { > > - run_command($cmd); > + my $path = $class->filesystem_path($scfg, $volname); > + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path]; > + run_command($cmd); > + } > > return undef; > } > @@ -1177,6 +1226,21 @@ sub volume_snapshot { > sub volume_rollback_is_possible { > my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_; > > + if ($scfg->{snapext}) { > + #technically, we could manage multibranch, we it need lot more work for snapshot delete > + #we need to implemente block-stream from deleted snapshot to all others child branchs > + #when online, we need to do a transaction for multiple disk when delete the last snapshot > + #and need to merge in current running file > + > + my $snappath = $class->path($scfg, $volname, $storeid, $snap); > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname); > + my $parentsnap = $snapshots->{current}->{parent}; wouldn't it be enough to check that this equals $snap? 
> + > + return 1 if $snapshots->{$parentsnap}->{file} eq $snappath; > + > + die "can't rollback, '$snap' is not most recent snapshot on '$volname'\n"; > + } > + > return 1; > } > > @@ -1187,9 +1251,15 @@ sub volume_snapshot_rollback { > > my $path = $class->filesystem_path($scfg, $volname); > > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path]; > - > - run_command($cmd); > + if ($scfg->{snapext}) { > + #simply delete the current snapshot and recreate it > + my $path = $class->filesystem_path($scfg, $volname); > + unlink($path); > + $class->volume_snapshot($scfg, $storeid, $volname, $snap); instead of volume_snapshot, this could simply call alloc_image with the backing file? then volume_snapshot could always rename and always cleanup properly.. > + } else { > + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path]; > + run_command($cmd); > + } > > return undef; > } > @@ -1201,13 +1271,49 @@ sub volume_snapshot_delete { > > return 1 if $running; > > + my $cmd = ""; > my $path = $class->filesystem_path($scfg, $volname); > > - $class->deactivate_volume($storeid, $scfg, $volname, $snap, {}); > + if ($scfg->{snapext}) { > + > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname); > + my $snappath = $snapshots->{$snap}->{file}; > + die "volume $snappath is missing" if !-e $snappath; > > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path]; > + my $parentsnap = $snapshots->{$snap}->{parent}; > + my $childsnap = $snapshots->{$snap}->{child}; > > - run_command($cmd); > + my $parentpath = $snapshots->{$parentsnap}->{file} if $parentsnap; > + my $childpath = $snapshots->{$childsnap}->{file} if $childsnap; my $foo = .. if ...; is forbidden in our code ;) but I think we always need to have a childsnap anyway, right? so we could simply check for that, and then switch around the two branches below so that one of them can do if (my $parentsnap = ...) { ... } else { ... 
} > + > + #if first snapshot,as it should be bigger, we merge child, and rename the snapshot to child > + if(!$parentsnap) { > + print"commit $childpath\n"; > + $cmd = ['/usr/bin/qemu-img', 'commit', $childpath]; we could provide `-d` here to skip emptying $childpath since we rename over it anyway below.. > + eval { run_command($cmd) }; > + if ($@) { > + die "error commiting $childpath to $parentpath; $@\n"; this is wrong, there is no $parentpath.. we are committing into $snappath > + } > + print"rename $snappath to $childpath\n"; > + rename($snappath, $childpath); what if this fails? > + } else { > + #we rebase the child image on the parent as new backing image should we extend this to make it clear what this means? it means copying any parts of $snap that are not in $parent and not yet overwritten by $child into $child, right? so how expensive this is depends on: - how many changes are between $parent and $snap (increases cost) - how many of those are overwritten by changes between $snap and $child (decreases cost) > + die "missing parentsnap snapshot to rebase child $childpath\n" if !$parentpath; how can this happen? if there is a parentsnap there must be a parentpath as well? 
> + $cmd = ['/usr/bin/qemu-img', 'rebase', '-b', $parentpath, '-F', 'qcow2', '-f', 'qcow2', $childpath]; > + eval { run_command($cmd) }; > + if ($@) { > + die "error rebase $childpath from $parentpath; $@\n"; > + } > + #delete the snapshot > + unlink($snappath); > + } > + > + } else { > + $class->deactivate_volume($storeid, $scfg, $volname, $snap, {}); > + > + $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path]; > + run_command($cmd); > + } > > return undef; > } > @@ -1246,7 +1352,7 @@ sub volume_has_feature { > current => { qcow2 => 1, raw => 1, vmdk => 1 }, > }, > rename => { > - current => {qcow2 => 1, raw => 1, vmdk => 1}, > + current => { qcow2 => 1, raw => 1, vmdk => 1}, > }, > }; > > @@ -1481,7 +1587,37 @@ sub status { > sub volume_snapshot_info { > my ($class, $scfg, $storeid, $volname) = @_; > > - die "volume_snapshot_info is not implemented for $class"; > + my $path = $class->filesystem_path($scfg, $volname); > + > + my $backing_chain = 1; > + my $json = qemu_img_info($path, undef, 10, $backing_chain); > + die "failed to query file information with qemu-img\n" if !$json; > + my $snapshots = eval { decode_json($json) }; missing error handling for json decoding.. 
> + > + my $info = {}; > + my $order = 0; > + for my $snap (@$snapshots) { > + > + my $snapfile = $snap->{filename}; > + my $snapname = parse_snapname($snapfile); > + $snapname = 'current' if !$snapname; > + my $snapvolname = $class->get_snap_volname($volname, $snapname); > + > + $info->{$snapname}->{order} = $order; > + $info->{$snapname}->{file}= $snapfile; > + $info->{$snapname}->{volname} = $snapvolname; > + $info->{$snapname}->{volid} = "$storeid:$snapvolname"; > + $info->{$snapname}->{ext} = 1; > + > + my $parentfile = $snap->{'backing-filename'}; > + if ($parentfile) { > + my $parentname = parse_snapname($parentfile); > + $info->{$snapname}->{parent} = $parentname; > + $info->{$parentname}->{child} = $snapname; > + } > + $order++; > + } > + return $info; > } > > sub activate_storage { > @@ -1867,4 +2003,22 @@ sub config_aware_base_mkdir { > } > } > > +sub get_snap_volname { > + my ($class, $volname, $snapname) = @_; > + > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname); > + $name = !$snapname || $snapname eq 'current' ? $volname : "$vmid/snap-$snapname-$name"; other way round would be better to group by volume first IMHO ($vmid/snap-$name-$snapname), as this is similar to how we encode snapshots often on the storage level (volume@snap). we also need to have some delimiter between snapshot and volume name that is not allowed in either (hard for volname since basically everything but '/' goes, but snapshots have a restricted character set (configid, which means alphanumeric, hyphen and underscore), so we could use something like '.' as delimiter? or we switch to directories and do $vmid/snap/$snap/$name?) 
> + return $name; > +} > + > +sub parse_snapname { > + my ($name) = @_; > + > + my $basename = basename($name); > + if ($basename =~ m/^snap-(.*)-vm(.*)$/) { this is not strict enough, see above > + return $1; > + } > + return undef; > +} > + > 1; > -- > 2.39.5 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel