From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <pve-devel-bounces@lists.proxmox.com> Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 9684E1FF164 for <inbox@lore.proxmox.com>; Fri, 9 May 2025 12:30:21 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 989523A924; Fri, 9 May 2025 12:30:40 +0200 (CEST) Date: Fri, 9 May 2025 12:30:04 +0200 (CEST) From: =?UTF-8?Q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com> To: Proxmox VE development discussion <pve-devel@lists.proxmox.com> Message-ID: <383352558.12917.1746786604729@webmail.proxmox.com> In-Reply-To: <mailman.39.1745322774.394.pve-devel@lists.proxmox.com> References: <20250422115141.808427-1-alexandre.derumier@groupe-cyllene.com> <mailman.39.1745322774.394.pve-devel@lists.proxmox.com> MIME-Version: 1.0 X-Priority: 3 Importance: Normal X-Mailer: Open-Xchange Mailer v7.10.6-Rev75 X-Originating-Client: open-xchange-appsuite X-SPAM-LEVEL: Spam detection results: 0 AWL 0.046 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pve-devel] [PATCH pve-storage 2/5] qcow2: add external snapshot support X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com> List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe> List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/> List-Post: <mailto:pve-devel@lists.proxmox.com> List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help> List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe> Reply-To: Proxmox VE development discussion <pve-devel@lists.proxmox.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pve-devel-bounces@lists.proxmox.com Sender: "pve-devel" <pve-devel-bounces@lists.proxmox.com> > Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am 22.04.2025 13:51 CEST geschrieben: > add a snapext option to enable the feature > > When a snapshot is taken, the current volume is renamed to snap volname > and a current image is created with the snap volume as backing file > > Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com> > --- > src/PVE/Storage.pm | 5 +- > src/PVE/Storage/DirPlugin.pm | 1 + > src/PVE/Storage/Plugin.pm | 277 ++++++++++++++++++++++++++++++----- > 3 files changed, 242 insertions(+), 41 deletions(-) > > diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm > index 1a37cc8..db9d190 100755 > --- a/src/PVE/Storage.pm > +++ b/src/PVE/Storage.pm > @@ -348,13 +348,13 @@ sub volume_rollback_is_possible { > } > > sub volume_snapshot { > - my ($cfg, $volid, $snap) = @_; > + my ($cfg, $volid, $snap, $running) = @_; > > my ($storeid, $volname) = parse_volume_id($volid, 1); > if ($storeid) { > my $scfg = storage_config($cfg, $storeid); > my $plugin = PVE::Storage::Plugin->lookup($scfg->{type}); > - return $plugin->volume_snapshot($scfg, $storeid, $volname, $snap); > + return $plugin->volume_snapshot($scfg, $storeid, $volname, $snap, $running); this is an API bump, should be called out somewhere and documented what it means > } elsif ($volid =~ m|^(/.+)$| && -e $volid) { > die "snapshot file/device '$volid' is not possible\n"; > } else { > @@ -378,7 +378,6 @@ sub volume_snapshot_rollback { > } > } > > -# FIXME PVE 8.x remove $running parameter (needs APIAGE reset) > sub volume_snapshot_delete { > my ($cfg, $volid, $snap, $running) = @_; > > diff --git a/src/PVE/Storage/DirPlugin.pm b/src/PVE/Storage/DirPlugin.pm > index 734309f..54d8d74 100644 > --- a/src/PVE/Storage/DirPlugin.pm > +++ b/src/PVE/Storage/DirPlugin.pm > @@ -83,6 +83,7 @@ sub options { > is_mountpoint => { optional => 1 }, > bwlimit => { optional => 1 }, > preallocation => { optional => 1 }, > + snapext => { optional => 1 }, > }; > } > > diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm > index 85f761c..3f83fae 100644 > --- a/src/PVE/Storage/Plugin.pm > +++ b/src/PVE/Storage/Plugin.pm > @@ -215,6 +215,11 @@ my $defaultData = { > maximum => 65535, > optional => 1, > }, > + 'snapext' => { > + type => 'boolean', > + description => 'enable external snapshot.', > + optional => 1, > + }, > }, > }; > > @@ -734,6 +739,8 @@ sub filesystem_path { > my ($vtype, $name, $vmid, undef, undef, $isBase, $format) = > $class->parse_volname($volname); > > + $name = $class->get_snap_name($volname, $snapname) if $scfg->{snapext} && $snapname; > + > # Note: qcow2/qed has internal snapshot, so path is always > # the same (with or without snapshot => same file). > die "can't snapshot this image format\n" > @@ -926,14 +933,8 @@ sub alloc_image { > umask $old_umask; > die $err if $err; > } else { > - my $cmd = ['/usr/bin/qemu-img', 'create']; > - > - my $prealloc_opt = preallocation_cmd_option($scfg, $fmt); > - push @$cmd, '-o', $prealloc_opt if defined($prealloc_opt); > > - push @$cmd, '-f', $fmt, $path, "${size}K"; > - > - eval { run_command($cmd, errmsg => "unable to create image"); }; > + eval { qemu_img_create($scfg, $fmt, $size, $path) }; this should be a separate commit, without any semantic changes.. > if ($@) { > unlink $path; > rmdir $imagedir; > @@ -944,6 +945,19 @@ sub alloc_image { > return "$vmid/$name"; > } > > +sub alloc_snap_image { this should be private.. it's also kind of misnamed, as it doesn't allocate a snapshot image, it allocates an image backed by a snapshot? > + my ($class, $storeid, $scfg, $volname, $backing_snap) = @_; > + > + my $path = $class->path($scfg, $volname, $storeid); > + my $backing_path = $class->path($scfg, $volname, $storeid, $backing_snap); > + > + eval { qemu_img_create($scfg, 'qcow2', undef, $path, $backing_path) }; > + if ($@) { > + unlink $path; > + die "$@"; > + } > +} > + > sub free_image { > my ($class, $storeid, $scfg, $volname, $isBase, $format) = @_; > > @@ -980,6 +994,51 @@ sub free_image { > # TODO taken from PVE/QemuServer/Drive.pm, avoiding duplication would be nice > my @checked_qemu_img_formats = qw(raw qcow qcow2 qed vmdk cloop); > > +sub qemu_img_create { should live in some helper module.. > + my ($scfg, $fmt, $size, $path, $backing_path) = @_; > + > + my $cmd = ['/usr/bin/qemu-img', 'create']; > + > + my $options = []; > + > + if($backing_path) { > + push @$cmd, '-b', $backing_path, '-F', 'qcow2'; > + push @$options, 'extended_l2=on','cluster_size=128k'; > + }; > + push @$options, preallocation_cmd_option($scfg, $fmt); > + push @$cmd, '-o', join(',', @$options) if @$options > 0; > + push @$cmd, '-f', $fmt, $path; > + push @$cmd, "${size}K" if !$backing_path; > + > + run_command($cmd, errmsg => "unable to create image"); > +} > + > +sub qemu_img_info { extracting this should be its own commit first, then changes in this commit.. should this also live in some helper module instead of the plugin here? > + my ($filename, $file_format, $timeout, $follow_backing_files) = @_; > + > + my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename]; > + push $cmd->@*, '-f', $file_format if $file_format; > + push $cmd->@*, '--backing-chain' if $follow_backing_files; > + > + my $json = ''; > + my $err_output = ''; > + eval { > + run_command($cmd, > + timeout => $timeout, > + outfunc => sub { $json .= shift }, > + errfunc => sub { $err_output .= shift . "\n"}, > + ); > + }; > + warn $@ if $@; > + if ($err_output) { > + # if qemu did not output anything to stdout we die with stderr as an error > + die $err_output if !$json; > + # otherwise we warn about it and try to parse the json > + warn $err_output; > + } > + return $json; > +} > + > # set $untrusted if the file in question might be malicious since it isn't > # created by our stack > # this makes certain checks fatal, and adds extra checks for known problems like > @@ -1043,25 +1102,9 @@ sub file_size_info { > warn "file_size_info: '$filename': falling back to 'raw' from unknown format '$file_format'\n"; > $file_format = 'raw'; > } > - my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename]; > - push $cmd->@*, '-f', $file_format if $file_format; > > - my $json = ''; > - my $err_output = ''; > - eval { > - run_command($cmd, > - timeout => $timeout, > - outfunc => sub { $json .= shift }, > - errfunc => sub { $err_output .= shift . "\n"}, > - ); > - }; > - warn $@ if $@; > - if ($err_output) { > - # if qemu did not output anything to stdout we die with stderr as an error > - die $err_output if !$json; > - # otherwise we warn about it and try to parse the json > - warn $err_output; > - } > + my $json = qemu_img_info($filename, $file_format, $timeout); > + > if (!$json) { > die "failed to query file information with qemu-img\n" if $untrusted; > # skip decoding if there was no output, e.g. if there was a timeout. > @@ -1183,15 +1226,37 @@ sub volume_resize { > } > > sub volume_snapshot { > - my ($class, $scfg, $storeid, $volname, $snap) = @_; > + my ($class, $scfg, $storeid, $volname, $snap, $running) = @_; > > die "can't snapshot this image format\n" if $volname !~ m/\.(qcow2|qed)$/; > > - my $path = $class->filesystem_path($scfg, $volname); > + if($scfg->{snapext}) { > + > + if ($running) { > + #rename with blockdev-reopen is done at qemu level when running > + $class->alloc_snap_image($storeid, $scfg, $volname, $snap); > + return; > + } > > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path]; > + #rename current volume to snap volume > + my $vmid = ($class->parse_volname($volname))[2]; > + $class->rename_volume($scfg, $storeid, $volname, $vmid, undef, 'current', $snap); > > - run_command($cmd); > + $class->alloc_snap_image($storeid, $scfg, $volname, $snap); > + > + if ($@) { this error here needs to be logged.. > + eval { $class->free_image($storeid, $scfg, $volname, 0) }; > + warn $@ if $@; > + eval { $class->rename_volume($scfg, $storeid, $volname, $vmid, undef, $snap, 'current') }; > + warn $@ if $@; and here we need to die to notify the upper stack that taking the snapshot failed > + } > + > + } else { > + > + my $path = $class->filesystem_path($scfg, $volname); > + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path]; > + run_command($cmd); > + } > > return undef; > } > @@ -1202,6 +1267,21 @@ sub volume_snapshot { > sub volume_rollback_is_possible { > my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_; > > + if ($scfg->{snapext}) { > + #technically, we could manage multibranch, we it need lot more work for snapshot delete > + #we need to implemente block-stream from deleted snapshot to all others child branchs > + #when online, we need to do a transaction for multiple disk when delete the last snapshot > + #and need to merge in current running file > + > + my $snappath = $class->path($scfg, $volname, $storeid, $snap); > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname); > + my $parentsnap = $snapshots->{current}->{parent}; > + > + return 1 if $parentsnap eq $snap; > + > + die "can't rollback, '$snap' is not most recent snapshot on '$volname'\n"; > + } > + > return 1; > } > > @@ -1212,9 +1292,21 @@ sub volume_snapshot_rollback { > > my $path = $class->filesystem_path($scfg, $volname); $path is only used in the else branch.. > > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path]; > + if ($scfg->{snapext}) { > + #simply delete the current snapshot and recreate it > + eval { $class->free_image($storeid, $scfg, $volname, 0) }; > + if ($@) { > + die "can't delete old volume $volname: $@\n"; > + } > > - run_command($cmd); > + eval { $class->alloc_snap_image($storeid, $scfg, $volname, $snap) }; > + if ($@) { > + die "can't allocate new volume $volname: $@\n"; > + } > + } else { > + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path]; > + run_command($cmd); > + } > > return undef; > } > @@ -1224,15 +1316,65 @@ sub volume_snapshot_delete { > > die "can't delete snapshot for this image format\n" if $volname !~ m/\.(qcow2|qed)$/; > > - return 1 if $running; > - > + my $cmd = ""; > my $path = $class->filesystem_path($scfg, $volname); $path is only used in the else branch.. > > - $class->deactivate_volume($storeid, $scfg, $volname, $snap, {}); > + if ($scfg->{snapext}) { > + > + if ($running) { should we add a comment here noting what this means? i.e., qemu has already removed that snapshot from the backing chain, therefore we only have to drop the image itself? > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname); > + $volname = $class->get_snap_volname($volname, $snap); > + $class->free_image($storeid, $scfg, $volname, $isBase, $format); > + return; > + } > + > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname); > + my $snappath = $snapshots->{$snap}->{file}; > + my $snap_volname = $snapshots->{$snap}->{volname}; > + die "volume $snappath is missing" if !-e $snappath; > + > + my $parentsnap = $snapshots->{$snap}->{parent}; > + my $childsnap = $snapshots->{$snap}->{child}; > + my $childpath = $snapshots->{$childsnap}->{file}; > + > + #if first snapshot,as it should be bigger, we merge child, and rename the snapshot to child > + if(!$parentsnap) { > + print "commit: merge content of $childpath into $snappath\n"; > + $cmd = ['/usr/bin/qemu-img', 'commit', $childpath]; > + eval { run_command($cmd) }; > + if ($@) { should we add an error here about what state this leaves the snapshot in? AFAIU we've potentially written some of the data from $child to $snap, so the state of $snap is technically invalid now? > + die "error commiting $childpath to $snappath; $@\n"; > + } > + print"rename $snappath to $childpath\n"; > + eval { rename($snappath, $childpath) }; rename doesn't die and set $@.. > + if ($@) { > + die "error renaming snapshot: $@\n"; > + } > + } else { > + #we rebase the child image on the parent as new backing image > + my $parentpath = $snapshots->{$parentsnap}->{file}; > + print "rebase: merge diff content between $parentpath and $childpath into $childpath\n"; > + $cmd = ['/usr/bin/qemu-img', 'rebase', '-b', $parentpath, '-F', 'qcow2', '-f', 'qcow2', $childpath]; > + eval { run_command($cmd) }; > + if ($@) { > + die "error rebase $childpath from $parentpath; $@\n"; same here, but in this case $child just contains some duplicate data so nothing is really broken? > + } > + #delete the snapshot > + eval { $class->free_image($storeid, $scfg, $snap_volname, 0); }; > + if ($@) { and here we just leave a stray volume around that is not part of the backing chain anymore, right? > + die "error delete old snapshot volume $snap_volname: $@\n"; > + } > + } > + > + } else { > > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path]; > + return 1 if $running; > > - run_command($cmd); > + $class->deactivate_volume($storeid, $scfg, $volname, $snap, {}); > + > + $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path]; > + run_command($cmd); > + } > > return undef; > } > @@ -1271,7 +1413,7 @@ sub volume_has_feature { > current => { qcow2 => 1, raw => 1, vmdk => 1 }, > }, > rename => { > - current => {qcow2 => 1, raw => 1, vmdk => 1}, > + current => { qcow2 => 1, raw => 1, vmdk => 1}, unrelated change.. > }, > }; > > @@ -1506,7 +1648,40 @@ sub status { > sub volume_snapshot_info { > my ($class, $scfg, $storeid, $volname) = @_; > > - die "volume_snapshot_info is not implemented for $class"; > + my $path = $class->filesystem_path($scfg, $volname); > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname); > + > + my $backing_chain = 1; shouldn't this depend on $snapext ? > + my $json = qemu_img_info($path, undef, 10, $backing_chain); > + die "failed to query file information with qemu-img\n" if !$json; > + my $snapshots = eval { decode_json($json) }; > + if ($@) { > + die "Can't decode qemu snapshot list. Invalid JSON\n"; should also contain $@ so we get an idea *what is wrong*.. > + } > + my $info = {}; > + my $order = 0; > + for my $snap (@$snapshots) { this doesn't work for internal snapshots, as then qemu-img info just returns a single object.. or if we pass --backingchain also in that case, then the code below still doesn't correctly handle it.. > + > + my $snapfile = $snap->{filename}; > + my $snapname = parse_snapname($snapfile); > + $snapname = 'current' if !$snapname; > + my $snapvolname = $class->get_snap_volname($volname, $snapname); > + > + $info->{$snapname}->{order} = $order; > + $info->{$snapname}->{file}= $snapfile; > + $info->{$snapname}->{volname} = "$snapvolname"; > + $info->{$snapname}->{volid} = "$storeid:$snapvolname"; > + $info->{$snapname}->{ext} = 1; only if $snapext? > + > + my $parentfile = $snap->{'backing-filename'}; > + if ($parentfile) { > + my $parentname = parse_snapname($parentfile); > + $info->{$snapname}->{parent} = $parentname; > + $info->{$parentname}->{child} = $snapname; > + } > + $order++; > + } > + return $info; > } > > sub activate_storage { > @@ -1907,4 +2082,30 @@ sub config_aware_base_mkdir { > } > } > > +sub get_snap_name { > + my ($class, $volname, $snapname) = @_; > + > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname); > + $name = !$snapname || $snapname eq 'current' ? $name : "snap-$snapname-$name"; > + return $name; > +} > + > +sub get_snap_volname { > + my ($class, $volname, $snapname) = @_; > + > + my $vmid = ($class->parse_volname($volname))[2]; > + my $name = $class->get_snap_name($volname, $snapname); > + return "$vmid/$name"; > +} > + > +sub parse_snapname { > + my ($name) = @_; > + > + my $basename = basename($name); > + if ($basename =~ m/^snap-(.*)-vm(.*)$/) { > + return $1; > + } > + return undef; > +} > + > 1; > -- > 2.39.5 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel