* [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
[not found] <20240930113153.2896648-1-alexandre.derumier@groupe-cyllene.com>
@ 2024-09-30 11:31 ` Alexandre Derumier via pve-devel
2024-10-23 10:12 ` Fabian Grünbichler
2024-09-30 11:31 ` [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot Alexandre Derumier via pve-devel
` (2 subsequent siblings)
3 siblings, 1 reply; 27+ messages in thread
From: Alexandre Derumier via pve-devel @ 2024-09-30 11:31 UTC (permalink / raw)
To: pve-devel; +Cc: Alexandre Derumier
[-- Attachment #1: Type: message/rfc822, Size: 14279 bytes --]
From: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH v2 pve-storage 1/2] add external snasphot support
Date: Mon, 30 Sep 2024 13:31:51 +0200
Message-ID: <20240930113153.2896648-2-alexandre.derumier@groupe-cyllene.com>
Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
---
src/PVE/Storage/DirPlugin.pm | 1 +
src/PVE/Storage/Plugin.pm | 225 +++++++++++++++++++++++++++++++----
2 files changed, 201 insertions(+), 25 deletions(-)
diff --git a/src/PVE/Storage/DirPlugin.pm b/src/PVE/Storage/DirPlugin.pm
index 2efa8d5..2bef673 100644
--- a/src/PVE/Storage/DirPlugin.pm
+++ b/src/PVE/Storage/DirPlugin.pm
@@ -80,6 +80,7 @@ sub options {
is_mountpoint => { optional => 1 },
bwlimit => { optional => 1 },
preallocation => { optional => 1 },
+ snapext => { optional => 1 },
};
}
diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm
index 6444390..5e5197a 100644
--- a/src/PVE/Storage/Plugin.pm
+++ b/src/PVE/Storage/Plugin.pm
@@ -214,6 +214,11 @@ my $defaultData = {
maximum => 65535,
optional => 1,
},
+ 'snapext' => {
+ type => 'boolean',
+ description => 'enable external snapshot.',
+ optional => 1,
+ },
},
};
@@ -695,7 +700,7 @@ sub get_subdir {
}
sub filesystem_path {
- my ($class, $scfg, $volname, $snapname) = @_;
+ my ($class, $scfg, $volname, $snapname, $current_snap) = @_;
my ($vtype, $name, $vmid, undef, undef, $isBase, $format) =
$class->parse_volname($volname);
@@ -703,7 +708,7 @@ sub filesystem_path {
# Note: qcow2/qed has internal snapshot, so path is always
# the same (with or without snapshot => same file).
die "can't snapshot this image format\n"
- if defined($snapname) && $format !~ m/^(qcow2|qed)$/;
+ if defined($snapname) && !$scfg->{snapext} && $format !~ m/^(qcow2|qed)$/;
my $dir = $class->get_subdir($scfg, $vtype);
@@ -711,13 +716,22 @@ sub filesystem_path {
my $path = "$dir/$name";
+ if($scfg->{snapext}) {
+ my $snappath = get_snap_path($path, $snapname);
+ if($snapname) {
+ $path = $snappath;
+ } elsif ($current_snap) {
+ $path = $current_snap->{file};
+ }
+ }
return wantarray ? ($path, $vmid, $vtype) : $path;
}
sub path {
my ($class, $scfg, $volname, $storeid, $snapname) = @_;
- return $class->filesystem_path($scfg, $volname, $snapname);
+ my $current_snapshot = $class->get_current_snapshot($scfg, $storeid, $volname);
+ return $class->filesystem_path($scfg, $volname, $snapname, $current_snapshot);
}
sub create_base {
@@ -1074,13 +1088,31 @@ sub volume_resize {
sub volume_snapshot {
my ($class, $scfg, $storeid, $volname, $snap) = @_;
- die "can't snapshot this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
+ die "can't snapshot this image format\n" if $volname !~ m/\.(raw|qcow2|qed)$/;
- my $path = $class->filesystem_path($scfg, $volname);
+ die "external snapshot need to be enabled to snapshot .raw volumes\n" if !$scfg->{snapext};
- my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
+ if($scfg->{snapext}) {
- run_command($cmd);
+ my $path = $class->path($scfg, $volname, $storeid);
+
+ my $snappath = get_snap_path($path, $snap);
+ my $format = ($class->parse_volname($volname))[6];
+
+ my $cmd = ['/usr/bin/qemu-img', 'create', '-b', $path,
+ '-F', $format, '-f', 'qcow2', $snappath];
+
+ my $options = "extended_l2=on,";
+ $options .= preallocation_cmd_option($scfg, 'qcow2');
+ push @$cmd, '-o', $options;
+ run_command($cmd);
+
+ } else {
+
+ my $path = $class->filesystem_path($scfg, $volname);
+ my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
+ run_command($cmd);
+ }
return undef;
}
@@ -1091,19 +1123,39 @@ sub volume_snapshot {
sub volume_rollback_is_possible {
my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
+ if ($scfg->{snapext}) {
+ #technically, we could manage multibranch, but it needs a lot more work for snapshot delete
+ my $path = $class->filesystem_path($scfg, $volname);
+ my $snappath = get_snap_path($path, $snap);
+
+ my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
+ my $currentpath = $snapshots->{current}->{file};
+ return 1 if !-e $snappath || $currentpath eq $snappath;
+
+ die "can't rollback, '$snap' is not most recent snapshot on '$volname'\n";
+ }
+
return 1;
}
sub volume_snapshot_rollback {
my ($class, $scfg, $storeid, $volname, $snap) = @_;
- die "can't rollback snapshot this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
+ die "can't rollback snapshot this image format\n" if $volname !~ m/\.(raw|qcow2|qed)$/;
- my $path = $class->filesystem_path($scfg, $volname);
+ die "external snapshot need to be enabled to rollback snapshot .raw volumes\n" if $volname =~ m/\.(raw)$/ && !$scfg->{snapext};
- my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
+ my $path = $class->filesystem_path($scfg, $volname);
- run_command($cmd);
+ if ($scfg->{snapext}) {
+ #simply delete the current snapshot and recreate it
+ my $snappath = get_snap_path($path, $snap);
+ unlink($snappath);
+ $class->volume_snapshot($scfg, $storeid, $volname, $snap);
+ } else {
+ my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
+ run_command($cmd);
+ }
return undef;
}
@@ -1111,17 +1163,50 @@ sub volume_snapshot_rollback {
sub volume_snapshot_delete {
my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
- die "can't delete snapshot for this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
+ die "can't delete snapshot for this image format\n" if $volname !~ m/\.(raw|qcow2|qed)$/;
+
+ die "external snapshot need to be enabled to delete snapshot of .raw volumes\n" if !$scfg->{snapext};
return 1 if $running;
- my $path = $class->filesystem_path($scfg, $volname);
+ my $cmd = "";
+ if ($scfg->{snapext}) {
+
+ my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
+ my $snappath = $snapshots->{$snap}->{file};
+ return if !-e $snappath; #already deleted ?
+
+ my $parentsnap = $snapshots->{$snap}->{parent};
+ my $childsnap = $snapshots->{$snap}->{child};
+ die "error: can't find a parent for this snapshot" if !$parentsnap;
- $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
+ my $parentpath = $snapshots->{$parentsnap}->{file};
+ my $parentformat = $snapshots->{$parentsnap}->{'format'} if $parentsnap;
+ my $childpath = $snapshots->{$childsnap}->{file} if $childsnap;
+ my $childformat = $snapshots->{$childsnap}->{'format'} if $childsnap;
- my $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
+ print "merge snapshot $snap to $parentsnap\n";
+ $cmd = ['/usr/bin/qemu-img', 'commit', $snappath];
+ run_command($cmd);
+
+ #if we delete an intermediate snapshot, we need to link upper snapshot to base snapshot
+ if($childpath && -e $childpath) {
+ die "missing parentsnap snapshot to rebase child $childpath\n" if !$parentpath;
+ print "link $childsnap to $parentsnap\n";
+ $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b', $parentpath, '-F', $parentformat, '-f', $childformat, $childpath];
+ run_command($cmd);
+ }
+
+ #delete the snapshot
+ unlink($snappath);
+ } else {
+ my $path = $class->filesystem_path($scfg, $volname);
- run_command($cmd);
+ $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
+
+ $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
+ run_command($cmd);
+ }
return undef;
}
@@ -1140,10 +1225,6 @@ sub volume_has_feature {
my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running, $opts) = @_;
my $features = {
- snapshot => {
- current => { qcow2 => 1 },
- snap => { qcow2 => 1 },
- },
clone => {
base => { qcow2 => 1, raw => 1, vmdk => 1 },
},
@@ -1159,11 +1240,23 @@ sub volume_has_feature {
base => { qcow2 => 1, raw => 1, vmdk => 1 },
current => { qcow2 => 1, raw => 1, vmdk => 1 },
},
- rename => {
- current => {qcow2 => 1, raw => 1, vmdk => 1},
- },
+ 'rename' => {
+ current => { qcow2 => 1, raw => 1, vmdk => 1},
+ }
};
+ if ($scfg->{snapext}) {
+ $features->{snapshot} = {
+ current => { raw => 1, qcow2 => 1 },
+ snap => { raw => 1, qcow2 => 1 },
+ }
+ } else {
+ $features->{snapshot} = {
+ current => { qcow2 => 1 },
+ snap => { qcow2 => 1 },
+ };
+ }
+
if ($feature eq 'clone') {
if (
defined($opts->{valid_target_formats})
@@ -1222,7 +1315,9 @@ sub list_images {
}
if ($vollist) {
- my $found = grep { $_ eq $volid } @$vollist;
+ my $search_volid = $volid;
+ $search_volid =~ s/-snap-.*\./\./;
+ my $found = grep { $_ eq $search_volid } @$vollist;
next if !$found;
}
@@ -1380,7 +1475,53 @@ sub status {
sub volume_snapshot_info {
my ($class, $scfg, $storeid, $volname) = @_;
- die "volume_snapshot_info is not implemented for $class";
+ die "volume_snapshot_info is not implemented for $class" if !$scfg->{snapext};
+
+ my $path = $class->filesystem_path($scfg, $volname);
+
+ my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
+
+ my $basevolname = $volname;
+ $basevolname =~ s/\.(raw|qcow2)$//;
+
+ my $snapshots = $class->list_images($storeid, $scfg, $vmid);
+ my $info = {};
+ for my $snap (@$snapshots) {
+
+ my $volid = $snap->{volid};
+ next if ($volid !~ m/$basevolname/);
+
+ my (undef, $snapvolname) = parse_volume_id($volid);
+ my $snapname = get_snapname_from_path($volid);
+ my $snapfile = $class->filesystem_path($scfg, $snapvolname, $snapname);
+ $snapname = 'base' if !$snapname;
+
+ my $format = $snap->{'format'};
+ my $parentfile = $snap->{parent};
+ my $parentname = get_snapname_from_path($parentfile) if $parentfile;
+ $parentname = 'base' if !$parentname && $parentfile;
+
+ $info->{$snapname}->{file} = $snapfile;
+ $info->{$snapname}->{volid} = $volid;
+ $info->{$snapname}->{'format'} = $format;
+ $info->{$snapname}->{parent} = $parentname if $parentname;
+ $info->{$parentname}->{child} = $snapname if $parentname;
+ }
+
+ my $current = undef;
+ for my $id (keys %$info) {
+ my $snap = $info->{$id};
+ die "error: snap $id: you can't have multiple current snapshot: current:$current\n" if !$snap->{child} && $current;
+ $current = $id if !$snap->{child};
+ }
+
+ if ($current) {
+ $info->{current}->{file} = $info->{$current}->{file};
+ $info->{current}->{'format'} = $info->{$current}->{'format'};
+ $info->{current}->{parent} = $info->{$current}->{parent};
+ }
+
+ return $info;
}
sub activate_storage {
@@ -1764,4 +1905,38 @@ sub config_aware_base_mkdir {
}
}
+sub get_snap_path {
+ my ($path, $snap) = @_;
+
+ my $basepath = "";
+ my $baseformat = "";
+ if ($path =~ m/^((.*)(vm-(\d+)-disk-(\d+)))(-snap-(.*))?\.(raw|qcow2)/) {
+ $basepath = $1;
+ $baseformat = $8;
+ }
+ my $format = $snap ? 'qcow2' : $baseformat;
+ my $snappath = $snap ? $basepath."-snap-$snap.$format" : undef;
+
+ return $snappath;
+}
+
+sub get_snapname_from_path {
+ my ($path) = @_;
+
+ if ($path =~ m/^((.*)(vm-(\d+)-disk-(\d+)))(-snap-(.*))?\.(raw|qcow2)/) {
+ my $snapname = $7;
+ return $snapname;
+ }
+ die "can't parse snapname from path";
+}
+
+sub get_current_snapshot {
+ my ($class, $scfg, $storeid, $volname) = @_;
+ #IMPROVE ME: faster way to find current snapshot? (search the most recently created snapshot file ? need to work with lvm volumes too)
+
+ return if !$scfg->{snapext};
+ my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
+ return $snapshots->{current};
+}
+
1;
--
2.39.2
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot
[not found] <20240930113153.2896648-1-alexandre.derumier@groupe-cyllene.com>
2024-09-30 11:31 ` [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support Alexandre Derumier via pve-devel
@ 2024-09-30 11:31 ` Alexandre Derumier via pve-devel
2024-10-23 10:14 ` Fabian Grünbichler
2024-09-30 11:31 ` [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot) Alexandre Derumier via pve-devel
2024-10-20 13:03 ` [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support DERUMIER, Alexandre via pve-devel
3 siblings, 1 reply; 27+ messages in thread
From: Alexandre Derumier via pve-devel @ 2024-09-30 11:31 UTC (permalink / raw)
To: pve-devel; +Cc: Alexandre Derumier
[-- Attachment #1: Type: message/rfc822, Size: 8987 bytes --]
From: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH v2 qemu-server 1/1] implement external snapshot
Date: Mon, 30 Sep 2024 13:31:52 +0200
Message-ID: <20240930113153.2896648-3-alexandre.derumier@groupe-cyllene.com>
Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
---
PVE/QemuServer.pm | 108 ++++++++++++++++++++++++++++++++++++++++------
1 file changed, 95 insertions(+), 13 deletions(-)
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index b26da505..1523df15 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -1549,7 +1549,11 @@ sub print_drive_commandline_full {
} else {
if ($storeid) {
$path = PVE::Storage::path($storecfg, $volid);
- $format //= qemu_img_format($scfg, $volname);
+ if ($scfg->{snapext}) {
+ $format //= qemu_img_format($scfg, $path);
+ } else {
+ $format //= qemu_img_format($scfg, $volname);
+ }
} else {
$path = $volid;
$format //= "raw";
@@ -4713,9 +4717,31 @@ sub qemu_volume_snapshot {
my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;
my $running = check_running($vmid);
+ my $do_snapshots_with_qemu = do_snapshots_with_qemu($storecfg, $volid, $deviceid) if $running;
+ if ($do_snapshots_with_qemu) {
+ if($do_snapshots_with_qemu == 2) {
+ my $snapshot_file = PVE::Storage::path($storecfg, $volid, $snap);
+ #allocate volume if external snapshot is a block device
+ my $snap_volid = undef;
+ if ($snapshot_file =~ m|^/dev/.+|) {
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+ my $size = PVE::Storage::volume_size_info($storecfg, $volid, 5);
+ #add 100M for qcow2 headers
+ $size = int($size/1024) + (100*1024);
+ my $snap_volname = $volname."-snap-$snap";
+ $snap_volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, 'raw', $snap_volname, $size);
+ PVE::Storage::activate_volumes($storecfg, [$snap_volid]);
+ }
+
+ eval { mon_cmd($vmid, 'blockdev-snapshot-sync', device => $deviceid, 'snapshot-file' => $snapshot_file, format => 'qcow2') };
- if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
- mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
+ if ($@) {
+ PVE::Storage::vdisk_free($storecfg, $snap_volid) if $snapshot_file =~ m|^/dev/.+|;
+ die $@;
+ }
+ } else {
+ mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
+ }
} else {
PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
}
@@ -4735,13 +4761,52 @@ sub qemu_volume_snapshot_delete {
});
}
- if ($attached_deviceid && do_snapshots_with_qemu($storecfg, $volid, $attached_deviceid)) {
- mon_cmd(
- $vmid,
- 'blockdev-snapshot-delete-internal-sync',
- device => $attached_deviceid,
- name => $snap,
- );
+ my $do_snapshots_with_qemu = do_snapshots_with_qemu($storecfg, $volid, $attached_deviceid) if $running;
+ if ($attached_deviceid && $do_snapshots_with_qemu) {
+
+ if ($do_snapshots_with_qemu == 2) {
+
+ my $snapshots = PVE::Storage::volume_snapshot_info($storecfg, $volid);
+
+ my $currentpath = $snapshots->{current}->{file};
+ my $snappath = $snapshots->{$snap}->{file};
+ my $snapvolid = $snapshots->{$snap}->{volid};
+ return if !$snappath; #already deleted
+
+ my $parentsnap = $snapshots->{$snap}->{parent};
+ die "error: we can't find a parent for this snapshot" if !$parentsnap;
+
+ my $parentpath = $snapshots->{$parentsnap}->{file};
+ my $parentformat = $snapshots->{$parentsnap}->{'format'} if $parentsnap;
+
+ print "block-commit top:$snappath base:$parentpath\n";
+
+ my $job_id = "commit-$attached_deviceid";
+ my $jobs = {};
+ mon_cmd(
+ $vmid,
+ 'block-commit',
+ 'job-id' => $job_id,
+ device => $attached_deviceid,
+ top => $snappath,
+ base => $parentpath,
+ );
+ $jobs->{$job_id} = {};
+
+ #if we delete the current, block-job-complete to finish
+ my $completion = $currentpath eq $snappath ? 'complete' : 'auto';
+ qemu_drive_mirror_monitor($vmid, undef, $jobs, $completion, 0, 'commit');
+ #fixme. delete the disks when all jobs are ok ?
+ #delete the lvm volume
+ PVE::Storage::vdisk_free($storecfg, $snapvolid);
+ } else {
+ mon_cmd(
+ $vmid,
+ 'blockdev-snapshot-delete-internal-sync',
+ device => $attached_deviceid,
+ name => $snap,
+ );
+ }
} else {
PVE::Storage::volume_snapshot_delete(
$storecfg, $volid, $snap, $attached_deviceid ? 1 : undef);
@@ -7776,6 +7841,8 @@ sub do_snapshots_with_qemu {
return 1;
}
+ return 2 if $scfg->{snapext};
+
if ($volid =~ m/\.(qcow2|qed)$/){
return 1;
}
@@ -7849,8 +7916,23 @@ sub qemu_img_convert {
if ($src_storeid) {
PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
- $src_format = qemu_img_format($src_scfg, $src_volname);
- $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
+ if($src_scfg->{snapext}) {
+ my $snapshots = PVE::Storage::volume_snapshot_info($storecfg, $src_volid);
+ $snapname = 'current' if !$snapname;
+ #if we don't clone the current image
+ #need to use the parent if available, as it's the readonly image view
+ #at the time of the snapshot
+ my $parentsnap = $snapshots->{$snapname}->{parent};
+ $snapname = $parentsnap if($parentsnap && $snapname ne 'current');
+ $src_format = $snapshots->{$snapname}->{format};
+ $src_path = $snapshots->{$snapname}->{file};
+ $src_volid = $snapshots->{$snapname}->{volid};
+ $snapname = undef;
+ PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
+ } else {
+ $src_format = qemu_img_format($src_scfg, $src_volname);
+ $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
+ }
$src_is_iscsi = ($src_path =~ m|^iscsi://|);
$cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
} elsif (-f $src_volid || -b $src_volid) {
@@ -7920,7 +8002,7 @@ sub qemu_img_format {
# FIXME: this entire function is kind of weird given that `parse_volname`
# also already gives us a format?
- my $is_path_storage = $scfg->{path} || $scfg->{type} eq 'esxi';
+ my $is_path_storage = $scfg->{path} || $scfg->{type} eq 'esxi' || $scfg->{snapext};
if ($is_path_storage && $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
return $1;
--
2.39.2
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot)
[not found] <20240930113153.2896648-1-alexandre.derumier@groupe-cyllene.com>
2024-09-30 11:31 ` [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support Alexandre Derumier via pve-devel
2024-09-30 11:31 ` [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot Alexandre Derumier via pve-devel
@ 2024-09-30 11:31 ` Alexandre Derumier via pve-devel
2024-10-23 10:13 ` Fabian Grünbichler
2024-10-20 13:03 ` [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support DERUMIER, Alexandre via pve-devel
3 siblings, 1 reply; 27+ messages in thread
From: Alexandre Derumier via pve-devel @ 2024-09-30 11:31 UTC (permalink / raw)
To: pve-devel; +Cc: Alexandre Derumier
[-- Attachment #1: Type: message/rfc822, Size: 18125 bytes --]
From: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot)
Date: Mon, 30 Sep 2024 13:31:53 +0200
Message-ID: <20240930113153.2896648-4-alexandre.derumier@groupe-cyllene.com>
Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
---
src/PVE/Storage.pm | 2 +
src/PVE/Storage/LvmQcow2Plugin.pm | 460 ++++++++++++++++++++++++++++++
src/PVE/Storage/Makefile | 3 +-
3 files changed, 464 insertions(+), 1 deletion(-)
create mode 100644 src/PVE/Storage/LvmQcow2Plugin.pm
diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm
index 57b2038..119998f 100755
--- a/src/PVE/Storage.pm
+++ b/src/PVE/Storage.pm
@@ -28,6 +28,7 @@ use PVE::Storage::Plugin;
use PVE::Storage::DirPlugin;
use PVE::Storage::LVMPlugin;
use PVE::Storage::LvmThinPlugin;
+use PVE::Storage::LvmQcow2Plugin;
use PVE::Storage::NFSPlugin;
use PVE::Storage::CIFSPlugin;
use PVE::Storage::ISCSIPlugin;
@@ -54,6 +55,7 @@ our $KNOWN_EXPORT_FORMATS = ['raw+size', 'tar+size', 'qcow2+size', 'vmdk+size',
PVE::Storage::DirPlugin->register();
PVE::Storage::LVMPlugin->register();
PVE::Storage::LvmThinPlugin->register();
+PVE::Storage::LvmQcow2Plugin->register();
PVE::Storage::NFSPlugin->register();
PVE::Storage::CIFSPlugin->register();
PVE::Storage::ISCSIPlugin->register();
diff --git a/src/PVE/Storage/LvmQcow2Plugin.pm b/src/PVE/Storage/LvmQcow2Plugin.pm
new file mode 100644
index 0000000..68c8686
--- /dev/null
+++ b/src/PVE/Storage/LvmQcow2Plugin.pm
@@ -0,0 +1,460 @@
+package PVE::Storage::LvmQcow2Plugin;
+
+use strict;
+use warnings;
+
+use IO::File;
+
+use PVE::Tools qw(run_command trim);
+use PVE::Storage::Plugin;
+use PVE::Storage::LVMPlugin;
+use PVE::JSONSchema qw(get_standard_option);
+
+use base qw(PVE::Storage::LVMPlugin);
+
+# Configuration
+
+sub type {
+ return 'lvmqcow2';
+}
+
+sub plugindata {
+ return {
+ #container not yet implemented #need to implement dm-qcow2
+ content => [ {images => 1, rootdir => 1}, { images => 1 }],
+ };
+}
+
+sub properties {
+ return {
+ };
+}
+
+sub options {
+ return {
+ vgname => { fixed => 1 },
+ nodes => { optional => 1 },
+ shared => { optional => 1 },
+ disable => { optional => 1 },
+ saferemove => { optional => 1 },
+ saferemove_throughput => { optional => 1 },
+ content => { optional => 1 },
+ base => { fixed => 1, optional => 1 },
+ tagged_only => { optional => 1 },
+ bwlimit => { optional => 1 },
+ snapext => { fixed => 1 },
+ };
+}
+
+# Storage implementation
+
+sub parse_volname {
+ my ($class, $volname) = @_;
+
+ PVE::Storage::Plugin::parse_lvm_name($volname);
+ my $format = $volname =~ m/^(.*)-snap-/ ? 'qcow2' : 'raw';
+
+ if ($volname =~ m/^((vm|base)-(\d+)-\S+)$/) {
+ return ('images', $1, $3, undef, undef, $2 eq 'base', $format);
+ }
+
+ die "unable to parse lvm volume name '$volname'\n";
+}
+
+sub filesystem_path {
+ my ($class, $scfg, $volname, $snapname, $current_snap) = @_;
+
+ my ($vtype, $name, $vmid) = $class->parse_volname($volname);
+
+ my $vg = $scfg->{vgname};
+
+ my $path = "/dev/$vg/$name";
+
+ if($snapname) {
+ $path = get_snap_volname($path, $snapname);
+ } elsif ($current_snap) {
+ $path = $current_snap->{file};
+ }
+
+ return wantarray ? ($path, $vmid, $vtype) : $path;
+}
+
+sub create_base {
+ my ($class, $storeid, $scfg, $volname) = @_;
+
+ my $vg = $scfg->{vgname};
+
+ my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
+ $class->parse_volname($volname);
+
+ die "create_base not possible with base image\n" if $isBase;
+
+ die "unable to create base volume - found snapshot" if $class->snapshot_exist($scfg, $storeid, $volname);
+
+ my $newname = $name;
+ $newname =~ s/^vm-/base-/;
+
+ my $cmd = ['/sbin/lvrename', $vg, $volname, $newname];
+ run_command($cmd, errmsg => "lvrename '$vg/$volname' => '$vg/$newname' error");
+
+ # set inactive, read-only flags
+ $cmd = ['/sbin/lvchange', '-an', '-pr', "$vg/$newname"];
+ eval { run_command($cmd); };
+ warn $@ if $@;
+
+ my $newvolname = $newname;
+
+ return $newvolname;
+}
+
+sub clone_image {
+ my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;
+
+ die "can't clone images in lvm storage\n";
+}
+
+sub alloc_image {
+ my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;
+
+ die "unsupported format '$fmt'" if $fmt ne 'raw';
+
+ die "illegal name '$name' - should be 'vm-$vmid-*'\n"
+ if $name && $name !~ m/^vm-$vmid-/;
+
+ my $vgs = PVE::Storage::LVMPlugin::lvm_vgs();
+
+ my $vg = $scfg->{vgname};
+
+ die "no such volume group '$vg'\n" if !defined ($vgs->{$vg});
+
+ my $free = int($vgs->{$vg}->{free});
+
+ die "not enough free space ($free < $size)\n" if $free < $size;
+
+ $name = $class->find_free_diskname($storeid, $scfg, $vmid)
+ if !$name;
+
+ my $tags = ["pve-vm-$vmid"];
+ if ($name =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/) {
+ push @$tags, "\@pve-$1";
+ }
+
+ PVE::Storage::LVMPlugin::lvcreate($vg, $name, $size, $tags);
+
+ return $name;
+}
+
+sub volume_snapshot_info {
+ my ($class, $scfg, $storeid, $volname) = @_;
+
+ return $class->list_snapshots($scfg, $storeid, $volname);
+}
+
+sub activate_volume {
+ my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
+
+ my $lvm_activate_mode = 'ey';
+ my $tag = undef;
+
+ #activate volume && all volumes snapshots by tag
+ if($volname =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/) {
+ $tag = "\@pve-vm-$4-disk-$5";
+ }
+
+ my $cmd = ['/sbin/lvchange', "-a$lvm_activate_mode", $tag];
+ run_command($cmd, errmsg => "can't activate LV '$tag'");
+
+ $cmd = ['/sbin/lvchange', '--refresh', $tag];
+ run_command($cmd, errmsg => "can't refresh LV '$tag' for activation");
+}
+
+sub deactivate_volume {
+ my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
+
+ my $tag = undef;
+ #deactivate volume && all volume snapshots by tag
+ if($volname =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/) {
+ $tag = "\@pve-vm-$4-disk-$5";
+ }
+
+ my $cmd = ['/sbin/lvchange', '-aln', $tag];
+ run_command($cmd, errmsg => "can't deactivate LV '$tag'");
+}
+
+sub volume_resize {
+ my ($class, $scfg, $storeid, $volname, $size, $running) = @_;
+
+ #we should resize the base image and parents snapshots,
+ #but how to manage rollback ?
+
+ die "can't resize if snasphots exist" if $class->snapshot_exist($scfg, $storeid, $volname);
+
+ return 1;
+}
+
+sub volume_snapshot {
+ my ($class, $scfg, $storeid, $volname, $snap) = @_;
+
+ $class->activate_volume($storeid, $scfg, $volname, undef, {});
+
+ my $current_path = $class->path($scfg, $volname, $storeid);
+ my $current_format = (PVE::Storage::Plugin::file_size_info($current_path))[1];
+ my $snappath = get_snap_volname($current_path, $snap);
+
+ my $snapvolname = get_snap_volname($volname, $snap);
+ #allocate lvm snapshot volume
+ my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
+ $class->parse_volname($volname);
+ my $size = $class->volume_size_info($scfg, $storeid, $volname, 5);
+ #add 100M for qcow2 headers
+ $size = int($size/1024) + (100*1024);
+
+ $class->alloc_image($storeid, $scfg, $vmid, 'raw', $snapvolname, $size);
+
+ # create the qcow2 fs
+ eval {
+ my $cmd = ['/usr/bin/qemu-img', 'create', '-b', $current_path,
+ '-F', $current_format, '-f', 'qcow2', $snappath];
+ my $options = "extended_l2=on,";
+ $options .= PVE::Storage::Plugin::preallocation_cmd_option($scfg, 'qcow2');
+ push @$cmd, '-o', $options;
+ run_command($cmd);
+ };
+ if ($@) {
+ eval { $class->free_image($storeid, $scfg, $snapvolname, 0) };
+ warn $@ if $@;
+ }
+}
+
+# Asserts that a rollback to $snap on $volname is possible.
+# If certain snapshots are preventing the rollback and $blockers is an array
+# reference, the snapshot names can be pushed onto $blockers prior to dying.
+sub volume_rollback_is_possible {
+ my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
+
+ my $path = $class->filesystem_path($scfg, $volname);
+ my $snappath = get_snap_volname($path, $snap);
+ my $currentpath = $class->path($scfg, $volname, $storeid);
+ return 1 if $currentpath eq $snappath;
+
+ die "can't rollback, '$snap' is not most recent snapshot on '$volname'\n";
+
+ return 1;
+}
+
+sub volume_snapshot_rollback {
+ my ($class, $scfg, $storeid, $volname, $snap) = @_;
+
+ $class->activate_volume($storeid, $scfg, $volname, undef, {});
+ #simply delete the current snapshot and recreate it
+
+ my $snapvolname = get_snap_volname($volname, $snap);
+
+ $class->free_image($storeid, $scfg, $snapvolname, 0);
+ $class->volume_snapshot($scfg, $storeid, $volname, $snap);
+}
+
+sub volume_snapshot_delete {
+ my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
+
+ return 1 if $running;
+
+ $class->activate_volume($storeid, $scfg, $volname, undef, {});
+
+ my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
+ my $snappath = $snapshots->{$snap}->{file};
+ if(!$snappath) {
+ warn "$snap already deleted. skip\n";
+ return;
+ }
+
+ my $snapvolname = $snapshots->{$snap}->{volname};
+ my $parentsnap = $snapshots->{$snap}->{parent};
+ my $childsnap = $snapshots->{$snap}->{child};
+ die "error: can't find a parent for this snapshot" if !$parentsnap;
+
+ my $parentpath = $snapshots->{$parentsnap}->{file};
+ my $parentformat = $snapshots->{$parentsnap}->{'format'} if $parentsnap;
+ my $childpath = $snapshots->{$childsnap}->{file} if $childsnap;
+ my $childformat = $snapshots->{$childsnap}->{'format'} if $childsnap;
+
+ print "merge snapshot $snap to $parentsnap\n";
+ my $cmd = ['/usr/bin/qemu-img', 'commit', $snappath];
+ run_command($cmd);
+
+ #if we delete an intermediate snapshot, we need to link upper snapshot to base snapshot
+ if($childpath && -e $childpath) {
+ die "missing parentsnap snapshot to rebase child $childpath\n" if !$parentpath;
+ print "link $childsnap to $parentsnap\n";
+ $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b', $parentpath, '-F', $parentformat, '-f', $childformat, $childpath];
+ run_command($cmd);
+ }
+
+ #delete the snapshot
+ $class->free_image($storeid, $scfg, $snapvolname, 0);
+
+ return;
+}
+
+sub volume_has_feature {
+ my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = @_;
+
+ my $features = {
+ snapshot => { current => 1 },
+# clone => { base => 1, snap => 1}, #don't allow to clone as we can't activate the base between different host ?
+ template => { current => 1},
+ copy => { base => 1, current => 1, snap => 1},
+ sparseinit => { base => 1, current => 1},
+ rename => {current => 1},
+ };
+
+ my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
+ $class->parse_volname($volname);
+
+ my $key = undef;
+ if($snapname){
+ $key = 'snap';
+ }else{
+ $key = $isBase ? 'base' : 'current';
+ }
+ return 1 if $features->{$feature}->{$key};
+
+ return undef;
+}
+
+sub get_snap_volname {
+ my ($path, $snap) = @_;
+
+ my $basepath = "";
+ my $baseformat = "";
+ if ($path =~ m/^((.*)((vm|base)-(\d+)-disk-(\d+)))(-snap-([a-zA-Z0-9]+))?(\.(raw|qcow2))?/) {
+ $basepath = $1;
+ $baseformat = $8;
+ }
+ my $snapvolname = $basepath."-snap-$snap.qcow2";
+ return $snapvolname;
+}
+
+sub get_snapname_from_path {
+ my ($path) = @_;
+
+ if ($path =~ m/^((.*)((vm|base)-(\d+)-disk-(\d+)))(-snap-([a-zA-Z0-9]+))?(\.(raw|qcow2))?/) {
+ my $snapname = $7;
+ return $snapname;
+ }
+ die "can't parse snapname from path $path";
+}
+
+sub get_current_snapshot {
+ my ($class, $scfg, $storeid, $volname) = @_;
+
+ #get more recent ctime volume
+ return $class->list_snapshots($scfg, $storeid, $volname, 1);
+}
+my $check_tags = sub {
+ my ($tags) = @_;
+
+ return defined($tags) && $tags =~ /(^|,)pve-vm-\d+(,|$)/;
+};
+
+sub list_images {
+ my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;
+
+ my $vgname = $scfg->{vgname};
+
+ $cache->{lvs} = PVE::Storage::LVMPlugin::lvm_list_volumes() if !$cache->{lvs};
+
+ my $res = [];
+
+ if (my $dat = $cache->{lvs}->{$vgname}) {
+
+ foreach my $volname (keys %$dat) {
+
+ next if $volname !~ m/^(vm|base)-(\d+)-/;
+ my $owner = $2;
+
+ my $info = $dat->{$volname};
+
+ next if $scfg->{tagged_only} && !&$check_tags($info->{tags});
+
+ # Allow mirrored and RAID LVs
+ next if $info->{lv_type} !~ m/^[-mMrR]$/;
+
+ my $volid = "$storeid:$volname";
+
+ if ($vollist) {
+ my $found = grep { $_ eq $volid } @$vollist;
+ next if !$found;
+ } else {
+ next if defined($vmid) && ($owner ne $vmid);
+ }
+
+ push @$res, {
+ volid => $volid, format => 'raw', size => $info->{lv_size}, vmid => $owner,
+ ctime => $info->{ctime},
+ };
+ }
+ }
+
+ return $res;
+}
+
+sub list_snapshots {
+ my ($class, $scfg, $storeid, $volname, $current_only) = @_;
+
+ my $vgname = $scfg->{vgname};
+
+ my $basevolname = $volname;
+ my $lvs = PVE::Storage::LVMPlugin::lvm_list_volumes($vgname);
+
+ my $vg = $lvs->{$vgname};
+
+ my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
+ my $snapshots = $class->list_images($storeid, $scfg, $vmid);
+
+ my $info = {};
+ for my $snap (@$snapshots) {
+ my $snap_volid = $snap->{volid};
+ next if ($snap_volid !~ m/$basevolname/);
+
+ my $snapname = get_snapname_from_path($snap_volid);
+ my (undef, $snap_volname) = PVE::Storage::parse_volume_id($snap_volid);
+ my $snapfile = $class->filesystem_path($scfg, $snap_volname, $snapname);
+ $snapname = 'base' if !$snapname;
+ $info->{$snapname}->{file} = $snapfile;
+ $info->{$snapname}->{volname} = $snap_volname;
+ $info->{$snapname}->{volid} = $snap_volid;
+ $info->{$snapname}->{ctime} = $snap->{ctime};
+
+ if (!$current_only) {
+ my (undef, $format, undef, $parentfile, undef) = PVE::Storage::Plugin::file_size_info($snapfile);
+ next if !$parentfile && $snapname ne 'base'; #bad unlinked snapshot
+
+ my $parentname = get_snapname_from_path($parentfile) if $parentfile;
+ $parentname = 'base' if !$parentname && $parentfile;
+
+ $info->{$snapname}->{'format'} = $format;
+ $info->{$snapname}->{parent} = $parentname if $parentname;
+ $info->{$parentname}->{child} = $snapname if $parentname;
+ }
+ }
+
+ my @snapshots_sorted = sort { $info->{$b}{ctime} <=> $info->{$a}{ctime} } keys %$info;
+ my $current_snapname = $snapshots_sorted[0];
+ my $current_snapshot = $info->{$current_snapname};
+ return $current_snapshot if $current_only;
+
+ $info->{current} = { %$current_snapshot };
+ return $info;
+}
+
+sub snapshot_exist {
+ my ($class, $scfg, $storeid, $volname) = @_;
+
+ my $basepath = $class->filesystem_path($scfg, $volname);
+ my $currentpath = $class->path($scfg, $volname, $storeid);
+
+ die "can't resize if snasphots exist" if $currentpath ne $basepath;
+
+}
+1;
diff --git a/src/PVE/Storage/Makefile b/src/PVE/Storage/Makefile
index d5cc942..1af8aab 100644
--- a/src/PVE/Storage/Makefile
+++ b/src/PVE/Storage/Makefile
@@ -14,7 +14,8 @@ SOURCES= \
PBSPlugin.pm \
BTRFSPlugin.pm \
LvmThinPlugin.pm \
- ESXiPlugin.pm
+ ESXiPlugin.pm \
+ LvmQcow2Plugin.pm
.PHONY: install
install:
--
2.39.2
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
[not found] <20240930113153.2896648-1-alexandre.derumier@groupe-cyllene.com>
` (2 preceding siblings ...)
2024-09-30 11:31 ` [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot) Alexandre Derumier via pve-devel
@ 2024-10-20 13:03 ` DERUMIER, Alexandre via pve-devel
2024-10-20 17:34 ` Roland privat via pve-devel
3 siblings, 1 reply; 27+ messages in thread
From: DERUMIER, Alexandre via pve-devel @ 2024-10-20 13:03 UTC (permalink / raw)
To: pve-devel; +Cc: DERUMIER, Alexandre
[-- Attachment #1: Type: message/rfc822, Size: 19550 bytes --]
From: "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
To: "pve-devel@lists.proxmox.com" <pve-devel@lists.proxmox.com>
Subject: Re: [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
Date: Sun, 20 Oct 2024 13:03:45 +0000
Message-ID: <6e37f6d8cc4d8bd65c72c58edc429407d64fafab.camel@groupe-cyllene.com>
Hi,
Any comment about this patch series ?
I really think that external snapshots could be a great feature (as I
still see reports on the forum about freezes on snapshot deletion),
and support for lvm and shared san is really a feature than enterprise
users are waiting. (To be honest, I have a lot of customers stuck on
vmware because of this)
About my previous patch series, with lvm dynamic extents, I think I'll
give up; it seems to be too complex, with too many corner cases.
(So keeping shared lvm + external snapshots without thin provisioning)
In Parallel, I have done more tests with gfs2/ocfs2, and I finally
found a
way to have good performance on block allocation for thin non
preallocated qcow2 file.
Currently, I'm around 200 iops on gfs2 with 4k randwrite (instead of
20000 iops...)
(fio --rw=randwrite ---direct=1 -bs=4k --ioengine=libaio --iodepth=64 -
-filename=/dev/sdX)
qemu have "preallocate" filter feature
https://patchwork.kernel.org/project/qemu-devel/cover/20200814130348.20625-1-vsementsov@virtuozzo.com/
-drive driver=qcow2,file.driver=preallocate,file.prealloc-
size=1073741824,file.file.driver=file,file.file.filename=/mnt/pve/gfs2s
an/images/100/vm-100-disk-0.qcow2,id=drive-scsi2,if=none
which allow to prellocate on the fly more blocks than requested.
(for example, if you need to write a 4k block to an unallocated block, I'll
reserve 128MB).
This reduce a lot the number of locks and round-trip network for fs
like ocfs2/gfs2 when you have a lot of write.
With qcow2 format allocating random blocks consecutively, that's
working very well.
I have done a small test, the fio result is around to 20~30k write.
I'll send a patch soon after doing more test.
Regards,
Alexandre
-------- Message initial --------
De: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
À: pve-devel@lists.proxmox.com
Objet: [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2
snapshot support
Date: 30/09/2024 13:31:50
This patch series implement qcow2 external snapshot support for files
&& lvm volumes
The current internal qcow2 snapshots have a lot of performance
problems.
I have tested through nfs and also local filesystem
I see that Fiona don't have same result than me, but I got something
like 200~300iops
vs 20000 iops with 4k randwrite when a snapshot exist.
The result is even worst on a shared filesystem like ocfs2 or gfs2.
(around 80 iops)
I think (I'm not 100% sure) this is mostly because metadata is no longer
preallocated
with qcow2 internal snapshots.
With external snapshot, I almost don't have any performance impact when
a snapshot exist.
Also other bugs are freeze/lock reported by users since years on
snapshots delete on nfs
https://antiphishing.vadesecure.com/v4?f=S1Zkd042VWdrZG5qQUxxWk5ps4t67k
NuHsBZzdzhpquLKuXqTZLIq2K1DfKr9N61yBafm7AuAITd6bHtRU4zEQ&i=MlZSTzBhZFZ6
Nzl4c3EyN5T6buHjA4kKs6Oz9IPjCIg&k=F1is&r=cm1qVmRYUWk2WXhYZVFHWA0PXtTaYx
z7-FIOTkZBm34_dHdSch-
gXn7ST9eGhQLN&s=64b60d6fd396d266b432ee693cc8f61d2632a8524491fef07cef3c3
f51c98871&u=https%3A%2F%2Fforum.proxmox.com%2Fthreads%2Fsnapshot-
removal-jams-the-vm.111648%2F
(The disk access seem to be frozen during all the delete duration)
External qcow2 snapshots also allow snapshot of raw devices ,so 0
performance impact without snapshots.
This also open doors for remote snapshot export-import for storage
replication.
This V2 introduces support for qcow2 external snapshots for lvm; an extra
lvm
volume is created for each snapshot and formatted with qcow2.
This is a lot more performant than lvm (non-thin/nomedata) snapshot,
and allow to use
it for shared lvm. (I have another patch series for thick lvm dynamic
extend, but if we could have at minimum
snapshot working, it could great :)
I have tested: snapshot, snap rollback, snap delete, clone, move disk,
rename disk, create_base. (online && offline)
lxc is not yet supported, but I think we could look to implement the
recent dm-qcow2 kernel block driver
https://antiphishing.vadesecure.com/v4?f=S1Zkd042VWdrZG5qQUxxWk5ps4t67k
NuHsBZzdzhpquLKuXqTZLIq2K1DfKr9N61yBafm7AuAITd6bHtRU4zEQ&i=MlZSTzBhZFZ6
Nzl4c3EyN5T6buHjA4kKs6Oz9IPjCIg&k=F1is&r=cm1qVmRYUWk2WXhYZVFHWA0PXtTaYx
z7-FIOTkZBm34_dHdSch-
gXn7ST9eGhQLN&s=1865f514f95ac1d8e0088b598376751d4d98fa25de6a8b2868a74f9
2ac661cfa&u=https%3A%2F%2Flore.kernel.org%2Flkml%2F164846619932.251310.
3668540533992131988.stgit%40pro%2FT%2F
storage.cfg example:
dir: local2
path /var/lib/vz
content snippets,vztmpl,backup,images,iso,rootdir
snapext 1
lvmqcow2:test
vgname test
snapext 1
content images
changelog v2:
implement lvm with external qcow2 snapshots
pve-storage:
Alexandre Derumier (2):
add external snasphot support
add lvmqcow2 plugin: (lvm with external qcow2 snapshot)
src/PVE/Storage.pm | 2 +
src/PVE/Storage/DirPlugin.pm | 1 +
src/PVE/Storage/LvmQcow2Plugin.pm | 460 ++++++++++++++++++++++++++++++
src/PVE/Storage/Makefile | 3 +-
src/PVE/Storage/Plugin.pm | 225 +++++++++++++--
5 files changed, 665 insertions(+), 26 deletions(-)
create mode 100644 src/PVE/Storage/LvmQcow2Plugin.pm
qemu-server:
Alexandre Derumier (1):
implement external snapshot
PVE/QemuServer.pm | 108 ++++++++++++++++++++++++++++++++++++++++------
1 file changed, 95 insertions(+), 13 deletions(-)
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
2024-10-20 13:03 ` [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support DERUMIER, Alexandre via pve-devel
@ 2024-10-20 17:34 ` Roland privat via pve-devel
2024-10-20 19:08 ` Esi Y via pve-devel
[not found] ` <CABtLnHqZVhDKnog6jaUBP4HcSwfanyEzWeLdUXnzJs2esJQQkA@mail.gmail.com>
0 siblings, 2 replies; 27+ messages in thread
From: Roland privat via pve-devel @ 2024-10-20 17:34 UTC (permalink / raw)
To: Proxmox VE development discussion; +Cc: Roland privat
[-- Attachment #1: Type: message/rfc822, Size: 6300 bytes --]
From: Roland privat <devzero@web.de>
To: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
Subject: Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
Date: Sun, 20 Oct 2024 19:34:05 +0200
Message-ID: <4A4D6F36-75C7-4346-BA43-EED2DD7B3F70@web.de>
i never understood why qemu has internal snapshots by default. from an admin perspective it simply sucks. for example, you won't know, when something goes wrong on removal, how much orphaned data is being left. furthermore, the freezes on removal are a pita for us and we often avoid live snapshot removal because of this. this is one of the reasons why i think proxmox isn't on the same enterprise level as vmware.
so, i strongly vote for inclusion of this feature and will happily test it, when available
regards
roland
> Am 20.10.2024 um 15:04 schrieb DERUMIER, Alexandre via pve-devel <pve-devel@lists.proxmox.com>:
>
>
> <mime-attachment>
> _______________________________________________
> pve-devel mailing list
> pve-devel@lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
2024-10-20 17:34 ` Roland privat via pve-devel
@ 2024-10-20 19:08 ` Esi Y via pve-devel
[not found] ` <CABtLnHqZVhDKnog6jaUBP4HcSwfanyEzWeLdUXnzJs2esJQQkA@mail.gmail.com>
1 sibling, 0 replies; 27+ messages in thread
From: Esi Y via pve-devel @ 2024-10-20 19:08 UTC (permalink / raw)
To: Proxmox VE development discussion; +Cc: Esi Y
[-- Attachment #1: Type: message/rfc822, Size: 7131 bytes --]
From: Esi Y <esiy0676+proxmox@gmail.com>
To: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
Subject: Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
Date: Sun, 20 Oct 2024 21:08:54 +0200
Message-ID: <CABtLnHqZVhDKnog6jaUBP4HcSwfanyEzWeLdUXnzJs2esJQQkA@mail.gmail.com>
QEMU was not designed to use internal snapshots "by default":
https://wiki.qemu.org/Features/Snapshots
Discussions on this are discouraged, to say the least:
https://forum.proxmox.com/threads/blockdev-snapshot-sync-to-snapshot-a-raw-file.38188/#post-712825
On Sun, Oct 20, 2024 at 7:39 PM Roland privat via pve-devel
<pve-devel@lists.proxmox.com> wrote:
>
>
>
>
> ---------- Forwarded message ----------
> From: Roland privat <devzero@web.de>
> To: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
> Cc:
> Bcc:
> Date: Sun, 20 Oct 2024 19:34:05 +0200
> Subject: Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
> i never understood, why qemu has inline snapshot by default. from an admin perspective it simply sucks. for example you won‘t know when something goes wrong on removal, how much orphaned data is being left. furthermore, the freezes on removal are a pita for us and we often avoid live snapshot removal because of this. this is one of the reasons why i think , proxmox isn‘t on the same enterprise level like vmware.
>
> so, i strongly vote for inclusion of this feature and will happily test it, when available
>
> regards
> roland
>
> > Am 20.10.2024 um 15:04 schrieb DERUMIER, Alexandre via pve-devel <pve-devel@lists.proxmox.com>:
> >
> >
> > <mime-attachment>
> > _______________________________________________
> > pve-devel mailing list
> > pve-devel@lists.proxmox.com
> > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
>
>
>
>
>
> ---------- Forwarded message ----------
> From: Roland privat via pve-devel <pve-devel@lists.proxmox.com>
> To: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
> Cc: Roland privat <devzero@web.de>
> Bcc:
> Date: Sun, 20 Oct 2024 19:34:05 +0200
> Subject: Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
> _______________________________________________
> pve-devel mailing list
> pve-devel@lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
[not found] ` <CABtLnHqZVhDKnog6jaUBP4HcSwfanyEzWeLdUXnzJs2esJQQkA@mail.gmail.com>
@ 2024-10-22 6:39 ` Thomas Lamprecht
2024-10-22 9:51 ` Esi Y via pve-devel
0 siblings, 1 reply; 27+ messages in thread
From: Thomas Lamprecht @ 2024-10-22 6:39 UTC (permalink / raw)
To: Esi Y, Proxmox VE development discussion
Am 20/10/2024 um 21:08 schrieb Esi Y:
> QEMU was not designed to use internal snapshots "by default":
> https://wiki.qemu.org/Features/Snapshots
Just because you find an article that mentions qcow2 and snapshots it
doesn't have to be related...
The thing you linked has not only nothing to do with the snapshots of the
qcow2 format, which are just one of the targets, it also nowhere states
what you crudely try to imply.
Let's stop this dunning-kruger fueled spew of technical nonsense on the
devel lists, ideally all other channels too. It's not welcomed here and
wasting developers time that then, among other things, delays actual
review of all patches, including this series.
> Discussions on this are discouraged, to say the least:
> https://forum.proxmox.com/threads/blockdev-snapshot-sync-to-snapshot-a-raw-file.38188/#post-712825
The reason was clearly stated here [0], so why lie to make this seem like
some big conspiracy?
[0]: https://forum.proxmox.com/threads/blockdev-snapshot-sync-to-snapshot-a-raw-file.38188/#post-712902
You must stop your toxic bad-faith behavior immediately, it was pointed out
many times by Proxmox Staff and even other community users. If that won't
work, we have to exclude you from all our channels!
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
2024-10-22 6:39 ` Thomas Lamprecht
@ 2024-10-22 9:51 ` Esi Y via pve-devel
2024-10-22 14:54 ` DERUMIER, Alexandre via pve-devel
[not found] ` <2f07646b51c85ffe01089c2481dbb9680d75cfcb.camel@groupe-cyllene.com>
0 siblings, 2 replies; 27+ messages in thread
From: Esi Y via pve-devel @ 2024-10-22 9:51 UTC (permalink / raw)
To: Proxmox VE development discussion; +Cc: Esi Y
[-- Attachment #1: Type: message/rfc822, Size: 6045 bytes --]
From: Esi Y <esiy0676+proxmox@gmail.com>
To: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
Subject: Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
Date: Tue, 22 Oct 2024 11:51:31 +0200
Message-ID: <CABtLnHrgbiZcT0oMwpuhHhMiJh_nqoM7ZLNKMWYMBXJjkXa5VA@mail.gmail.com>
On Tue, Oct 22, 2024 at 8:39 AM Thomas Lamprecht
<t.lamprecht@proxmox.com> wrote:
> The thing you linked has not only nothing to do with the snapshots of the
> qcow2 format, which are just one of the targets, it also nowhere states
> what you crudely try to imply.
It's 2016 wiki:
"Internal snapshots to images which support internal snapshots (QCOW2
& QED) are not expected to be supported initially."
"By making the snapshot-file argument of the monitor and QMP command
optional, that could be used as a request to make the snapshot
internally instead of to an external file. However, without live block
migration of an internal snapshot, there is no way to make a backup of
an internal snapshot while still leaving the VM running, so this
feature is not planned at the present. For now, the snapshot-file
argument is required, and only external snapshots are implemented."
> wasting developers time that then, among other things, delays actual
I hoped this would bump it up for Alexandre to get a response.
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
2024-10-22 9:51 ` Esi Y via pve-devel
@ 2024-10-22 14:54 ` DERUMIER, Alexandre via pve-devel
[not found] ` <2f07646b51c85ffe01089c2481dbb9680d75cfcb.camel@groupe-cyllene.com>
1 sibling, 0 replies; 27+ messages in thread
From: DERUMIER, Alexandre via pve-devel @ 2024-10-22 14:54 UTC (permalink / raw)
To: pve-devel; +Cc: DERUMIER, Alexandre, esiy0676+proxmox
[-- Attachment #1: Type: message/rfc822, Size: 14854 bytes --]
From: "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
To: "pve-devel@lists.proxmox.com" <pve-devel@lists.proxmox.com>
Cc: "esiy0676+proxmox@gmail.com" <esiy0676+proxmox@gmail.com>
Subject: Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
Date: Tue, 22 Oct 2024 14:54:24 +0000
Message-ID: <2f07646b51c85ffe01089c2481dbb9680d75cfcb.camel@groupe-cyllene.com>
-------- Message initial --------
De: Esi Y via pve-devel <pve-devel@lists.proxmox.com>
Répondre à: Proxmox VE development discussion <pve-
devel@lists.proxmox.com>
À: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
Cc: Esi Y <esiy0676+proxmox@gmail.com>
Objet: Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add
external qcow2 snapshot support
Date: 22/10/2024 11:51:31
> wasting developers time that then, among other things, delays actual
>>I hoped this would bump it up for Alexandre to get a response.
As far I remember, when we have implement snapshot for qcow2 (I think
in 2010~2011, I'm becoming old ^_^ ) , only internal snapshot was
possible,
because they were no block-commit job. (to merge data in parent on
snapshot deletion).
Only block-stream job was available at this time (merge snapshot to
child snapshot)
I think that Red Hat has mostly worked on external snapshots these last
10 years (mostly because they used them for backups, but also for replication,
which is not possible with internal snapshots).
And the missing block job to merge data for internal snapshot is also
why the io need to be frozen during the merge.
So, that's why I have never used qcow2 in production (mostly ceph,
or through a custom netapp api for customers using nfs with raw files).
That mean that we don't have a clean snapshot solution currently for
shared san/nas without api.
I'm trying to fix/improve both nas (nfs) && san (iscsi,lvm) snapshots
implementation.
Mainly because I have a lot of onprem customers coming from vmware with
small san (iscsi/fiberchannel) needing snapshot feature.
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
2024-09-30 11:31 ` [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support Alexandre Derumier via pve-devel
@ 2024-10-23 10:12 ` Fabian Grünbichler
2024-10-23 12:59 ` DERUMIER, Alexandre via pve-devel
` (2 more replies)
0 siblings, 3 replies; 27+ messages in thread
From: Fabian Grünbichler @ 2024-10-23 10:12 UTC (permalink / raw)
To: Proxmox VE development discussion
some high level comments:
I am not sure how much we gain here with the raw support? it's a bit confusing to have a volid ending with raw, with the current volume and all but the first snapshot actually being stored in qcow2 files, with the raw file being the "oldest" snapshot in the chain..
if possible, I'd be much happier with the snapshot name in the snapshot file being a 1:1 match, see comments inline
- makes it a lot easier to understand (admin wants to manually remove snapshot "foo", if "foo" was the last snapshot then right now the volume called "foo" is actually the current contents!)
- means we don't have to do lookups via the full snapshot list all the time (e.g., if I want to do a full clone from a snapshot "foo", I can just pass the snap-foo volume to qemu-img)
the naming scheme for snapshots needs to be adapted to not clash with regular volumes:
$ pvesm alloc extsnap 131314 vm-131314-disk-foobar.qcow2 2G
Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-foobar.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=off compression_type=zlib size=2147483648 lazy_refcounts=off refcount_bits=16
successfully created 'extsnap:131314/vm-131314-disk-foobar.qcow2'
$ qm rescan --vmid 131314
rescan volumes...
can't parse snapname from path at /usr/share/perl5/PVE/Storage/Plugin.pm line 1934.
storage_migrate needs to handle external snapshots, or at least error out. I haven't tested that part or linked clones or a lot of other advanced related actions at all ;)
> Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am 30.09.2024 13:31 CEST geschrieben:
> Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
> ---
> src/PVE/Storage/DirPlugin.pm | 1 +
> src/PVE/Storage/Plugin.pm | 225 +++++++++++++++++++++++++++++++----
> 2 files changed, 201 insertions(+), 25 deletions(-)
>
> diff --git a/src/PVE/Storage/DirPlugin.pm b/src/PVE/Storage/DirPlugin.pm
> index 2efa8d5..2bef673 100644
> --- a/src/PVE/Storage/DirPlugin.pm
> +++ b/src/PVE/Storage/DirPlugin.pm
> @@ -80,6 +80,7 @@ sub options {
> is_mountpoint => { optional => 1 },
> bwlimit => { optional => 1 },
> preallocation => { optional => 1 },
> + snapext => { optional => 1 },
> };
> }
>
> diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm
> index 6444390..5e5197a 100644
> --- a/src/PVE/Storage/Plugin.pm
> +++ b/src/PVE/Storage/Plugin.pm
> @@ -214,6 +214,11 @@ my $defaultData = {
> maximum => 65535,
> optional => 1,
> },
> + 'snapext' => {
> + type => 'boolean',
> + description => 'enable external snapshot.',
> + optional => 1,
> + },
> },
> };
>
> @@ -695,7 +700,7 @@ sub get_subdir {
> }
>
> sub filesystem_path {
> - my ($class, $scfg, $volname, $snapname) = @_;
> + my ($class, $scfg, $volname, $snapname, $current_snap) = @_;
see comment below
>
> my ($vtype, $name, $vmid, undef, undef, $isBase, $format) =
> $class->parse_volname($volname);
> @@ -703,7 +708,7 @@ sub filesystem_path {
> # Note: qcow2/qed has internal snapshot, so path is always
> # the same (with or without snapshot => same file).
> die "can't snapshot this image format\n"
> - if defined($snapname) && $format !~ m/^(qcow2|qed)$/;
> + if defined($snapname) && !$scfg->{snapext} && $format !~ m/^(qcow2|qed)$/;
>
> my $dir = $class->get_subdir($scfg, $vtype);
>
> @@ -711,13 +716,22 @@ sub filesystem_path {
>
> my $path = "$dir/$name";
>
> + if($scfg->{snapext}) {
> + my $snappath = get_snap_path($path, $snapname);
> + if($snapname) {
> + $path = $snappath;
> + } elsif ($current_snap) {
> + $path = $current_snap->{file};
> + }
> + }
see commente below
> return wantarray ? ($path, $vmid, $vtype) : $path;
> }
>
> sub path {
> my ($class, $scfg, $volname, $storeid, $snapname) = @_;
>
> - return $class->filesystem_path($scfg, $volname, $snapname);
> + my $current_snapshot = $class->get_current_snapshot($scfg, $storeid, $volname);
this is pretty expensive, and would only be needed if $snapname is not set..
> + return $class->filesystem_path($scfg, $volname, $snapname, $current_snapshot);
couldn't we avoid extending the signature of filesystem_path and just pass the name of the current snapshot as $snapname?
> }
>
> sub create_base {
> @@ -1074,13 +1088,31 @@ sub volume_resize {
> sub volume_snapshot {
> my ($class, $scfg, $storeid, $volname, $snap) = @_;
>
> - die "can't snapshot this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
> + die "can't snapshot this image format\n" if $volname !~ m/\.(raw|qcow2|qed)$/;
>
> - my $path = $class->filesystem_path($scfg, $volname);
> + die "external snapshot need to be enabled to snapshot .raw volumes\n" if !$scfg->{snapext};
this condition is definitely wrong - it means no more snapshotting unless external snapshot support is enabled..
>
> - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> + if($scfg->{snapext}) {
>
> - run_command($cmd);
> + my $path = $class->path($scfg, $volname, $storeid);
> +
> + my $snappath = get_snap_path($path, $snap);
> + my $format = ($class->parse_volname($volname))[6];
> +
> + my $cmd = ['/usr/bin/qemu-img', 'create', '-b', $path,
> + '-F', $format, '-f', 'qcow2', $snappath];
see comments on qemu-server, but.. wouldn't it be better if the file with $snap in its name would be the one storing that snapshot's data? i.e., rename the "current" volume to be called ...-$snap... , and then create a new "current" file without a suffix with the renamed volume as backing file?
> +
> + my $options = "extended_l2=on,";
> + $options .= preallocation_cmd_option($scfg, 'qcow2');
> + push @$cmd, '-o', $options;
> + run_command($cmd);
> +
> + } else {
> +
> + my $path = $class->filesystem_path($scfg, $volname);
> + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> + run_command($cmd);
> + }
>
> return undef;
> }
> @@ -1091,19 +1123,39 @@ sub volume_snapshot {
> sub volume_rollback_is_possible {
> my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
>
> + if ($scfg->{snapext}) {
> + #technically, we could manage multibranch, we it need lot more work for snapshot delete
would multibranch be easier if there is a simple 1:1 correspondence between snapshots and their filenames?
switching to a different part of the "hierarchy" is then just
- delete current volume
- create new current volume using rollback target as backing file
I guess deletion does become harder then, since it potentially requires multiple rebases..
> + my $path = $class->filesystem_path($scfg, $volname);
> + my $snappath = get_snap_path($path, $snap);
> +
> + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> + my $currentpath = $snapshots->{current}->{file};
> + return 1 if !-e $snappath || $currentpath eq $snappath;
> +
> + die "can't rollback, '$snap' is not most recent snapshot on '$volname'\n";
> + }
> +
> return 1;
> }
>
> sub volume_snapshot_rollback {
> my ($class, $scfg, $storeid, $volname, $snap) = @_;
>
> - die "can't rollback snapshot this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
> + die "can't rollback snapshot this image format\n" if $volname !~ m/\.(raw|qcow2|qed)$/;
>
> - my $path = $class->filesystem_path($scfg, $volname);
> + die "external snapshot need to be enabled to rollback snapshot .raw volumes\n" if $volname =~ m/\.(raw)$/ && !$scfg->{snapext};
>
> - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> + my $path = $class->filesystem_path($scfg, $volname);
>
> - run_command($cmd);
> + if ($scfg->{snapext}) {
> + #simply delete the current snapshot and recreate it
> + my $snappath = get_snap_path($path, $snap);
> + unlink($snappath);
> + $class->volume_snapshot($scfg, $storeid, $volname, $snap);
this *reads* so weird ;) it is right given the current semantics (current snapshot == live image, snapshot data actually stored in parent snapshot)
> + } else {
> + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> + run_command($cmd);
> + }
>
> return undef;
> }
> @@ -1111,17 +1163,50 @@ sub volume_snapshot_rollback {
> sub volume_snapshot_delete {
> my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
>
> - die "can't delete snapshot for this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
> + die "can't delete snapshot for this image format\n" if $volname !~ m/\.(raw|qcow2|qed)$/;
> +
> + die "external snapshot need to be enabled to delete snapshot of .raw volumes\n" if !$scfg->{snapext};
>
> return 1 if $running;
>
> - my $path = $class->filesystem_path($scfg, $volname);
> + my $cmd = "";
> + if ($scfg->{snapext}) {
> +
> + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> + my $snappath = $snapshots->{$snap}->{file};
> + return if !-e $snappath; #already deleted ?
> +
> + my $parentsnap = $snapshots->{$snap}->{parent};
> + my $childsnap = $snapshots->{$snap}->{child};
> + die "error: can't find a parent for this snapshot" if !$parentsnap;
>
> - $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> + my $parentpath = $snapshots->{$parentsnap}->{file};
> + my $parentformat = $snapshots->{$parentsnap}->{'format'} if $parentsnap;
> + my $childpath = $snapshots->{$childsnap}->{file} if $childsnap;
> + my $childformat = $snapshots->{$childsnap}->{'format'} if $childsnap;
>
> - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> + print "merge snapshot $snap to $parentsnap\n";
> + $cmd = ['/usr/bin/qemu-img', 'commit', $snappath];
> + run_command($cmd);
> +
> + #if we delete an intermediate snapshot, we need to link upper snapshot to base snapshot
> + if($childpath && -e $childpath) {
> + die "missing parentsnap snapshot to rebase child $childpath\n" if !$parentpath;
> + print "link $childsnap to $parentsnap\n";
> + $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b', $parentpath, '-F', $parentformat, '-f', $childformat, $childpath];
> + run_command($cmd);
> + }
wouldn't a regular safe rebase work just as well, instead of commit + unsafe rebase? if there is no parent, passing in "" as "new" backing file should work..
> +
> + #delete the snapshot
> + unlink($snappath);
> + } else {
> + my $path = $class->filesystem_path($scfg, $volname);
>
> - run_command($cmd);
> + $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> +
> + $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> + run_command($cmd);
> + }
>
> return undef;
> }
> @@ -1140,10 +1225,6 @@ sub volume_has_feature {
> my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running, $opts) = @_;
>
> my $features = {
> - snapshot => {
> - current => { qcow2 => 1 },
> - snap => { qcow2 => 1 },
> - },
> clone => {
> base => { qcow2 => 1, raw => 1, vmdk => 1 },
> },
> @@ -1159,11 +1240,23 @@ sub volume_has_feature {
> base => { qcow2 => 1, raw => 1, vmdk => 1 },
> current => { qcow2 => 1, raw => 1, vmdk => 1 },
> },
> - rename => {
> - current => {qcow2 => 1, raw => 1, vmdk => 1},
> - },
> + 'rename' => {
> + current => { qcow2 => 1, raw => 1, vmdk => 1},
> + }
> };
>
> + if ($scfg->{snapext}) {
> + $features->{snapshot} = {
> + current => { raw => 1, qcow2 => 1 },
> + snap => { raw => 1, qcow2 => 1 },
> + }
> + } else {
> + $features->{snapshot} = {
> + current => { qcow2 => 1 },
> + snap => { qcow2 => 1 },
> + };
> + }
> +
this could just leave $features as it is, and add the "raw" bits:
if ($scfg->{snapext}) {
$features->{snapshot}->{current}->{raw} = 1;
$features->{snapshot}->{snap}->{raw} = 1;
}
> if ($feature eq 'clone') {
> if (
> defined($opts->{valid_target_formats})
> @@ -1222,7 +1315,9 @@ sub list_images {
> }
>
> if ($vollist) {
> - my $found = grep { $_ eq $volid } @$vollist;
> + my $search_volid = $volid;
> + $search_volid =~ s/-snap-.*\./\./;
> + my $found = grep { $_ eq $search_volid } @$vollist;
> next if !$found;
> }
>
> @@ -1380,7 +1475,53 @@ sub status {
> sub volume_snapshot_info {
> my ($class, $scfg, $storeid, $volname) = @_;
>
> - die "volume_snapshot_info is not implemented for $class";
> + die "volume_snapshot_info is not implemented for $class" if !$scfg->{snapext};
> +
> + my $path = $class->filesystem_path($scfg, $volname);
> +
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
> +
> + my $basevolname = $volname;
> + $basevolname =~ s/\.(raw|qcow2)$//;
> +
> + my $snapshots = $class->list_images($storeid, $scfg, $vmid);
> + my $info = {};
> + for my $snap (@$snapshots) {
> +
> + my $volid = $snap->{volid};
> + next if ($volid !~ m/$basevolname/);
this regex is broken w.r.t. partial matching!
e.g., if a VM has both a disk -1.qcow2 and -11.qcow2 and I attempt to snapshot it using external snapshots:
snapshotting 'drive-scsi0' (extsnap:131314/vm-131314-disk-0.raw)
Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-0-snap-test2.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=on preallocation=off compression_type=zlib size=200704 backing_file=/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-0-snap-test.qcow2 backing_fmt=raw lazy_refcounts=off refcount_bits=16
snapshotting 'drive-scsi1' (extsnap:131314/vm-131314-disk-1.qcow2)
Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-test2.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=on preallocation=off compression_type=zlib size=2147483648 backing_file=/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
snapshotting 'drive-scsi2' (extsnap:131314/vm-131314-disk-11.qcow2)
qemu-img: /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-test2.qcow2: Error: Trying to create an image with the same filename as the backing file
snapshot create failed: starting cleanup
merge snapshot test2 to test
Image committed.
merge snapshot test2 to base
Image committed.
TASK ERROR: command '/usr/bin/qemu-img create -b /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-test2.qcow2 -F qcow2 -f qcow2 /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-test2.qcow2 -o 'extended_l2=on,preallocation=off'' failed: exit code 1
> +
> + my (undef, $snapvolname) = parse_volume_id($volid);
> + my $snapname = get_snapname_from_path($volid);
> + my $snapfile = $class->filesystem_path($scfg, $snapvolname, $snapname);
> + $snapname = 'base' if !$snapname;
> +
> + my $format = $snap->{'format'};
> + my $parentfile = $snap->{parent};
> + my $parentname = get_snapname_from_path($parentfile) if $parentfile;
> + $parentname = 'base' if !$parentname && $parentfile;
> +
> + $info->{$snapname}->{file} = $snapfile;
> + $info->{$snapname}->{volid} = $volid;
> + $info->{$snapname}->{'format'} = $format;
> + $info->{$snapname}->{parent} = $parentname if $parentname;
> + $info->{$parentname}->{child} = $snapname if $parentname;
> + }
> +
> + my $current = undef;
> + for my $id (keys %$info) {
> + my $snap = $info->{$id};
> + die "error: snap $id: you can't have multiple current snapshot: current:$current\n" if !$snap->{child} && $current;
> + $current = $id if !$snap->{child};
> + }
> +
> + if ($current) {
> + $info->{current}->{file} = $info->{$current}->{file};
> + $info->{current}->{'format'} = $info->{$current}->{'format'};
> + $info->{current}->{parent} = $info->{$current}->{parent};
> + }
> +
> + return $info;
> }
>
> sub activate_storage {
> @@ -1764,4 +1905,38 @@ sub config_aware_base_mkdir {
> }
> }
>
> +sub get_snap_path {
> + my ($path, $snap) = @_;
> +
> + my $basepath = "";
> + my $baseformat = "";
> + if ($path =~ m/^((.*)(vm-(\d+)-disk-(\d+)))(-snap-(.*))?\.(raw|qcow2)/) {
this regex is wrong - volumes can have arbitrary names after the -disk- part..
> + $basepath = $1;
> + $baseformat = $8;
> + }
> + my $format = $snap ? 'qcow2' : $baseformat;
> + my $snappath = $snap ? $basepath."-snap-$snap.$format" : undef;
> +
> + return $snappath;
> +}
> +
> +sub get_snapname_from_path {
> + my ($path) = @_;
> +
> + if ($path =~ m/^((.*)(vm-(\d+)-disk-(\d+)))(-snap-(.*))?\.(raw|qcow2)/) {
here as well — and this whole helper is just used twice in volume_snapshot_info, maybe it could be inlined or made private
> + my $snapname = $7;
> + return $snapname;
> + }
> + die "can't parse snapname from path";
> +}
> +
> +sub get_current_snapshot {
> + my ($class, $scfg, $storeid, $volname) = @_;
> + #IMPROVE ME: faster way to find current snapshot? (search the most recent created snapshot file ? need to works with lvm volume too)
> +
> + return if !$scfg->{snapext};
> + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> + return $snapshots->{current};
> +}
> +
> 1;
> --
> 2.39.2
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot)
2024-09-30 11:31 ` [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot) Alexandre Derumier via pve-devel
@ 2024-10-23 10:13 ` Fabian Grünbichler
2024-10-23 13:45 ` DERUMIER, Alexandre via pve-devel
[not found] ` <e976104d8ed7c365d8a482fa320a0691456e69c1.camel@groupe-cyllene.com>
0 siblings, 2 replies; 27+ messages in thread
From: Fabian Grünbichler @ 2024-10-23 10:13 UTC (permalink / raw)
To: Proxmox VE development discussion
I am not yet convinced this is somehow a good idea, but maybe you can convince me otherwise ;)
variant A: this is just useful for very short-lived snapshots
variant B: these snapshots are supposed to be long-lived
A is not something we want. we intentionally don't have non-thin LVM snapshots for example.
B: once I create a single snapshot, the "original" storage only contains the data written up to that point, anything else is stored on the "snapshot" storage. this means my snapshot storage must be at least as fast/good/shared/.. as my original storage. in that case, I can just use the snapshot storage directly and ditch the original storage?
> Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am 30.09.2024 13:31 CEST geschrieben:
> Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
> ---
> src/PVE/Storage.pm | 2 +
> src/PVE/Storage/LvmQcow2Plugin.pm | 460 ++++++++++++++++++++++++++++++
> src/PVE/Storage/Makefile | 3 +-
> 3 files changed, 464 insertions(+), 1 deletion(-)
> create mode 100644 src/PVE/Storage/LvmQcow2Plugin.pm
>
> diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm
> index 57b2038..119998f 100755
> --- a/src/PVE/Storage.pm
> +++ b/src/PVE/Storage.pm
> @@ -28,6 +28,7 @@ use PVE::Storage::Plugin;
> use PVE::Storage::DirPlugin;
> use PVE::Storage::LVMPlugin;
> use PVE::Storage::LvmThinPlugin;
> +use PVE::Storage::LvmQcow2Plugin;
> use PVE::Storage::NFSPlugin;
> use PVE::Storage::CIFSPlugin;
> use PVE::Storage::ISCSIPlugin;
> @@ -54,6 +55,7 @@ our $KNOWN_EXPORT_FORMATS = ['raw+size', 'tar+size', 'qcow2+size', 'vmdk+size',
> PVE::Storage::DirPlugin->register();
> PVE::Storage::LVMPlugin->register();
> PVE::Storage::LvmThinPlugin->register();
> +PVE::Storage::LvmQcow2Plugin->register();
> PVE::Storage::NFSPlugin->register();
> PVE::Storage::CIFSPlugin->register();
> PVE::Storage::ISCSIPlugin->register();
> diff --git a/src/PVE/Storage/LvmQcow2Plugin.pm b/src/PVE/Storage/LvmQcow2Plugin.pm
> new file mode 100644
> index 0000000..68c8686
> --- /dev/null
> +++ b/src/PVE/Storage/LvmQcow2Plugin.pm
> @@ -0,0 +1,460 @@
> +package PVE::Storage::LvmQcow2Plugin;
> +
> +use strict;
> +use warnings;
> +
> +use IO::File;
> +
> +use PVE::Tools qw(run_command trim);
> +use PVE::Storage::Plugin;
> +use PVE::Storage::LVMPlugin;
> +use PVE::JSONSchema qw(get_standard_option);
> +
> +use base qw(PVE::Storage::LVMPlugin);
> +
> +# Configuration
> +
> +sub type {
> + return 'lvmqcow2';
> +}
> +
> +sub plugindata {
> + return {
> + #container not yet implemented #need to implemented dm-qcow2
> + content => [ {images => 1, rootdir => 1}, { images => 1 }],
> + };
> +}
> +
> +sub properties {
> + return {
> + };
> +}
> +
> +sub options {
> + return {
> + vgname => { fixed => 1 },
> + nodes => { optional => 1 },
> + shared => { optional => 1 },
> + disable => { optional => 1 },
> + saferemove => { optional => 1 },
> + saferemove_throughput => { optional => 1 },
> + content => { optional => 1 },
> + base => { fixed => 1, optional => 1 },
> + tagged_only => { optional => 1 },
> + bwlimit => { optional => 1 },
> + snapext => { fixed => 1 },
> + };
> +}
> +
> +# Storage implementation
> +
> +sub parse_volname {
> + my ($class, $volname) = @_;
> +
> + PVE::Storage::Plugin::parse_lvm_name($volname);
> + my $format = $volname =~ m/^(.*)-snap-/ ? 'qcow2' : 'raw';
> +
> + if ($volname =~ m/^((vm|base)-(\d+)-\S+)$/) {
> + return ('images', $1, $3, undef, undef, $2 eq 'base', $format);
> + }
> +
> + die "unable to parse lvm volume name '$volname'\n";
> +}
> +
> +sub filesystem_path {
> + my ($class, $scfg, $volname, $snapname, $current_snap) = @_;
> +
> + my ($vtype, $name, $vmid) = $class->parse_volname($volname);
> +
> + my $vg = $scfg->{vgname};
> +
> + my $path = "/dev/$vg/$name";
> +
> + if($snapname) {
> + $path = get_snap_volname($path, $snapname);
> + } elsif ($current_snap) {
> + $path = $current_snap->{file};
> + }
> +
> + return wantarray ? ($path, $vmid, $vtype) : $path;
> +}
> +
> +sub create_base {
> + my ($class, $storeid, $scfg, $volname) = @_;
> +
> + my $vg = $scfg->{vgname};
> +
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> + $class->parse_volname($volname);
> +
> + die "create_base not possible with base image\n" if $isBase;
> +
> + die "unable to create base volume - found snapshot" if $class->snapshot_exist($scfg, $storeid, $volname);
> +
> + my $newname = $name;
> + $newname =~ s/^vm-/base-/;
> +
> + my $cmd = ['/sbin/lvrename', $vg, $volname, $newname];
> + run_command($cmd, errmsg => "lvrename '$vg/$volname' => '$vg/$newname' error");
> +
> + # set inactive, read-only flags
> + $cmd = ['/sbin/lvchange', '-an', '-pr', "$vg/$newname"];
> + eval { run_command($cmd); };
> + warn $@ if $@;
> +
> + my $newvolname = $newname;
> +
> + return $newvolname;
> +}
> +
> +sub clone_image {
> + my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;
> +
> + die "can't clone images in lvm storage\n";
> +}
> +
> +sub alloc_image {
> + my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;
> +
> + die "unsupported format '$fmt'" if $fmt ne 'raw';
> +
> + die "illegal name '$name' - should be 'vm-$vmid-*'\n"
> + if $name && $name !~ m/^vm-$vmid-/;
> +
> + my $vgs = PVE::Storage::LVMPlugin::lvm_vgs();
> +
> + my $vg = $scfg->{vgname};
> +
> + die "no such volume group '$vg'\n" if !defined ($vgs->{$vg});
> +
> + my $free = int($vgs->{$vg}->{free});
> +
> + die "not enough free space ($free < $size)\n" if $free < $size;
> +
> + $name = $class->find_free_diskname($storeid, $scfg, $vmid)
> + if !$name;
> +
> + my $tags = ["pve-vm-$vmid"];
> + if ($name =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/) {
> + push @$tags, "\@pve-$1";
> + }
> +
> + PVE::Storage::LVMPlugin::lvcreate($vg, $name, $size, $tags);
> +
> + return $name;
> +}
> +
> +sub volume_snapshot_info {
> + my ($class, $scfg, $storeid, $volname) = @_;
> +
> + return $class->list_snapshots($scfg, $storeid, $volname);
> +}
> +
> +sub activate_volume {
> + my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
> +
> + my $lvm_activate_mode = 'ey';
> + my $tag = undef;
> +
> + #activate volume && all volumes snapshots by tag
> + if($volname =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/) {
> + $tag = "\@pve-vm-$4-disk-$5";
> + }
> +
> + my $cmd = ['/sbin/lvchange', "-a$lvm_activate_mode", $tag];
> + run_command($cmd, errmsg => "can't activate LV '$tag'");
> +
> + $cmd = ['/sbin/lvchange', '--refresh', $tag];
> + run_command($cmd, errmsg => "can't refresh LV '$tag' for activation");
> +}
> +
> +sub deactivate_volume {
> + my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
> +
> + my $tag = undef;
> + #deactivate volume && all volumes snasphots by tag
> + if($volname =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/) {
> + $tag = "\@pve-vm-$4-disk-$5";
> + }
> +
> + my $cmd = ['/sbin/lvchange', '-aln', $tag];
> + run_command($cmd, errmsg => "can't deactivate LV '$tag'");
> +}
> +
> +sub volume_resize {
> + my ($class, $scfg, $storeid, $volname, $size, $running) = @_;
> +
> + #we should resize the base image and parents snapshots,
> + #but how to manage rollback ?
> +
> + die "can't resize if snasphots exist" if $class->snapshot_exist($scfg, $storeid, $volname);
> +
> + return 1;
> +}
> +
> +sub volume_snapshot {
> + my ($class, $scfg, $storeid, $volname, $snap) = @_;
> +
> + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> +
> + my $current_path = $class->path($scfg, $volname, $storeid);
> + my $current_format = (PVE::Storage::Plugin::file_size_info($current_path))[1];
> + my $snappath = get_snap_volname($current_path, $snap);
> +
> + my $snapvolname = get_snap_volname($volname, $snap);
> + #allocate lvm snapshot volume
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> + $class->parse_volname($volname);
> + my $size = $class->volume_size_info($scfg, $storeid, $volname, 5);
> + #add 100M for qcow2 headers
> + $size = int($size/1024) + (100*1024);
> +
> + $class->alloc_image($storeid, $scfg, $vmid, 'raw', $snapvolname, $size);
> +
> + # create the qcow2 fs
> + eval {
> + my $cmd = ['/usr/bin/qemu-img', 'create', '-b', $current_path,
> + '-F', $current_format, '-f', 'qcow2', $snappath];
> + my $options = "extended_l2=on,";
> + $options .= PVE::Storage::Plugin::preallocation_cmd_option($scfg, 'qcow2');
> + push @$cmd, '-o', $options;
> + run_command($cmd);
> + };
> + if ($@) {
> + eval { $class->free_image($storeid, $scfg, $snapvolname, 0) };
> + warn $@ if $@;
> + }
> +}
> +
> +# Asserts that a rollback to $snap on $volname is possible.
> +# If certain snapshots are preventing the rollback and $blockers is an array
> +# reference, the snapshot names can be pushed onto $blockers prior to dying.
> +sub volume_rollback_is_possible {
> + my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
> +
> + my $path = $class->filesystem_path($scfg, $volname);
> + my $snappath = get_snap_volname($path, $snap);
> + my $currentpath = $class->path($scfg, $volname, $storeid);
> + return 1 if $currentpath eq $snappath;
> +
> + die "can't rollback, '$snap' is not most recent snapshot on '$volname'\n";
> +
> + return 1;
> +}
> +
> +sub volume_snapshot_rollback {
> + my ($class, $scfg, $storeid, $volname, $snap) = @_;
> +
> + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> + #simply delete the current snapshot and recreate it
> +
> + my $snapvolname = get_snap_volname($volname, $snap);
> +
> + $class->free_image($storeid, $scfg, $snapvolname, 0);
> + $class->volume_snapshot($scfg, $storeid, $volname, $snap);
> +}
> +
> +sub volume_snapshot_delete {
> + my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
> +
> + return 1 if $running;
> +
> + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> +
> + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> + my $snappath = $snapshots->{$snap}->{file};
> + if(!$snappath) {
> + warn "$snap already deleted. skip\n";
> + return;
> + }
> +
> + my $snapvolname = $snapshots->{$snap}->{volname};
> + my $parentsnap = $snapshots->{$snap}->{parent};
> + my $childsnap = $snapshots->{$snap}->{child};
> + die "error: can't find a parent for this snapshot" if !$parentsnap;
> +
> + my $parentpath = $snapshots->{$parentsnap}->{file};
> + my $parentformat = $snapshots->{$parentsnap}->{'format'} if $parentsnap;
> + my $childpath = $snapshots->{$childsnap}->{file} if $childsnap;
> + my $childformat = $snapshots->{$childsnap}->{'format'} if $childsnap;
> +
> + print "merge snapshot $snap to $parentsnap\n";
> + my $cmd = ['/usr/bin/qemu-img', 'commit', $snappath];
> + run_command($cmd);
> +
> + #if we delete an intermediate snapshot, we need to link upper snapshot to base snapshot
> + if($childpath && -e $childpath) {
> + die "missing parentsnap snapshot to rebase child $childpath\n" if !$parentpath;
> + print "link $childsnap to $parentsnap\n";
> + $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b', $parentpath, '-F', $parentformat, '-f', $childformat, $childpath];
> + run_command($cmd);
> + }
> +
> + #delete the snapshot
> + $class->free_image($storeid, $scfg, $snapvolname, 0);
> +
> + return;
> +}
> +
> +sub volume_has_feature {
> + my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = @_;
> +
> + my $features = {
> + snapshot => { current => 1 },
> +# clone => { base => 1, snap => 1}, #don't allow to clone as we can't activate the base between different host ?
> + template => { current => 1},
> + copy => { base => 1, current => 1, snap => 1},
> + sparseinit => { base => 1, current => 1},
> + rename => {current => 1},
> + };
> +
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> + $class->parse_volname($volname);
> +
> + my $key = undef;
> + if($snapname){
> + $key = 'snap';
> + }else{
> + $key = $isBase ? 'base' : 'current';
> + }
> + return 1 if $features->{$feature}->{$key};
> +
> + return undef;
> +}
> +
> +sub get_snap_volname {
> + my ($path, $snap) = @_;
> +
> + my $basepath = "";
> + my $baseformat = "";
> + if ($path =~ m/^((.*)((vm|base)-(\d+)-disk-(\d+)))(-snap-([a-zA-Z0-9]+))?(\.(raw|qcow2))?/) {
> + $basepath = $1;
> + $baseformat = $8;
> + }
> + my $snapvolname = $basepath."-snap-$snap.qcow2";
> + return $snapvolname;
> +}
> +
> +sub get_snapname_from_path {
> + my ($path) = @_;
> +
> + if ($path =~ m/^((.*)((vm|base)-(\d+)-disk-(\d+)))(-snap-([a-zA-Z0-9]+))?(\.(raw|qcow2))?/) {
> + my $snapname = $7;
> + return $snapname;
> + }
> + die "can't parse snapname from path $path";
> +}
> +
> +sub get_current_snapshot {
> + my ($class, $scfg, $storeid, $volname) = @_;
> +
> + #get more recent ctime volume
> + return $class->list_snapshots($scfg, $storeid, $volname, 1);
> +}
> +my $check_tags = sub {
> + my ($tags) = @_;
> +
> + return defined($tags) && $tags =~ /(^|,)pve-vm-\d+(,|$)/;
> +};
> +
> +sub list_images {
> + my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;
> +
> + my $vgname = $scfg->{vgname};
> +
> + $cache->{lvs} = PVE::Storage::LVMPlugin::lvm_list_volumes() if !$cache->{lvs};
> +
> + my $res = [];
> +
> + if (my $dat = $cache->{lvs}->{$vgname}) {
> +
> + foreach my $volname (keys %$dat) {
> +
> + next if $volname !~ m/^(vm|base)-(\d+)-/;
> + my $owner = $2;
> +
> + my $info = $dat->{$volname};
> +
> + next if $scfg->{tagged_only} && !&$check_tags($info->{tags});
> +
> + # Allow mirrored and RAID LVs
> + next if $info->{lv_type} !~ m/^[-mMrR]$/;
> +
> + my $volid = "$storeid:$volname";
> +
> + if ($vollist) {
> + my $found = grep { $_ eq $volid } @$vollist;
> + next if !$found;
> + } else {
> + next if defined($vmid) && ($owner ne $vmid);
> + }
> +
> + push @$res, {
> + volid => $volid, format => 'raw', size => $info->{lv_size}, vmid => $owner,
> + ctime => $info->{ctime},
> + };
> + }
> + }
> +
> + return $res;
> +}
> +
> +sub list_snapshots {
> + my ($class, $scfg, $storeid, $volname, $current_only) = @_;
> +
> + my $vgname = $scfg->{vgname};
> +
> + my $basevolname = $volname;
> + my $lvs = PVE::Storage::LVMPlugin::lvm_list_volumes($vgname);
> +
> + my $vg = $lvs->{$vgname};
> +
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
> + my $snapshots = $class->list_images($storeid, $scfg, $vmid);
> +
> + my $info = {};
> + for my $snap (@$snapshots) {
> + my $snap_volid = $snap->{volid};
> + next if ($snap_volid !~ m/$basevolname/);
> +
> + my $snapname = get_snapname_from_path($snap_volid);
> + my (undef, $snap_volname) = PVE::Storage::parse_volume_id($snap_volid);
> + my $snapfile = $class->filesystem_path($scfg, $snap_volname, $snapname);
> + $snapname = 'base' if !$snapname;
> + $info->{$snapname}->{file} = $snapfile;
> + $info->{$snapname}->{volname} = $snap_volname;
> + $info->{$snapname}->{volid} = $snap_volid;
> + $info->{$snapname}->{ctime} = $snap->{ctime};
> +
> + if (!$current_only) {
> + my (undef, $format, undef, $parentfile, undef) = PVE::Storage::Plugin::file_size_info($snapfile);
> + next if !$parentfile && $snapname ne 'base'; #bad unlinked snasphot
> +
> + my $parentname = get_snapname_from_path($parentfile) if $parentfile;
> + $parentname = 'base' if !$parentname && $parentfile;
> +
> + $info->{$snapname}->{'format'} = $format;
> + $info->{$snapname}->{parent} = $parentname if $parentname;
> + $info->{$parentname}->{child} = $snapname if $parentname;
> + }
> + }
> +
> + my @snapshots_sorted = sort { $info->{$b}{ctime} <=> $info->{$a}{ctime} } keys %$info;
> + my $current_snapname = $snapshots_sorted[0];
> + my $current_snapshot = $info->{$current_snapname};
> + return $current_snapshot if $current_only;
> +
> + $info->{current} = { %$current_snapshot };
> + return $info;
> +}
> +
> +sub snapshot_exist {
> + my ($class, $scfg, $storeid, $volname) = @_;
> +
> + my $basepath = $class->filesystem_path($scfg, $volname);
> + my $currentpath = $class->path($scfg, $volname, $storeid);
> +
> + die "can't resize if snasphots exist" if $currentpath ne $basepath;
> +
> +}
> +1;
> diff --git a/src/PVE/Storage/Makefile b/src/PVE/Storage/Makefile
> index d5cc942..1af8aab 100644
> --- a/src/PVE/Storage/Makefile
> +++ b/src/PVE/Storage/Makefile
> @@ -14,7 +14,8 @@ SOURCES= \
> PBSPlugin.pm \
> BTRFSPlugin.pm \
> LvmThinPlugin.pm \
> - ESXiPlugin.pm
> + ESXiPlugin.pm \
> + LvmQcow2Plugin.pm
>
> .PHONY: install
> install:
> --
> 2.39.2
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot
2024-09-30 11:31 ` [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot Alexandre Derumier via pve-devel
@ 2024-10-23 10:14 ` Fabian Grünbichler
2024-10-23 14:31 ` DERUMIER, Alexandre via pve-devel
` (2 more replies)
0 siblings, 3 replies; 27+ messages in thread
From: Fabian Grünbichler @ 2024-10-23 10:14 UTC (permalink / raw)
To: Proxmox VE development discussion
> Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am 30.09.2024 13:31 CEST geschrieben:
> Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
> ---
> PVE/QemuServer.pm | 108 ++++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 95 insertions(+), 13 deletions(-)
>
> diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
> index b26da505..1523df15 100644
> --- a/PVE/QemuServer.pm
> +++ b/PVE/QemuServer.pm
> @@ -1549,7 +1549,11 @@ sub print_drive_commandline_full {
> } else {
> if ($storeid) {
> $path = PVE::Storage::path($storecfg, $volid);
> - $format //= qemu_img_format($scfg, $volname);
> + if ($scfg->{snapext}) {
> + $format //= qemu_img_format($scfg, $path);
> + } else {
> + $format //= qemu_img_format($scfg, $volname);
> + }
another reason to forbid raw-based snapshotting? ;)
> } else {
> $path = $volid;
> $format //= "raw";
> @@ -4713,9 +4717,31 @@ sub qemu_volume_snapshot {
> my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;
>
> my $running = check_running($vmid);
> + my $do_snapshots_with_qemu = do_snapshots_with_qemu($storecfg, $volid, $deviceid) if $running;
forbidden post-if + declaration, see https://pve.proxmox.com/wiki/Perl_Style_Guide
> + if ($do_snapshots_with_qemu) {
> + if($do_snapshots_with_qemu == 2) {
wrong nesting - this should be
if ($do_snapshots_with_qemu == 1) {
..
} elsif ($do_snapshots_with_qemu == 2) {
..
} else {
..
}
> + my $snapshot_file = PVE::Storage::path($storecfg, $volid, $snap);
> + #allocate volume is external snapshot is a block device
> + my $snap_volid = undef;
> + if ($snapshot_file =~ m|^/dev/.+|) {
> + my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
> + my $size = PVE::Storage::volume_size_info($storecfg, $volid, 5);
> + #add 100M for qcow2 headers
> + $size = int($size/1024) + (100*1024);
> + my $snap_volname = $volname."-snap-$snap";
> + $snap_volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, 'raw', $snap_volname, $size);
> + PVE::Storage::activate_volumes($storecfg, [$snap_volid]);
> + }
haven't tested this part
> +
> + eval { mon_cmd($vmid, 'blockdev-snapshot-sync', device => $deviceid, 'snapshot-file' => $snapshot_file, format => 'qcow2') };
if we want the current volume to keep its name, and the snapshot volume to actually contain *that* snapshot's data, we need some sort of rename dance here as well — i.e., rename the current volume to have the snapshot volume name, then snapshot it back into the "current" name. not sure what the proper qmp runes would be to achieve that?
maybe (untested!):
let's say "vm-100-disk-1.qcow2" is the current volume. it might or might not have snapshots/backing files already.
1. snapshot into snapshot volume "vm-100-disk-1-snap-foobar.qcow2"
"vm-100-disk-1.qcow2" is the backing file of the new "vm-100-disk-1-snap-foobar.qcow2" volume, and now contains the delta for the snapshot "foobar"
2. block-stream "vm-100-disk-1.qcow2", potentially with its backing file as base, into "vm-100-disk-1-snap-foobar.qcow2"
now "vm-100-disk-1-snap-foobar.qcow2" should contain the delta of snapshot "foobar" to the previous snapshot (if one exists, or the complete data otherwise)
3. delete "vm-100-disk-1.qcow2" on the storage layer now (it's no longer part of the backing chain)
4. snapshot "vm-100-disk-1-snap-foobar.qcow2" into the now free "vm-100-disk-1.qcow2" volume
then we end up with a snapshot volume representing the snapshot delta, and a current volume on top that gets the new writes?
steps 1-3 are just preparation/renaming the "live" top overlay, 4 is the actual snapshotting part. but of course, this causes I/O, so would require further adaptations to work in a consistent fashion without a huge downtime.
alternatively, something like this could also work (also completely untested):
1. snapshot into temp name
"vm-100-disk-1.qcow2" is now the backing file of this new volume, and contains the state for snapshot "foobar"
2. hardlink "vm-100-disk-1.qcow2" into "vm-100-disk-1-snap-foobar.qcow2"
3. QMP change-backing-file of temp volume to "vm-100-disk-1-snap-foobar.qcow2"
"vm-100-disk-1.qcow2" is now no longer part of the backing chain
4. remove "vm-100-disk-1.qcow2"
5. snapshot into "vm-100-disk-1.qcow2"
6. block-stream temp name into "vm-100-disk-1.qcow2", with "vm-100-disk-1-snap-foobar.qcow2" as base
since the temp volume is empty (VM doesn't do I/O if multiple disks are snapshotted), block-stream should be fast in this case I think..
>
> - if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
> - mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
> + if ($@) {
> + PVE::Storage::vdisk_free($storecfg, $snap_volid) if $snapshot_file =~ m|^/dev/.+|;
this should check definedness of $snap_volid, instead of $snapshot_file?
> + die $@;
> + }
> + } else {
> + mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
> + }
> } else {
> PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
> }
> @@ -4735,13 +4761,52 @@ sub qemu_volume_snapshot_delete {
> });
> }
>
> - if ($attached_deviceid && do_snapshots_with_qemu($storecfg, $volid, $attached_deviceid)) {
> - mon_cmd(
> - $vmid,
> - 'blockdev-snapshot-delete-internal-sync',
> - device => $attached_deviceid,
> - name => $snap,
> - );
> + my $do_snapshots_with_qemu = do_snapshots_with_qemu($storecfg, $volid, $attached_deviceid) if $running;
same post-if/declaration issue here
> + if ($attached_deviceid && $do_snapshots_with_qemu) {
> +
> + if ($do_snapshots_with_qemu == 2) {
and same nesting if comment here ;)
> +
> + my $snapshots = PVE::Storage::volume_snapshot_info($storecfg, $volid);
> +
> + my $currentpath = $snapshots->{current}->{file};
> + my $snappath = $snapshots->{$snap}->{file};
> + my $snapvolid = $snapshots->{$snap}->{volid};
> + return if !$snappath; #already delete
how can this be? if the device is attached and snapshotted the snapshot must be part of the backing chain?
> + my $parentsnap = $snapshots->{$snap}->{parent};
> + die "error: we can't find a parent for this snapshot" if !$parentsnap;
> +
> + my $parentpath = $snapshots->{$parentsnap}->{file};
> + my $parentformat = $snapshots->{$parentsnap}->{'format'} if $parentsnap;
> +
> + print "block-commit top:$snappath base:$parentpath\n";
> +
> + my $job_id = "commit-$attached_deviceid";
> + my $jobs = {};
> + mon_cmd(
> + $vmid,
> + 'block-commit',
> + 'job-id' => $job_id,
> + device => $attached_deviceid,
> + top => $snappath,
> + base => $parentpath,
> + );
> + $jobs->{$job_id} = {};
> +
> + #if we delete the current, block-job-complete to finish
> + my $completion = $currentpath eq $snappath ? 'complete' : 'auto';
> + qemu_drive_mirror_monitor($vmid, undef, $jobs, $completion, 0, 'commit');
> + #fixme. delete the disks when all jobs are ok ?
> + #delete the lvm volume
> + PVE::Storage::vdisk_free($storecfg, $snapvolid);
> + } else {
> + mon_cmd(
> + $vmid,
> + 'blockdev-snapshot-delete-internal-sync',
> + device => $attached_deviceid,
> + name => $snap,
> + );
> + }
> } else {
> PVE::Storage::volume_snapshot_delete(
> $storecfg, $volid, $snap, $attached_deviceid ? 1 : undef);
> @@ -7776,6 +7841,8 @@ sub do_snapshots_with_qemu {
> return 1;
> }
>
> + return 2 if $scfg->{snapext};
> +
that would definitely warrant a comment and/or an exhaustive check of existing call sites ;)
> if ($volid =~ m/\.(qcow2|qed)$/){
> return 1;
> }
> @@ -7849,8 +7916,23 @@ sub qemu_img_convert {
> if ($src_storeid) {
> PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
> my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
> - $src_format = qemu_img_format($src_scfg, $src_volname);
> - $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
> + if($src_scfg->{snapext}) {
this whole thing here is very confusing.
> + my $snapshots = PVE::Storage::volume_snapshot_info($storecfg, $src_volid);
> + $snapname = 'current' if !$snapname;
> + #if we don't clone the current image
> + #need to use the parent if available, as it's the readonly image view
> + #at the time of the snapshot
> + my $parentsnap = $snapshots->{$snapname}->{parent};
> + $snapname = $parentsnap if($parentsnap && $snapname ne 'current');
> + $src_format = $snapshots->{$snapname}->{format};
> + $src_path = $snapshots->{$snapname}->{file};
> + $src_volid = $snapshots->{$snapname}->{volid};
> + $snapname = undef;
> + PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
$snapname is always undef for this activate_volumes invocation..
but this whole if seems kind of strange, wouldn't it be enough to just call PVE::Storage::path with $snapname (to get the path to read from for cloning this snapshot or the volume itself) and then unset $snapname, or skip passing that to convert if snapshots are external?
> + } else {
> + $src_format = qemu_img_format($src_scfg, $src_volname);
> + $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
i.e., basically what is already done here (and if we don't support raw original volumes, then it's exactly this code?)
> + }
> $src_is_iscsi = ($src_path =~ m|^iscsi://|);
> $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
> } elsif (-f $src_volid || -b $src_volid) {
> @@ -7920,7 +8002,7 @@ sub qemu_img_format {
>
> # FIXME: this entire function is kind of weird given that `parse_volname`
> # also already gives us a format?
> - my $is_path_storage = $scfg->{path} || $scfg->{type} eq 'esxi';
> + my $is_path_storage = $scfg->{path} || $scfg->{type} eq 'esxi' || $scfg->{snapext};
>
> if ($is_path_storage && $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
> return $1;
> --
> 2.39.2
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
2024-10-23 10:12 ` Fabian Grünbichler
@ 2024-10-23 12:59 ` DERUMIER, Alexandre via pve-devel
[not found] ` <f066c13a25b30e3107a9dec8091b456ce2852293.camel@groupe-cyllene.com>
2024-10-24 7:50 ` Fabian Grünbichler
2 siblings, 0 replies; 27+ messages in thread
From: DERUMIER, Alexandre via pve-devel @ 2024-10-23 12:59 UTC (permalink / raw)
To: pve-devel, f.gruenbichler; +Cc: DERUMIER, Alexandre
[-- Attachment #1: Type: message/rfc822, Size: 40842 bytes --]
From: "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
To: "pve-devel@lists.proxmox.com" <pve-devel@lists.proxmox.com>, "f.gruenbichler@proxmox.com" <f.gruenbichler@proxmox.com>
Subject: Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
Date: Wed, 23 Oct 2024 12:59:33 +0000
Message-ID: <f066c13a25b30e3107a9dec8091b456ce2852293.camel@groupe-cyllene.com>
Hi Fabian,
thanks for the review !
>>-------- Message initial --------
>>De: Fabian Grünbichler <f.gruenbichler@proxmox.com>
>>À: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
>>Cc: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
>>Objet: Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external
>>snasphot support
>>Date: 23/10/2024 12:12:46
>>
>>some high level comments:
>>
>>I am not sure how much we gain here with the raw support?
There really is qcow2 overhead, mostly with big disks.
For good performance, the qcow2 l2-cache-size needs to be kept in
memory (and it's 1MB per disk)
https://events.static.linuxfound.org/sites/events/files/slides/kvm-forum-2017-slides.pdf
Thankfully, there are improvements with the "new" sub-cluster feature
https://people.igalia.com/berto/files/kvm-forum-2020-slides.pdf
I'm already using it at snapshot create, but I think we should also use
it for main qcow2 volume.
But even with that, you can still have a performance impact.
So yes, I think there is a real use case for workloads where you only need
snapshots from time to time (before an upgrade for example), but want max
performance while no snapshot exists.
>> it's a bit confusing to have a volid ending with raw, with the
>>current volume and all but the first snapshot actually being stored
>>in qcow2 files, with the raw file being the "oldest" snapshot in the
>>chain..
if it's too confusing, we could use for example a .snap extension
(as we know that it's qcow2 behind).
if possible, I'd be much happier with the snapshot name in the snapshot
file being a 1:1 match, see comments inline
>>- makes it a lot easier to understand (admin wants to manually remove
>>snapshot "foo", if "foo" was the last snapshot then right now the
>>volume called "foo" is actually the current contents!)
This part is really difficult, because you can't know in advance the
name of the snapshot you'll take in the future. The only way could be
to create a "current" volume and rename it when you take another
snapshot (I'm not sure it's possible to do it live,
and this could break the link chain too).
Also, I don't know how to manage the main volume when you take the
first snapshot — we should rename it too.
so "vm-disk-100-disk-0.raw|qcow2" , become "vm-disk-100-disk-0-
snap1.(raw|qcow2)" + new "vm-disk-100-disk-0-current.qcow2" ?
I'll try to do test again to see what is possible.
>>- means we don't have to do lookups via the full snapshot list all
>>the time (e.g., if I want to do a full clone from a snapshot "foo", I
>>can just pass the snap-foo volume to qemu-img)
ok got it
>>the naming scheme for snapshots needs to be adapted to not clash with
>>regular volumes:
>>$ pvesm alloc extsnap 131314 vm-131314-disk-foobar.qcow2 2G
>>Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-
>>foobar.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off
>>preallocation=off compression_type=zlib size=2147483648
>>lazy_refcounts=off refcount_bits=16
>>successfully created 'extsnap:131314/vm-131314-disk-foobar.qcow2'
>>$ qm rescan --vmid 131314
>>rescan volumes...
>>can't parse snapname from path at
>>/usr/share/perl5/PVE/Storage/Plugin.pm line 1934.
Any preference for the naming scheme? For LVM external snapshots, I used
"vm-131314-disk-0-snap-<foobar>";
>>storage_migrate needs to handle external snapshots, or at least error
>>out.
it should already work. (I have tested move_disk, and live migration +
storage migration). qemu_img_convert offline and qemu block job for
live.
>>I haven't tested that part or linked clones or a lot of other
>>advanced related actions at all ;)
For linked clones, we can't have a base image with snapshots (other than
_base_), so it'll be safe.
> Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am
> 30.09.2024 13:31 CEST geschrieben:
> Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-
> cyllene.com>
> ---
> src/PVE/Storage/DirPlugin.pm | 1 +
> src/PVE/Storage/Plugin.pm | 225 +++++++++++++++++++++++++++++++--
> --
> 2 files changed, 201 insertions(+), 25 deletions(-)
>
> diff --git a/src/PVE/Storage/DirPlugin.pm
> b/src/PVE/Storage/DirPlugin.pm
> index 2efa8d5..2bef673 100644
> --- a/src/PVE/Storage/DirPlugin.pm
> +++ b/src/PVE/Storage/DirPlugin.pm
> @@ -80,6 +80,7 @@ sub options {
> is_mountpoint => { optional => 1 },
> bwlimit => { optional => 1 },
> preallocation => { optional => 1 },
> + snapext => { optional => 1 },
> };
> }
>
> diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm
> index 6444390..5e5197a 100644
> --- a/src/PVE/Storage/Plugin.pm
> +++ b/src/PVE/Storage/Plugin.pm
> @@ -214,6 +214,11 @@ my $defaultData = {
> maximum => 65535,
> optional => 1,
> },
> + 'snapext' => {
> + type => 'boolean',
> + description => 'enable external snapshot.',
> + optional => 1,
> + },
> },
> };
>
> @@ -695,7 +700,7 @@ sub get_subdir {
> }
>
> sub filesystem_path {
> - my ($class, $scfg, $volname, $snapname) = @_;
> + my ($class, $scfg, $volname, $snapname, $current_snap) = @_;
see comment below
>
> my ($vtype, $name, $vmid, undef, undef, $isBase, $format) =
> $class->parse_volname($volname);
> @@ -703,7 +708,7 @@ sub filesystem_path {
> # Note: qcow2/qed has internal snapshot, so path is always
> # the same (with or without snapshot => same file).
> die "can't snapshot this image format\n"
> - if defined($snapname) && $format !~ m/^(qcow2|qed)$/;
> + if defined($snapname) && !$scfg->{snapext} && $format !~
> m/^(qcow2|qed)$/;
>
> my $dir = $class->get_subdir($scfg, $vtype);
>
> @@ -711,13 +716,22 @@ sub filesystem_path {
>
> my $path = "$dir/$name";
>
> + if($scfg->{snapext}) {
> + my $snappath = get_snap_path($path, $snapname);
> + if($snapname) {
> + $path = $snappath;
> + } elsif ($current_snap) {
> + $path = $current_snap->{file};
> + }
> + }
see commente below
> return wantarray ? ($path, $vmid, $vtype) : $path;
> }
>
> sub path {
> my ($class, $scfg, $volname, $storeid, $snapname) = @_;
>
> - return $class->filesystem_path($scfg, $volname, $snapname);
> + my $current_snapshot = $class->get_current_snapshot($scfg,
> $storeid, $volname);
>>this is pretty expensive, and would only be needed if $snapname is
>>not set..
The main problem is that when you start a VM on a specific snapshot,
we don't send the $snapname param.
One way could be that qemu-server checks the current snapshot from
the config when doing specific actions like start.
> + return $class->filesystem_path($scfg, $volname, $snapname,
> $current_snapshot);
>>couldn't we avoid extending the signature of filesystem_path and just
pass the name of the current snapshot as $snapname?
I need to redo the tests; I don't remember why I split them, but you
are right, it should be cleaner.
> }
>
> sub create_base {
> @@ -1074,13 +1088,31 @@ sub volume_resize {
> sub volume_snapshot {
> my ($class, $scfg, $storeid, $volname, $snap) = @_;
>
> - die "can't snapshot this image format\n" if $volname !~
> m/\.(qcow2|qed)$/;
> + die "can't snapshot this image format\n" if $volname !~
> m/\.(raw|qcow2|qed)$/;
>
> - my $path = $class->filesystem_path($scfg, $volname);
> + die "external snapshot need to be enabled to snapshot .raw
> volumes\n" if !$scfg->{snapext};
>>this condition is definitely wrong - it means no more snapshotting
>>unless external snapshot support is enabled..
oops, sorry.
>
> - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> + if($scfg->{snapext}) {
>
> - run_command($cmd);
> + my $path = $class->path($scfg, $volname, $storeid);
> +
> + my $snappath = get_snap_path($path, $snap);
> + my $format = ($class->parse_volname($volname))[6];
> +
> + my $cmd = ['/usr/bin/qemu-img', 'create', '-b', $path,
> + '-F', $format, '-f', 'qcow2', $snappath];
>>see comments on qemu-server, but.. wouldn't it be better if the file
>>with $snap in its name would be the one storing that snapshot's data?
>>i.e., rename the "current" volume to be called ...-$snap... , and
>>then create a new "current" file without a suffix with the renamed
>>volume as backing file?
I'll try it !
> +
> + my $options = "extended_l2=on,";
> + $options .= preallocation_cmd_option($scfg, 'qcow2');
> + push @$cmd, '-o', $options;
> + run_command($cmd);
> +
> + } else {
> +
> + my $path = $class->filesystem_path($scfg, $volname);
> + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> + run_command($cmd);
> + }
>
> return undef;
> }
> @@ -1091,19 +1123,39 @@ sub volume_snapshot {
> sub volume_rollback_is_possible {
> my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
>
> + if ($scfg->{snapext}) {
> + #technically, we could manage multibranch, we it need lot more work
> for snapshot delete
>>would multibranch be easier if there is a simple 1:1 correspondence
>>between snapshots and their filenames?
>>
>>switching to a different part of the "hierarchy" is then just
>>- delete current volume
>>- create new current volume using rollback target as backing file
the rollback/branch switch is not too difficult, maybe 1:1 naming could
help.
>>I guess deletion does become harder then, since it potentially
>>requires multiple rebases..
Yes, the biggest difficulty is snapshot delete, as you need to create a
block-stream job, merging/writing to each branch child, and you need to
do it atomically with a transaction of multiple jobs.
So yes, it's possible, but I wanted to keep it simple for now.
> + my $path = $class->filesystem_path($scfg, $volname);
> + my $snappath = get_snap_path($path, $snap);
> +
> + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> $volname);
> + my $currentpath = $snapshots->{current}->{file};
> + return 1 if !-e $snappath || $currentpath eq $snappath;
> +
> + die "can't rollback, '$snap' is not most recent snapshot on
> '$volname'\n";
> + }
> +
> return 1;
> }
>
> sub volume_snapshot_rollback {
> my ($class, $scfg, $storeid, $volname, $snap) = @_;
>
> - die "can't rollback snapshot this image format\n" if $volname !~
> m/\.(qcow2|qed)$/;
> + die "can't rollback snapshot this image format\n" if $volname !~
> m/\.(raw|qcow2|qed)$/;
>
> - my $path = $class->filesystem_path($scfg, $volname);
> + die "external snapshot need to be enabled to rollback snapshot
> .raw volumes\n" if $volname =~ m/\.(raw)$/ && !$scfg->{snapext};
>
> - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> + my $path = $class->filesystem_path($scfg, $volname);
>
> - run_command($cmd);
> + if ($scfg->{snapext}) {
> + #simply delete the current snapshot and recreate it
> + my $snappath = get_snap_path($path, $snap);
> + unlink($snappath);
> + $class->volume_snapshot($scfg, $storeid, $volname, $snap);
this *reads* so weird ;) it is right given the current semantics
(current snapshot == live image, snapshot data actually stored in
parent snapshot)
> + } else {
> + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> + run_command($cmd);
> + }
>
> return undef;
> }
> @@ -1111,17 +1163,50 @@ sub volume_snapshot_rollback {
> sub volume_snapshot_delete {
> my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
>
> - die "can't delete snapshot for this image format\n" if $volname
> !~ m/\.(qcow2|qed)$/;
> + die "can't delete snapshot for this image format\n" if $volname
> !~ m/\.(raw|qcow2|qed)$/;
> +
> + die "external snapshot need to be enabled to delete snapshot of
> .raw volumes\n" if !$scfg->{snapext};
>
> return 1 if $running;
>
> - my $path = $class->filesystem_path($scfg, $volname);
> + my $cmd = "";
> + if ($scfg->{snapext}) {
> +
> + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> $volname);
> + my $snappath = $snapshots->{$snap}->{file};
> + return if !-e $snappath; #already deleted ?
> +
> + my $parentsnap = $snapshots->{$snap}->{parent};
> + my $childsnap = $snapshots->{$snap}->{child};
> + die "error: can't find a parent for this snapshot" if
> !$parentsnap;
>
> - $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> + my $parentpath = $snapshots->{$parentsnap}->{file};
> + my $parentformat = $snapshots->{$parentsnap}->{'format'} if
> $parentsnap;
> + my $childpath = $snapshots->{$childsnap}->{file} if $childsnap;
> + my $childformat = $snapshots->{$childsnap}->{'format'} if
> $childsnap;
>
> - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> + print "merge snapshot $snap to $parentsnap\n";
> + $cmd = ['/usr/bin/qemu-img', 'commit', $snappath];
> + run_command($cmd);
> +
> + #if we delete an intermediate snapshot, we need to link upper
> snapshot to base snapshot
> + if($childpath && -e $childpath) {
> + die "missing parentsnap snapshot to rebase child $childpath\n"
> if !$parentpath;
> + print "link $childsnap to $parentsnap\n";
> + $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b', $parentpath,
> '-F', $parentformat, '-f', $childformat, $childpath];
> + run_command($cmd);
> + }
>>wouldn't a regular safe rebase work just as well, instead of commit +
>>unsafe rebase? if there is no parent, passing in "" as "new" backing
>>file should work..
I'll test it, but I'm pretty sure this is the correct way.
> +
> + #delete the snapshot
> + unlink($snappath);
> + } else {
> + my $path = $class->filesystem_path($scfg, $volname);
>
> - run_command($cmd);
> + $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> +
> + $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> + run_command($cmd);
> + }
>
> return undef;
> }
> @@ -1140,10 +1225,6 @@ sub volume_has_feature {
> my ($class, $scfg, $feature, $storeid, $volname, $snapname,
> $running, $opts) = @_;
>
> my $features = {
> - snapshot => {
> - current => { qcow2 => 1 },
> - snap => { qcow2 => 1 },
> - },
> clone => {
> base => { qcow2 => 1, raw => 1, vmdk => 1 },
> },
> @@ -1159,11 +1240,23 @@ sub volume_has_feature {
> base => { qcow2 => 1, raw => 1, vmdk => 1 },
> current => { qcow2 => 1, raw => 1, vmdk => 1 },
> },
> - rename => {
> - current => {qcow2 => 1, raw => 1, vmdk => 1},
> - },
> + 'rename' => {
> + current => { qcow2 => 1, raw => 1, vmdk => 1},
> + }
> };
>
> + if ($scfg->{snapext}) {
> + $features->{snapshot} = {
> + current => { raw => 1, qcow2 => 1 },
> + snap => { raw => 1, qcow2 => 1 },
> + }
> + } else {
> + $features->{snapshot} = {
> + current => { qcow2 => 1 },
> + snap => { qcow2 => 1 },
> + };
> + }
> +
>>this could just leave $features as it is, and add the "raw" bits:
>>
>>if ($scfg->{snapext}) {
>> $features->{snapshot}->{current}->{raw} = 1;
>> $features->{snapshot}->{snap}->{raw} = 1;
>>}
ok !
> if ($feature eq 'clone') {
> if (
> defined($opts->{valid_target_formats})
> @@ -1222,7 +1315,9 @@ sub list_images {
> }
>
> if ($vollist) {
> - my $found = grep { $_ eq $volid } @$vollist;
> + my $search_volid = $volid;
> + $search_volid =~ s/-snap-.*\./\./;
> + my $found = grep { $_ eq $search_volid } @$vollist;
> next if !$found;
> }
>
> @@ -1380,7 +1475,53 @@ sub status {
> sub volume_snapshot_info {
> my ($class, $scfg, $storeid, $volname) = @_;
>
> - die "volume_snapshot_info is not implemented for $class";
> + die "volume_snapshot_info is not implemented for $class" if
> !$scfg->{snapext};
> +
> + my $path = $class->filesystem_path($scfg, $volname);
> +
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase,
> $format) = $class->parse_volname($volname);
> +
> + my $basevolname = $volname;
> + $basevolname =~ s/\.(raw|qcow2)$//;
> +
> + my $snapshots = $class->list_images($storeid, $scfg, $vmid);
> + my $info = {};
> + for my $snap (@$snapshots) {
> +
> + my $volid = $snap->{volid};
> + next if ($volid !~ m/$basevolname/);
>>this regex is broken w.r.t. partial matching!
>>
>>e.g., if a VM has both a disk -1.qcow2 and -11.qcow2 and I attempt to
>>snapshot it using external snapshots:
ok !
snapshotting 'drive-scsi0' (extsnap:131314/vm-131314-disk-0.raw)
Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-0-snap-
test2.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=on
preallocation=off compression_type=zlib size=200704
backing_file=/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-0-snap-
test.qcow2 backing_fmt=raw lazy_refcounts=off refcount_bits=16
snapshotting 'drive-scsi1' (extsnap:131314/vm-131314-disk-1.qcow2)
Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-
test2.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=on
preallocation=off compression_type=zlib size=2147483648
backing_file=/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-
11.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
snapshotting 'drive-scsi2' (extsnap:131314/vm-131314-disk-11.qcow2)
qemu-img: /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-
test2.qcow2: Error: Trying to create an image with the same filename as
the backing file
snapshot create failed: starting cleanup
merge snapshot test2 to test
Image committed.
merge snapshot test2 to base
Image committed.
TASK ERROR: command '/usr/bin/qemu-img create -b
/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-test2.qcow2
-F qcow2 -f qcow2 /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-
11-snap-test2.qcow2 -o 'extended_l2=on,preallocation=off'' failed: exit
code 1
> +
> + my (undef, $snapvolname) = parse_volume_id($volid);
> + my $snapname = get_snapname_from_path($volid);
> + my $snapfile = $class->filesystem_path($scfg, $snapvolname,
> $snapname);
> + $snapname = 'base' if !$snapname;
> +
> + my $format = $snap->{'format'};
> + my $parentfile = $snap->{parent};
> + my $parentname = get_snapname_from_path($parentfile) if
> $parentfile;
> + $parentname = 'base' if !$parentname && $parentfile;
> +
> + $info->{$snapname}->{file} = $snapfile;
> + $info->{$snapname}->{volid} = $volid;
> + $info->{$snapname}->{'format'} = $format;
> + $info->{$snapname}->{parent} = $parentname if $parentname;
> + $info->{$parentname}->{child} = $snapname if $parentname;
> + }
> +
> + my $current = undef;
> + for my $id (keys %$info) {
> + my $snap = $info->{$id};
> + die "error: snap $id: you can't have multiple current snapshot:
> current:$current\n" if !$snap->{child} && $current;
> + $current = $id if !$snap->{child};
> + }
> +
> + if ($current) {
> + $info->{current}->{file} = $info->{$current}->{file};
> + $info->{current}->{'format'} = $info->{$current}->{'format'};
> + $info->{current}->{parent} = $info->{$current}->{parent};
> + }
> +
> + return $info;
> }
>
> sub activate_storage {
> @@ -1764,4 +1905,38 @@ sub config_aware_base_mkdir {
> }
> }
>
> +sub get_snap_path {
> + my ($path, $snap) = @_;
> +
> + my $basepath = "";
> + my $baseformat = "";
> + if ($path =~ m/^((.*)(vm-(\d+)-disk-(\d+)))(-snap-
> (.*))?\.(raw|qcow2)/) {
>>this regex is wrong - volumes can have arbitrary names after the -
>>disk- part..
ah sorry. do you have some example where it's used ? (maybe for efi or
other specific disk ?)
> + $basepath = $1;
> + $baseformat = $8;
> + }
> + my $format = $snap ? 'qcow2' : $baseformat;
> + my $snappath = $snap ? $basepath."-snap-$snap.$format" : undef;
> +
> + return $snappath;
> +}
> +
> +sub get_snapname_from_path {
> + my ($path) = @_;
> +
> + if ($path =~ m/^((.*)(vm-(\d+)-disk-(\d+)))(-snap-
> (.*))?\.(raw|qcow2)/) {
>>here as well.. and this whole helper is just used twice in
>>volume_snapshot_info, maybe it could be inlined or made private
ok !
> + my $snapname = $7;
> + return $snapname;
> + }
> + die "can't parse snapname from path";
> +}
> +
> +sub get_current_snapshot {
> + my ($class, $scfg, $storeid, $volname) = @_;
> + #IMPROVE ME: faster way to find current snapshot? (search the
> most recent created snapshot file ? need to works with lvm volume
> too)
> +
> + return if !$scfg->{snapext};
> + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> $volname);
> + return $snapshots->{current};
> +}
> +
> 1;
> --
> 2.39.2
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot)
2024-10-23 10:13 ` Fabian Grünbichler
@ 2024-10-23 13:45 ` DERUMIER, Alexandre via pve-devel
[not found] ` <e976104d8ed7c365d8a482fa320a0691456e69c1.camel@groupe-cyllene.com>
1 sibling, 0 replies; 27+ messages in thread
From: DERUMIER, Alexandre via pve-devel @ 2024-10-23 13:45 UTC (permalink / raw)
To: pve-devel, f.gruenbichler; +Cc: DERUMIER, Alexandre
[-- Attachment #1: Type: message/rfc822, Size: 38690 bytes --]
From: "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
To: "pve-devel@lists.proxmox.com" <pve-devel@lists.proxmox.com>, "f.gruenbichler@proxmox.com" <f.gruenbichler@proxmox.com>
Subject: Re: [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot)
Date: Wed, 23 Oct 2024 13:45:07 +0000
Message-ID: <e976104d8ed7c365d8a482fa320a0691456e69c1.camel@groupe-cyllene.com>
>>I am not yet convinced this is somehow a good idea, but maybe you can
>>convince me otherwise ;)
>>
>>variant A: this is just useful for very short-lived snapshots
>>variant B: these snapshots are supposed to be long-lived
Can you define "short" / "long", and the different use cases?
Because for me, a snapshot is a snapshot. Sometimes I take a snapshot
before doing some critical changes, but I can't know if I'll need to
roll back in the next 2h, or next month.
I think the "long-lived" use case is backup (but we don't need it),
or replication (this could apply here, if we want to add replication
for disaster recovery)
>>A is not something we want. we intentionally don't have non-thin LVM
>>snapshots for example.
AFAIK, we never implemented it because LVM snapshots are slow as
hell (as LVM extents are around 4MB: if you write 4k to a snapshot, you
need to reread and rewrite the whole 4MB, so around 1000x write
amplification and slow iops).
This is really the main blocker for my customers migrating from vmware
(and to be honest I have some of them going to oracle ovlm (with
ovirt), because ovirt supports it this way).
>>B once I create a single snapshot, the "original" storage only
>>contains the data written up to that point, anything else is stored
>>on the "snapshot" storage. this means my snapshot storage must be at
>>least as fast/good/shared/.. as my original storage. in that case, I
>>can just use the snapshot storage directly and ditch the original
>>storage?
Sorry, but I don't understand why you are talking about
original/snapshot storage — I never thought of using another storage
for external snapshots.
The patch really adds external snapshots on the same LVM storage,
through an additional LVM volume, but with qcow2 format to get good
performance (vs slow LVM snapshots)
> Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am
> 30.09.2024 13:31 CEST geschrieben:
> Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-
> cyllene.com>
> ---
> src/PVE/Storage.pm | 2 +
> src/PVE/Storage/LvmQcow2Plugin.pm | 460
> ++++++++++++++++++++++++++++++
> src/PVE/Storage/Makefile | 3 +-
> 3 files changed, 464 insertions(+), 1 deletion(-)
> create mode 100644 src/PVE/Storage/LvmQcow2Plugin.pm
>
> diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm
> index 57b2038..119998f 100755
> --- a/src/PVE/Storage.pm
> +++ b/src/PVE/Storage.pm
> @@ -28,6 +28,7 @@ use PVE::Storage::Plugin;
> use PVE::Storage::DirPlugin;
> use PVE::Storage::LVMPlugin;
> use PVE::Storage::LvmThinPlugin;
> +use PVE::Storage::LvmQcow2Plugin;
> use PVE::Storage::NFSPlugin;
> use PVE::Storage::CIFSPlugin;
> use PVE::Storage::ISCSIPlugin;
> @@ -54,6 +55,7 @@ our $KNOWN_EXPORT_FORMATS = ['raw+size',
> 'tar+size', 'qcow2+size', 'vmdk+size',
> PVE::Storage::DirPlugin->register();
> PVE::Storage::LVMPlugin->register();
> PVE::Storage::LvmThinPlugin->register();
> +PVE::Storage::LvmQcow2Plugin->register();
> PVE::Storage::NFSPlugin->register();
> PVE::Storage::CIFSPlugin->register();
> PVE::Storage::ISCSIPlugin->register();
> diff --git a/src/PVE/Storage/LvmQcow2Plugin.pm
> b/src/PVE/Storage/LvmQcow2Plugin.pm
> new file mode 100644
> index 0000000..68c8686
> --- /dev/null
> +++ b/src/PVE/Storage/LvmQcow2Plugin.pm
> @@ -0,0 +1,460 @@
> +package PVE::Storage::LvmQcow2Plugin;
> +
> +use strict;
> +use warnings;
> +
> +use IO::File;
> +
> +use PVE::Tools qw(run_command trim);
> +use PVE::Storage::Plugin;
> +use PVE::Storage::LVMPlugin;
> +use PVE::JSONSchema qw(get_standard_option);
> +
> +use base qw(PVE::Storage::LVMPlugin);
> +
> +# Configuration
> +
> +sub type {
> + return 'lvmqcow2';
> +}
> +
> +sub plugindata {
> + return {
> + #container not yet implemented #need to implemented dm-qcow2
> + content => [ {images => 1, rootdir => 1}, { images => 1 }],
> + };
> +}
> +
> +sub properties {
> + return {
> + };
> +}
> +
> +sub options {
> + return {
> + vgname => { fixed => 1 },
> + nodes => { optional => 1 },
> + shared => { optional => 1 },
> + disable => { optional => 1 },
> + saferemove => { optional => 1 },
> + saferemove_throughput => { optional => 1 },
> + content => { optional => 1 },
> + base => { fixed => 1, optional => 1 },
> + tagged_only => { optional => 1 },
> + bwlimit => { optional => 1 },
> + snapext => { fixed => 1 },
> + };
> +}
> +
> +# Storage implementation
> +
> +sub parse_volname {
> + my ($class, $volname) = @_;
> +
> + PVE::Storage::Plugin::parse_lvm_name($volname);
> + my $format = $volname =~ m/^(.*)-snap-/ ? 'qcow2' : 'raw';
> +
> + if ($volname =~ m/^((vm|base)-(\d+)-\S+)$/) {
> + return ('images', $1, $3, undef, undef, $2 eq 'base',
> $format);
> + }
> +
> + die "unable to parse lvm volume name '$volname'\n";
> +}
> +
> +sub filesystem_path {
> + my ($class, $scfg, $volname, $snapname, $current_snap) = @_;
> +
> + my ($vtype, $name, $vmid) = $class->parse_volname($volname);
> +
> + my $vg = $scfg->{vgname};
> +
> + my $path = "/dev/$vg/$name";
> +
> + if($snapname) {
> + $path = get_snap_volname($path, $snapname);
> + } elsif ($current_snap) {
> + $path = $current_snap->{file};
> + }
> +
> + return wantarray ? ($path, $vmid, $vtype) : $path;
> +}
> +
> +sub create_base {
> + my ($class, $storeid, $scfg, $volname) = @_;
> +
> + my $vg = $scfg->{vgname};
> +
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> + $class->parse_volname($volname);
> +
> + die "create_base not possible with base image\n" if $isBase;
> +
> + die "unable to create base volume - found snapshot" if $class-
> >snapshot_exist($scfg, $storeid, $volname);
> +
> + my $newname = $name;
> + $newname =~ s/^vm-/base-/;
> +
> + my $cmd = ['/sbin/lvrename', $vg, $volname, $newname];
> + run_command($cmd, errmsg => "lvrename '$vg/$volname' =>
> '$vg/$newname' error");
> +
> + # set inactive, read-only flags
> + $cmd = ['/sbin/lvchange', '-an', '-pr', "$vg/$newname"];
> + eval { run_command($cmd); };
> + warn $@ if $@;
> +
> + my $newvolname = $newname;
> +
> + return $newvolname;
> +}
> +
> +sub clone_image {
> + my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;
> +
> + die "can't clone images in lvm storage\n";
> +}
> +
> +sub alloc_image {
> + my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;
> +
> + die "unsupported format '$fmt'" if $fmt ne 'raw';
> +
> + die "illegal name '$name' - should be 'vm-$vmid-*'\n"
> + if $name && $name !~ m/^vm-$vmid-/;
> +
> + my $vgs = PVE::Storage::LVMPlugin::lvm_vgs();
> +
> + my $vg = $scfg->{vgname};
> +
> + die "no such volume group '$vg'\n" if !defined ($vgs->{$vg});
> +
> + my $free = int($vgs->{$vg}->{free});
> +
> + die "not enough free space ($free < $size)\n" if $free < $size;
> +
> + $name = $class->find_free_diskname($storeid, $scfg, $vmid)
> + if !$name;
> +
> + my $tags = ["pve-vm-$vmid"];
> + if ($name =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/) {
> + push @$tags, "\@pve-$1";
> + }
> +
> + PVE::Storage::LVMPlugin::lvcreate($vg, $name, $size, $tags);
> +
> + return $name;
> +}
> +
> +sub volume_snapshot_info {
> + my ($class, $scfg, $storeid, $volname) = @_;
> +
> + return $class->list_snapshots($scfg, $storeid, $volname);
> +}
> +
> +sub activate_volume {
> + my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
> +
> + my $lvm_activate_mode = 'ey';
> + my $tag = undef;
> +
> + #activate volume && all volumes snapshots by tag
> + if($volname =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/)
> {
> + $tag = "\@pve-vm-$4-disk-$5";
> + }
> +
> + my $cmd = ['/sbin/lvchange', "-a$lvm_activate_mode", $tag];
> + run_command($cmd, errmsg => "can't activate LV '$tag'");
> +
> + $cmd = ['/sbin/lvchange', '--refresh', $tag];
> + run_command($cmd, errmsg => "can't refresh LV '$tag' for
> activation");
> +}
> +
> +sub deactivate_volume {
> + my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
> +
> + my $tag = undef;
> + #deactivate volume && all volumes snasphots by tag
> + if($volname =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/)
> {
> + $tag = "\@pve-vm-$4-disk-$5";
> + }
> +
> + my $cmd = ['/sbin/lvchange', '-aln', $tag];
> + run_command($cmd, errmsg => "can't deactivate LV '$tag'");
> +}
> +
> +sub volume_resize {
> + my ($class, $scfg, $storeid, $volname, $size, $running) = @_;
> +
> + #we should resize the base image and parents snapshots,
> + #but how to manage rollback ?
> +
> + die "can't resize if snasphots exist" if $class-
> >snapshot_exist($scfg, $storeid, $volname);
> +
> + return 1;
> +}
> +
> +sub volume_snapshot {
> + my ($class, $scfg, $storeid, $volname, $snap) = @_;
> +
> + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> +
> + my $current_path = $class->path($scfg, $volname, $storeid);
> + my $current_format =
> (PVE::Storage::Plugin::file_size_info($current_path))[1];
> + my $snappath = get_snap_volname($current_path, $snap);
> +
> + my $snapvolname = get_snap_volname($volname, $snap);
> + #allocate lvm snapshot volume
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> + $class->parse_volname($volname);
> + my $size = $class->volume_size_info($scfg, $storeid, $volname,
> 5);
> + #add 100M for qcow2 headers
> + $size = int($size/1024) + (100*1024);
> +
> + $class->alloc_image($storeid, $scfg, $vmid, 'raw', $snapvolname,
> $size);
> +
> + # create the qcow2 fs
> + eval {
> + my $cmd = ['/usr/bin/qemu-img', 'create', '-b',
> $current_path,
> + '-F', $current_format, '-f', 'qcow2', $snappath];
> + my $options = "extended_l2=on,";
> + $options .=
> PVE::Storage::Plugin::preallocation_cmd_option($scfg, 'qcow2');
> + push @$cmd, '-o', $options;
> + run_command($cmd);
> + };
> + if ($@) {
> + eval { $class->free_image($storeid, $scfg, $snapvolname, 0) };
> + warn $@ if $@;
> + }
> +}
> +
> +# Asserts that a rollback to $snap on $volname is possible.
> +# If certain snapshots are preventing the rollback and $blockers is
> an array
> +# reference, the snapshot names can be pushed onto $blockers prior
> to dying.
> +sub volume_rollback_is_possible {
> + my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
> +
> + my $path = $class->filesystem_path($scfg, $volname);
> + my $snappath = get_snap_volname($path, $snap);
> + my $currentpath = $class->path($scfg, $volname, $storeid);
> + return 1 if $currentpath eq $snappath;
> +
> + die "can't rollback, '$snap' is not most recent snapshot on
> '$volname'\n";
> +
> + return 1;
> +}
> +
> +sub volume_snapshot_rollback {
> + my ($class, $scfg, $storeid, $volname, $snap) = @_;
> +
> + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> + #simply delete the current snapshot and recreate it
> +
> + my $snapvolname = get_snap_volname($volname, $snap);
> +
> + $class->free_image($storeid, $scfg, $snapvolname, 0);
> + $class->volume_snapshot($scfg, $storeid, $volname, $snap);
> +}
> +
> +sub volume_snapshot_delete {
> + my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
> +
> + return 1 if $running;
> +
> + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> +
> + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> $volname);
> + my $snappath = $snapshots->{$snap}->{file};
> + if(!$snappath) {
> + warn "$snap already deleted. skip\n";
> + return;
> + }
> +
> + my $snapvolname = $snapshots->{$snap}->{volname};
> + my $parentsnap = $snapshots->{$snap}->{parent};
> + my $childsnap = $snapshots->{$snap}->{child};
> + die "error: can't find a parent for this snapshot" if
> !$parentsnap;
> +
> + my $parentpath = $snapshots->{$parentsnap}->{file};
> + my $parentformat = $snapshots->{$parentsnap}->{'format'} if
> $parentsnap;
> + my $childpath = $snapshots->{$childsnap}->{file} if $childsnap;
> + my $childformat = $snapshots->{$childsnap}->{'format'} if
> $childsnap;
> +
> + print "merge snapshot $snap to $parentsnap\n";
> + my $cmd = ['/usr/bin/qemu-img', 'commit', $snappath];
> + run_command($cmd);
> +
> + #if we delete an intermediate snapshot, we need to link upper
> snapshot to base snapshot
> + if($childpath && -e $childpath) {
> + die "missing parentsnap snapshot to rebase child $childpath\n" if
> !$parentpath;
> + print "link $childsnap to $parentsnap\n";
> + $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b', $parentpath, '-
> F', $parentformat, '-f', $childformat, $childpath];
> + run_command($cmd);
> + }
> +
> + #delete the snapshot
> + $class->free_image($storeid, $scfg, $snapvolname, 0);
> +
> + return;
> +}
> +
> +sub volume_has_feature {
> + my ($class, $scfg, $feature, $storeid, $volname, $snapname,
> $running) = @_;
> +
> + my $features = {
> + snapshot => { current => 1 },
> +# clone => { base => 1, snap => 1}, #don't allow to clone as we
> can't activate the base between different host ?
> + template => { current => 1},
> + copy => { base => 1, current => 1, snap => 1},
> + sparseinit => { base => 1, current => 1},
> + rename => {current => 1},
> + };
> +
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> + $class->parse_volname($volname);
> +
> + my $key = undef;
> + if($snapname){
> + $key = 'snap';
> + }else{
> + $key = $isBase ? 'base' : 'current';
> + }
> + return 1 if $features->{$feature}->{$key};
> +
> + return undef;
> +}
> +
> +sub get_snap_volname {
> + my ($path, $snap) = @_;
> +
> + my $basepath = "";
> + my $baseformat = "";
> + if ($path =~ m/^((.*)((vm|base)-(\d+)-disk-(\d+)))(-snap-([a-zA-
> Z0-9]+))?(\.(raw|qcow2))?/) {
> + $basepath = $1;
> + $baseformat = $8;
> + }
> + my $snapvolname = $basepath."-snap-$snap.qcow2";
> + return $snapvolname;
> +}
> +
> +sub get_snapname_from_path {
> + my ($path) = @_;
> +
> + if ($path =~ m/^((.*)((vm|base)-(\d+)-disk-(\d+)))(-snap-([a-zA-
> Z0-9]+))?(\.(raw|qcow2))?/) {
> + my $snapname = $7;
> + return $snapname;
> + }
> + die "can't parse snapname from path $path";
> +}
> +
> +sub get_current_snapshot {
> + my ($class, $scfg, $storeid, $volname) = @_;
> +
> + #get more recent ctime volume
> + return $class->list_snapshots($scfg, $storeid, $volname, 1);
> +}
> +my $check_tags = sub {
> + my ($tags) = @_;
> +
> + return defined($tags) && $tags =~ /(^|,)pve-vm-\d+(,|$)/;
> +};
> +
> +sub list_images {
> + my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;
> +
> + my $vgname = $scfg->{vgname};
> +
> + $cache->{lvs} = PVE::Storage::LVMPlugin::lvm_list_volumes() if
> !$cache->{lvs};
> +
> + my $res = [];
> +
> + if (my $dat = $cache->{lvs}->{$vgname}) {
> +
> + foreach my $volname (keys %$dat) {
> +
> + next if $volname !~ m/^(vm|base)-(\d+)-/;
> + my $owner = $2;
> +
> + my $info = $dat->{$volname};
> +
> + next if $scfg->{tagged_only} && !&$check_tags($info->{tags});
> +
> + # Allow mirrored and RAID LVs
> + next if $info->{lv_type} !~ m/^[-mMrR]$/;
> +
> + my $volid = "$storeid:$volname";
> +
> + if ($vollist) {
> + my $found = grep { $_ eq $volid } @$vollist;
> + next if !$found;
> + } else {
> + next if defined($vmid) && ($owner ne $vmid);
> + }
> +
> + push @$res, {
> + volid => $volid, format => 'raw', size => $info->{lv_size}, vmid =>
> $owner,
> + ctime => $info->{ctime},
> + };
> + }
> + }
> +
> + return $res;
> +}
> +
> +sub list_snapshots {
> + my ($class, $scfg, $storeid, $volname, $current_only) = @_;
> +
> + my $vgname = $scfg->{vgname};
> +
> + my $basevolname = $volname;
> + my $lvs = PVE::Storage::LVMPlugin::lvm_list_volumes($vgname);
> +
> + my $vg = $lvs->{$vgname};
> +
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase,
> $format) = $class->parse_volname($volname);
> + my $snapshots = $class->list_images($storeid, $scfg, $vmid);
> +
> + my $info = {};
> + for my $snap (@$snapshots) {
> + my $snap_volid = $snap->{volid};
> + next if ($snap_volid !~ m/$basevolname/);
> +
> + my $snapname = get_snapname_from_path($snap_volid);
> + my (undef, $snap_volname) =
> PVE::Storage::parse_volume_id($snap_volid);
> + my $snapfile = $class->filesystem_path($scfg, $snap_volname,
> $snapname);
> + $snapname = 'base' if !$snapname;
> + $info->{$snapname}->{file} = $snapfile;
> + $info->{$snapname}->{volname} = $snap_volname;
> + $info->{$snapname}->{volid} = $snap_volid;
> + $info->{$snapname}->{ctime} = $snap->{ctime};
> +
> + if (!$current_only) {
> + my (undef, $format, undef, $parentfile, undef) =
> PVE::Storage::Plugin::file_size_info($snapfile);
> + next if !$parentfile && $snapname ne 'base'; #bad unlinked
> snasphot
> +
> + my $parentname = get_snapname_from_path($parentfile) if
> $parentfile;
> + $parentname = 'base' if !$parentname && $parentfile;
> +
> + $info->{$snapname}->{'format'} = $format;
> + $info->{$snapname}->{parent} = $parentname if $parentname;
> + $info->{$parentname}->{child} = $snapname if $parentname;
> + }
> + }
> +
> + my @snapshots_sorted = sort { $info->{$b}{ctime} <=> $info-
> >{$a}{ctime} } keys %$info;
> + my $current_snapname = $snapshots_sorted[0];
> + my $current_snapshot = $info->{$current_snapname};
> + return $current_snapshot if $current_only;
> +
> + $info->{current} = { %$current_snapshot };
> + return $info;
> +}
> +
> +sub snapshot_exist {
> + my ($class, $scfg, $storeid, $volname) = @_;
> +
> + my $basepath = $class->filesystem_path($scfg, $volname);
> + my $currentpath = $class->path($scfg, $volname, $storeid);
> +
> + die "can't resize if snasphots exist" if $currentpath ne
> $basepath;
> +
> +}
> +1;
> diff --git a/src/PVE/Storage/Makefile b/src/PVE/Storage/Makefile
> index d5cc942..1af8aab 100644
> --- a/src/PVE/Storage/Makefile
> +++ b/src/PVE/Storage/Makefile
> @@ -14,7 +14,8 @@ SOURCES= \
> PBSPlugin.pm \
> BTRFSPlugin.pm \
> LvmThinPlugin.pm \
> - ESXiPlugin.pm
> + ESXiPlugin.pm \
> + LvmQcow2Plugin.pm
>
> .PHONY: install
> install:
> --
> 2.39.2
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot
2024-10-23 10:14 ` Fabian Grünbichler
@ 2024-10-23 14:31 ` DERUMIER, Alexandre via pve-devel
2024-10-23 18:09 ` DERUMIER, Alexandre via pve-devel
[not found] ` <aeb9b8ea34826483eabe7fec5e2c12b1e22e132f.camel@groupe-cyllene.com>
2 siblings, 0 replies; 27+ messages in thread
From: DERUMIER, Alexandre via pve-devel @ 2024-10-23 14:31 UTC (permalink / raw)
To: pve-devel, f.gruenbichler; +Cc: DERUMIER, Alexandre
[-- Attachment #1: Type: message/rfc822, Size: 13807 bytes --]
From: "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
To: "pve-devel@lists.proxmox.com" <pve-devel@lists.proxmox.com>, "f.gruenbichler@proxmox.com" <f.gruenbichler@proxmox.com>
Subject: Re: [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot
Date: Wed, 23 Oct 2024 14:31:33 +0000
Message-ID: <aeb9b8ea34826483eabe7fec5e2c12b1e22e132f.camel@groupe-cyllene.com>
>>if we want the current volume to keep its name, and the snapshot
>>volume to actually contain *that* snapshot's data, we need some sort
>>of rename dance here as well.. i.e., rename the current volume to
>>have the snapshot volume name, then snapshot it back into the
>>"current" name. not sure what the proper qmp runes would be to
>>achieve that?
I really need to check that, but I would like to keep it as atomic as
possible (avoid stream, double snapshot, and all the fancy stuff just to
take a snapshot), because things can go wrong ^_^ , and generally it'll happen
when you take a snapshot with multiple disks — you'll then have to manage
recovery and the current state of the different volumes.
Another stupid way is to use generic name for snapfile (maybe with
ctime inside the name), and create symlinks when snapshot is taken.(
qemu follow symlink and use realpaths for backing chain).
(and for lvm, it can be done with metadatas).
I'll really try to find a way with renaming volume, I'll keep you in
touch.
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot
2024-10-23 10:14 ` Fabian Grünbichler
2024-10-23 14:31 ` DERUMIER, Alexandre via pve-devel
@ 2024-10-23 18:09 ` DERUMIER, Alexandre via pve-devel
[not found] ` <aeb9b8ea34826483eabe7fec5e2c12b1e22e132f.camel@groupe-cyllene.com>
2 siblings, 0 replies; 27+ messages in thread
From: DERUMIER, Alexandre via pve-devel @ 2024-10-23 18:09 UTC (permalink / raw)
To: pve-devel, f.gruenbichler; +Cc: DERUMIER, Alexandre
[-- Attachment #1: Type: message/rfc822, Size: 12624 bytes --]
From: "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
To: "pve-devel@lists.proxmox.com" <pve-devel@lists.proxmox.com>, "f.gruenbichler@proxmox.com" <f.gruenbichler@proxmox.com>
Subject: Re: [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot
Date: Wed, 23 Oct 2024 18:09:31 +0000
Message-ID: <4abf03f224082e0fe20b248840b590f06c43fc5d.camel@groupe-cyllene.com>
ok, I think it could be possible to use blockdev-reopen to rename
current filename
https://lists.gnu.org/archive/html/qemu-devel/2021-05/msg04455.html
example: take snapshot : snap1 on vm-disk-100-disk-0.qcow2
- create a hardlink: ln vm-disk-100-disk-0.qcow2 vm-disk-100-disk-0-
snap1.qcow2
- qmp blockdev-reopen file vm-disk-100-disk-0-snap1.qcow2
- rm vm-disk-100-disk-0.qcow2
- create a snapshot with a new file vm-disk-100-disk-0.qcow2 with vm-
disk-100-disk-0-snap1.qcow2 as backing file
I'll try to test this tomorrow !
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
[not found] ` <2f07646b51c85ffe01089c2481dbb9680d75cfcb.camel@groupe-cyllene.com>
@ 2024-10-24 3:37 ` Esi Y via pve-devel
0 siblings, 0 replies; 27+ messages in thread
From: Esi Y via pve-devel @ 2024-10-24 3:37 UTC (permalink / raw)
Cc: Esi Y, pve-devel
[-- Attachment #1: Type: message/rfc822, Size: 7259 bytes --]
From: Esi Y <esiy0676+proxmox@gmail.com>
Cc: "pve-devel@lists.proxmox.com" <pve-devel@lists.proxmox.com>
Subject: Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support
Date: Thu, 24 Oct 2024 05:37:19 +0200
Message-ID: <CABtLnHrx=-DOvTYVbFcoJtYvQ=9PRA4g+vhV3Eix2LaqxRb4Kg@mail.gmail.com>
Thank you, Alexandre, for the background / historical information.
On Tue, Oct 22, 2024 at 4:54 PM DERUMIER, Alexandre
<alexandre.derumier@groupe-cyllene.com> wrote:
>
> -------- Message initial --------
> De: Esi Y via pve-devel <pve-devel@lists.proxmox.com>
> Répondre à: Proxmox VE development discussion <pve-
> devel@lists.proxmox.com>
> À: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
> Cc: Esi Y <esiy0676+proxmox@gmail.com>
> Objet: Re: [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add
> external qcow2 snapshot support
> Date: 22/10/2024 11:51:31
>
> > wasting developers time that then, among other things, delays actual
>
> >>I hoped this would bump it up for Alexandre to get a response.
> As far as I remember, when we implemented snapshots for qcow2 (I think
> in 2010~2011, I'm getting old ^_^ ), only internal snapshots were
> possible,
> because there was no block-commit job (to merge data into the parent on
> snapshot deletion).
I just found it relevant to point out that historical Wiki on Live snapshots
were approaching this with the external-first in mind - when I look at
revisions [1], already in 2011:
"Internal snapshots to images which support internal snapshots (QCOW2 & QED)
are not expected to be supported initially."
> That means that we currently don't have a clean snapshot solution for
> shared SAN/NAS storage without an API.
And that it is overdue at least since 2017 (the original forum attempt
at discussion,
not mine).
Anyhow, good that is now being reviewed. There's enough interested in the said
forum for this already.
NB I don't believe I should have been asked to take that supporting
input elsewhere.
[1] https://wiki.qemu.org/index.php?title=Features/Snapshots&diff=5964&oldid=1405
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
[not found] ` <f066c13a25b30e3107a9dec8091b456ce2852293.camel@groupe-cyllene.com>
@ 2024-10-24 6:42 ` Fabian Grünbichler
2024-10-24 7:59 ` Giotta Simon RUAGH via pve-devel
2024-10-25 5:52 ` DERUMIER, Alexandre via pve-devel
0 siblings, 2 replies; 27+ messages in thread
From: Fabian Grünbichler @ 2024-10-24 6:42 UTC (permalink / raw)
To: DERUMIER, Alexandre, pve-devel
> DERUMIER, Alexandre <alexandre.derumier@groupe-cyllene.com> hat am 23.10.2024 14:59 CEST geschrieben:
>
>
> Hi Fabian,
>
> thanks for the review !
>
> >>-------- Message initial --------
> >>De: Fabian Grünbichler <f.gruenbichler@proxmox.com>
> >>À: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
> >>Cc: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
> >>Objet: Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external
> >>snasphot support
> >>Date: 23/10/2024 12:12:46
> >>
> >>some high level comments:
> >>
> >>I am not sure how much we gain here with the raw support?
>
> There is real qcow2 overhead, mostly with big disks,
> as for good performance the qcow2 l2-cache-size needs to be kept in
> memory (and it's 1MB per disk).
> https://events.static.linuxfound.org/sites/events/files/slides/kvm-forum-2017-slides.pdf
>
> Hopefully, there are improvements with the "new" sub-cluster feature
> https://people.igalia.com/berto/files/kvm-forum-2020-slides.pdf
> I'm already using it at snapshot creation, but I think we should also use
> it for the main qcow2 volume.
>
>
> But even with that, you can still have a performance impact.
> So yes, I think there is a real use case for workloads where you only need
> a snapshot from time to time (before an upgrade, for example), but maximum
> performance when no snapshot exists.
my main point here is - all other storages treat snapshots as "cheap". if you combine raw+qcow2 snapshot overlays, suddenly performance will get worse if you keep a snapshot around for whatever reason..
> >> it's a bit confusing to have a volid ending with raw, with the
> >>current volume and all but the first snapshot actually being stored
> >>in qcow2 files, with the raw file being the "oldest" snapshot in the
> >>chain..
> if it's too confusing, we could use for example an .snap extension.
> (as we known that it's qcow2 behind)
I haven't thought yet about how to encode the snapshot name into the snapshot file name, but yeah, maybe something like that would be good. or maybe snap-VMID-disk-DISK.qcow2 ?
> if possible, I'd be much happier with the snapshot name in the snapshot
> file being a 1:1 match, see comments inline
>
> >>- makes it a lot easier to understand (admin wants to manually remove
> >>snapshot "foo", if "foo" was the last snapshot then right now the
> >>volume called "foo" is actually the current contents!)
>
> This part is really difficult, because you can't know in advance the
> name of the snapshot you'll take in the future. The only way could be
> to create a "current" volume, and rename it when you take another
> snapshot (I'm not sure it's possible to do it live,
> and this could break the backing chain too).
>
> Also, I don't know how to manage the main volume when you take the
> first snapshot — should we rename it too?
I mean, if we don't allow .raw files to be snapshotted then this problem doesn't exist ;)
> so "vm-disk-100-disk-0.raw|qcow2" , become "vm-disk-100-disk-0-
> snap1.(raw|qcow2)" + new "vm-disk-100-disk-0-current.qcow2" ?
the volid changing on snapshot seems like it would require a lot of adaption.. OTOH, the volid containing a wrong format might also break things.
> I'll try to do test again to see what is possible.
>
>
>
>
> >>- means we don't have to do lookups via the full snapshot list all
> >>the time (e.g., if I want to do a full clone from a snapshot "foo", I
> >>can just pass the snap-foo volume to qemu-img)
>
> ok got it
>
>
>
> >>the naming scheme for snapshots needs to be adapted to not clash with
> >>regular volumes:
>
> >>$ pvesm alloc extsnap 131314 vm-131314-disk-foobar.qcow2 2G
> >>Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-
> >>foobar.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off
> >>preallocation=off compression_type=zlib size=2147483648
> >>lazy_refcounts=off refcount_bits=16
> >>successfully created 'extsnap:131314/vm-131314-disk-foobar.qcow2'
> >>$ qm rescan --vmid 131314
> >>rescan volumes...
> >>can't parse snapname from path at
> >>/usr/share/perl5/PVE/Storage/Plugin.pm line 1934.
>
> any preference for naming scheme ? for lvm external snap, I have used
> "vm-131314-disk-0-snap-<foobar>";
see above
> >>storage_migrate needs to handle external snapshots, or at least error
> >>out.
> it should already work. (I have tested move_disk, and live migration +
> storage migration). qemu_img_convert offline and qemu block job for
> live.
but don't all of those lose the snapshots? did you test it with snapshots and rollback afterwards?
> >>I haven't tested that part or linked clones or a lot of other
> >>advanced related actions at all ;)
>
> For linked clones, we can't have a base image with snapshots (other than
> _base_), so it'll be safe.
ack
> > Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am
> > 30.09.2024 13:31 CEST geschrieben:
> > Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-
> > cyllene.com>
> > ---
> > src/PVE/Storage/DirPlugin.pm | 1 +
> > src/PVE/Storage/Plugin.pm | 225 +++++++++++++++++++++++++++++++--
> > --
> > 2 files changed, 201 insertions(+), 25 deletions(-)
> >
> > diff --git a/src/PVE/Storage/DirPlugin.pm
> > b/src/PVE/Storage/DirPlugin.pm
> > index 2efa8d5..2bef673 100644
> > --- a/src/PVE/Storage/DirPlugin.pm
> > +++ b/src/PVE/Storage/DirPlugin.pm
> > @@ -80,6 +80,7 @@ sub options {
> > is_mountpoint => { optional => 1 },
> > bwlimit => { optional => 1 },
> > preallocation => { optional => 1 },
> > + snapext => { optional => 1 },
> > };
> > }
> >
> > diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm
> > index 6444390..5e5197a 100644
> > --- a/src/PVE/Storage/Plugin.pm
> > +++ b/src/PVE/Storage/Plugin.pm
> > @@ -214,6 +214,11 @@ my $defaultData = {
> > maximum => 65535,
> > optional => 1,
> > },
> > + 'snapext' => {
> > + type => 'boolean',
> > + description => 'enable external snapshot.',
> > + optional => 1,
> > + },
> > },
> > };
> >
> > @@ -695,7 +700,7 @@ sub get_subdir {
> > }
> >
> > sub filesystem_path {
> > - my ($class, $scfg, $volname, $snapname) = @_;
> > + my ($class, $scfg, $volname, $snapname, $current_snap) = @_;
>
> see comment below
>
> >
> > my ($vtype, $name, $vmid, undef, undef, $isBase, $format) =
> > $class->parse_volname($volname);
> > @@ -703,7 +708,7 @@ sub filesystem_path {
> > # Note: qcow2/qed has internal snapshot, so path is always
> > # the same (with or without snapshot => same file).
> > die "can't snapshot this image format\n"
> > - if defined($snapname) && $format !~ m/^(qcow2|qed)$/;
> > + if defined($snapname) && !$scfg->{snapext} && $format !~
> > m/^(qcow2|qed)$/;
> >
> > my $dir = $class->get_subdir($scfg, $vtype);
> >
> > @@ -711,13 +716,22 @@ sub filesystem_path {
> >
> > my $path = "$dir/$name";
> >
> > + if($scfg->{snapext}) {
> > + my $snappath = get_snap_path($path, $snapname);
> > + if($snapname) {
> > + $path = $snappath;
> > + } elsif ($current_snap) {
> > + $path = $current_snap->{file};
> > + }
> > + }
>
> see commente below
>
> > return wantarray ? ($path, $vmid, $vtype) : $path;
> > }
> >
> > sub path {
> > my ($class, $scfg, $volname, $storeid, $snapname) = @_;
> >
> > - return $class->filesystem_path($scfg, $volname, $snapname);
> > + my $current_snapshot = $class->get_current_snapshot($scfg,
> > $storeid, $volname);
>
> >>this is pretty expensive, and would only be needed if $snapname is
> >>not set..
>
> The main problem is when you start a VM on a specific snapshot,
> we don't send the $snapname param.
>
> One way could be for qemu-server to check the current snapshot from
> the config when doing specific actions like start.
if we manage to find a way to make the volid always point at the top overlay, then that wouldn't be needed..
> > + return $class->filesystem_path($scfg, $volname, $snapname,
> > $current_snapshot);
>
> >>couldn't we avoid extending the signature of filesystem_path and just
> pass the name of the current snapshot as $snapname?
>
> I need to redo the tests; I don't remember why I split them, but you
> are right, it should be cleaner.
>
> > }
> >
> > sub create_base {
> > @@ -1074,13 +1088,31 @@ sub volume_resize {
> > sub volume_snapshot {
> > my ($class, $scfg, $storeid, $volname, $snap) = @_;
> >
> > - die "can't snapshot this image format\n" if $volname !~
> > m/\.(qcow2|qed)$/;
> > + die "can't snapshot this image format\n" if $volname !~
> > m/\.(raw|qcow2|qed)$/;
> >
> > - my $path = $class->filesystem_path($scfg, $volname);
> > + die "external snapshot need to be enabled to snapshot .raw
> > volumes\n" if !$scfg->{snapext};
>
> >>this condition is definitely wrong - it means no more snapshotting
> >>unless external snapshot support is enabled..
>
> oops, sorry.
>
> >
> > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> > + if($scfg->{snapext}) {
> >
> > - run_command($cmd);
> > + my $path = $class->path($scfg, $volname, $storeid);
> > +
> > + my $snappath = get_snap_path($path, $snap);
> > + my $format = ($class->parse_volname($volname))[6];
> > +
> > + my $cmd = ['/usr/bin/qemu-img', 'create', '-b', $path,
> > + '-F', $format, '-f', 'qcow2', $snappath];
>
> >>see comments on qemu-server, but.. wouldn't it be better if the file
> >>with $snap in its name would be the one storing that snapshot's data?
> >>i.e., rename the "current" volume to be called ...-$snap... , and
> >>then create a new "current" file without a suffix with the renamed
> >>volume as backing file?
>
> I'll try it !
>
> > +
> > + my $options = "extended_l2=on,";
> > + $options .= preallocation_cmd_option($scfg, 'qcow2');
> > + push @$cmd, '-o', $options;
> > + run_command($cmd);
> > +
> > + } else {
> > +
> > + my $path = $class->filesystem_path($scfg, $volname);
> > + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> > + run_command($cmd);
> > + }
> >
> > return undef;
> > }
> > @@ -1091,19 +1123,39 @@ sub volume_snapshot {
> > sub volume_rollback_is_possible {
> > my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
> >
> > + if ($scfg->{snapext}) {
> > + #technically, we could manage multibranch, we it need lot more work
> > for snapshot delete
>
> >>would multibranch be easier if there is a simple 1:1 correspondence
> >>between snapshots and their filenames?
> >>
> >>switching to a different part of the "hierarchy" is then just
> >>- delete current volume
> >>- create new current volume using rollback target as backing file
> the rollback/branch switch is not too difficult, maybe 1:1 naming could
> help.
>
> >>I guess deletion does become harder then, since it potentially
> >>requires multiple rebases..
>
> Yes, the biggest difficulty is snapshot delete, as you need to create a
> block-stream job, merging/writing to each branch child, and you need to
> do it atomically with a transaction with multiple jobs.
> So yes, it's possible, but I wanted to keep it easy for now.
sure, this restriction could be lifted in a follow-up!
> > + my $path = $class->filesystem_path($scfg, $volname);
> > + my $snappath = get_snap_path($path, $snap);
> > +
> > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> > $volname);
> > + my $currentpath = $snapshots->{current}->{file};
> > + return 1 if !-e $snappath || $currentpath eq $snappath;
> > +
> > + die "can't rollback, '$snap' is not most recent snapshot on
> > '$volname'\n";
> > + }
> > +
> > return 1;
> > }
> >
> > sub volume_snapshot_rollback {
> > my ($class, $scfg, $storeid, $volname, $snap) = @_;
> >
> > - die "can't rollback snapshot this image format\n" if $volname !~
> > m/\.(qcow2|qed)$/;
> > + die "can't rollback snapshot this image format\n" if $volname !~
> > m/\.(raw|qcow2|qed)$/;
> >
> > - my $path = $class->filesystem_path($scfg, $volname);
> > + die "external snapshot need to be enabled to rollback snapshot
> > .raw volumes\n" if $volname =~ m/\.(raw)$/ && !$scfg->{snapext};
> >
> > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> > + my $path = $class->filesystem_path($scfg, $volname);
> >
> > - run_command($cmd);
> > + if ($scfg->{snapext}) {
> > + #simply delete the current snapshot and recreate it
> > + my $snappath = get_snap_path($path, $snap);
> > + unlink($snappath);
> > + $class->volume_snapshot($scfg, $storeid, $volname, $snap);
>
> this *reads* so weird ;) it is right given the current semantics
> (current snapshot == live image, snapshot data actually stored in
> parent snapshot)
>
> > + } else {
> > + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> > + run_command($cmd);
> > + }
> >
> > return undef;
> > }
> > @@ -1111,17 +1163,50 @@ sub volume_snapshot_rollback {
> > sub volume_snapshot_delete {
> > my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
> >
> > - die "can't delete snapshot for this image format\n" if $volname
> > !~ m/\.(qcow2|qed)$/;
> > + die "can't delete snapshot for this image format\n" if $volname
> > !~ m/\.(raw|qcow2|qed)$/;
> > +
> > + die "external snapshot need to be enabled to delete snapshot of
> > .raw volumes\n" if !$scfg->{snapext};
> >
> > return 1 if $running;
> >
> > - my $path = $class->filesystem_path($scfg, $volname);
> > + my $cmd = "";
> > + if ($scfg->{snapext}) {
> > +
> > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> > $volname);
> > + my $snappath = $snapshots->{$snap}->{file};
> > + return if !-e $snappath; #already deleted ?
> > +
> > + my $parentsnap = $snapshots->{$snap}->{parent};
> > + my $childsnap = $snapshots->{$snap}->{child};
> > + die "error: can't find a parent for this snapshot" if
> > !$parentsnap;
> >
> > - $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> > + my $parentpath = $snapshots->{$parentsnap}->{file};
> > + my $parentformat = $snapshots->{$parentsnap}->{'format'} if
> > $parentsnap;
> > + my $childpath = $snapshots->{$childsnap}->{file} if $childsnap;
> > + my $childformat = $snapshots->{$childsnap}->{'format'} if
> > $childsnap;
> >
> > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> > + print "merge snapshot $snap to $parentsnap\n";
> > + $cmd = ['/usr/bin/qemu-img', 'commit', $snappath];
> > + run_command($cmd);
> > +
> > + #if we delete an intermediate snapshot, we need to link upper
> > snapshot to base snapshot
> > + if($childpath && -e $childpath) {
> > + die "missing parentsnap snapshot to rebase child $childpath\n"
> > if !$parentpath;
> > + print "link $childsnap to $parentsnap\n";
> > + $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b', $parentpath,
> > '-F', $parentformat, '-f', $childformat, $childpath];
> > + run_command($cmd);
> > + }
>
> >>wouldn't a regular safe rebase work just as well, instead of commit +
> >>unsafe rebase? if there is no parent, passing in "" as "new" backing
> >>file should work..
>
> I'll test it, but I'm pretty sure this is the correct way.
>
> > +
> > + #delete the snapshot
> > + unlink($snappath);
> > + } else {
> > + my $path = $class->filesystem_path($scfg, $volname);
> >
> > - run_command($cmd);
> > + $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> > +
> > + $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> > + run_command($cmd);
> > + }
> >
> > return undef;
> > }
> > @@ -1140,10 +1225,6 @@ sub volume_has_feature {
> > my ($class, $scfg, $feature, $storeid, $volname, $snapname,
> > $running, $opts) = @_;
> >
> > my $features = {
> > - snapshot => {
> > - current => { qcow2 => 1 },
> > - snap => { qcow2 => 1 },
> > - },
> > clone => {
> > base => { qcow2 => 1, raw => 1, vmdk => 1 },
> > },
> > @@ -1159,11 +1240,23 @@ sub volume_has_feature {
> > base => { qcow2 => 1, raw => 1, vmdk => 1 },
> > current => { qcow2 => 1, raw => 1, vmdk => 1 },
> > },
> > - rename => {
> > - current => {qcow2 => 1, raw => 1, vmdk => 1},
> > - },
> > + 'rename' => {
> > + current => { qcow2 => 1, raw => 1, vmdk => 1},
> > + }
> > };
> >
> > + if ($scfg->{snapext}) {
> > + $features->{snapshot} = {
> > + current => { raw => 1, qcow2 => 1 },
> > + snap => { raw => 1, qcow2 => 1 },
> > + }
> > + } else {
> > + $features->{snapshot} = {
> > + current => { qcow2 => 1 },
> > + snap => { qcow2 => 1 },
> > + };
> > + }
> > +
>
> >>this could just leave $features as it is, and add the "raw" bits:
> >>
> >>if ($scfg->{snapext}) {
> >> $features->{snapshot}->{current}->{raw} = 1;
> >> $features->{snapshot}->{snap}->{raw} = 1;
> >>}
>
> ok !
> > if ($feature eq 'clone') {
> > if (
> > defined($opts->{valid_target_formats})
> > @@ -1222,7 +1315,9 @@ sub list_images {
> > }
> >
> > if ($vollist) {
> > - my $found = grep { $_ eq $volid } @$vollist;
> > + my $search_volid = $volid;
> > + $search_volid =~ s/-snap-.*\./\./;
> > + my $found = grep { $_ eq $search_volid } @$vollist;
> > next if !$found;
> > }
> >
> > @@ -1380,7 +1475,53 @@ sub status {
> > sub volume_snapshot_info {
> > my ($class, $scfg, $storeid, $volname) = @_;
> >
> > - die "volume_snapshot_info is not implemented for $class";
> > + die "volume_snapshot_info is not implemented for $class" if
> > !$scfg->{snapext};
> > +
> > + my $path = $class->filesystem_path($scfg, $volname);
> > +
> > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase,
> > $format) = $class->parse_volname($volname);
> > +
> > + my $basevolname = $volname;
> > + $basevolname =~ s/\.(raw|qcow2)$//;
> > +
> > + my $snapshots = $class->list_images($storeid, $scfg, $vmid);
> > + my $info = {};
> > + for my $snap (@$snapshots) {
> > +
> > + my $volid = $snap->{volid};
> > + next if ($volid !~ m/$basevolname/);
>
> >>this regex is broken w.r.t. partial matching!
> >>
> >>e.g., if a VM has both a disk -1.qcow2 and -11.qcow2 and I attempt to
> >>snapshot it using external snapshots:
> ok !
>
>
> snapshotting 'drive-scsi0' (extsnap:131314/vm-131314-disk-0.raw)
> Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-0-snap-
> test2.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=on
> preallocation=off compression_type=zlib size=200704
> backing_file=/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-0-snap-
> test.qcow2 backing_fmt=raw lazy_refcounts=off refcount_bits=16
> snapshotting 'drive-scsi1' (extsnap:131314/vm-131314-disk-1.qcow2)
> Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-
> test2.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=on
> preallocation=off compression_type=zlib size=2147483648
> backing_file=/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-
> 11.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
> snapshotting 'drive-scsi2' (extsnap:131314/vm-131314-disk-11.qcow2)
> qemu-img: /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-
> test2.qcow2: Error: Trying to create an image with the same filename as
> the backing file
> snapshot create failed: starting cleanup
> merge snapshot test2 to test
> Image committed.
> merge snapshot test2 to base
> Image committed.
> TASK ERROR: command '/usr/bin/qemu-img create -b
> /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-test2.qcow2
> -F qcow2 -f qcow2 /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-
> 11-snap-test2.qcow2 -o 'extended_l2=on,preallocation=off'' failed: exit
> code 1
>
> > +
> > + my (undef, $snapvolname) = parse_volume_id($volid);
> > + my $snapname = get_snapname_from_path($volid);
> > + my $snapfile = $class->filesystem_path($scfg, $snapvolname,
> > $snapname);
> > + $snapname = 'base' if !$snapname;
> > +
> > + my $format = $snap->{'format'};
> > + my $parentfile = $snap->{parent};
> > + my $parentname = get_snapname_from_path($parentfile) if
> > $parentfile;
> > + $parentname = 'base' if !$parentname && $parentfile;
> > +
> > + $info->{$snapname}->{file} = $snapfile;
> > + $info->{$snapname}->{volid} = $volid;
> > + $info->{$snapname}->{'format'} = $format;
> > + $info->{$snapname}->{parent} = $parentname if $parentname;
> > + $info->{$parentname}->{child} = $snapname if $parentname;
> > + }
> > +
> > + my $current = undef;
> > + for my $id (keys %$info) {
> > + my $snap = $info->{$id};
> > + die "error: snap $id: you can't have multiple current snapshot:
> > current:$current\n" if !$snap->{child} && $current;
> > + $current = $id if !$snap->{child};
> > + }
> > +
> > + if ($current) {
> > + $info->{current}->{file} = $info->{$current}->{file};
> > + $info->{current}->{'format'} = $info->{$current}->{'format'};
> > + $info->{current}->{parent} = $info->{$current}->{parent};
> > + }
> > +
> > + return $info;
> > }
> >
> > sub activate_storage {
> > @@ -1764,4 +1905,38 @@ sub config_aware_base_mkdir {
> > }
> > }
> >
> > +sub get_snap_path {
> > + my ($path, $snap) = @_;
> > +
> > + my $basepath = "";
> > + my $baseformat = "";
> > + if ($path =~ m/^((.*)(vm-(\d+)-disk-(\d+)))(-snap-
> > (.*))?\.(raw|qcow2)/) {
>
> >>this regex is wrong - volumes can have arbitrary names after the -
> >>disk- part..
>
> ah sorry. do you have some example where it's used ? (maybe for efi or
> other specific disk ?)
no, any vdisk can have (almost) anything after the -disk- part. you can allocate such volumes using `pvesm alloc` or the API (we just are not very good at keeping those custom suffixes when moving/migrating/.. ;))
> > + $basepath = $1;
> > + $baseformat = $8;
> > + }
> > + my $format = $snap ? 'qcow2' : $baseformat;
> > + my $snappath = $snap ? $basepath."-snap-$snap.$format" : undef;
> > +
> > + return $snappath;
> > +}
> > +
> > +sub get_snapname_from_path {
> > + my ($path) = @_;
> > +
> > + if ($path =~ m/^((.*)(vm-(\d+)-disk-(\d+)))(-snap-
> > (.*))?\.(raw|qcow2)/) {
>
> >>here as well.. and this whole helper is just used twice in
> >>volume_snapshot_info, maybe it could be inlined or made private
> ok !
>
>
> > + my $snapname = $7;
> > + return $snapname;
> > + }
> > + die "can't parse snapname from path";
> > +}
> > +
> > +sub get_current_snapshot {
> > + my ($class, $scfg, $storeid, $volname) = @_;
> > + #IMPROVE ME: faster way to find current snapshot? (search the
> > most recent created snapshot file ? need to works with lvm volume
> > too)
> > +
> > + return if !$scfg->{snapext};
> > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> > $volname);
> > + return $snapshots->{current};
> > +}
> > +
> > 1;
> > --
> > 2.39.2
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot)
[not found] ` <e976104d8ed7c365d8a482fa320a0691456e69c1.camel@groupe-cyllene.com>
@ 2024-10-24 7:42 ` Fabian Grünbichler
2024-10-24 11:01 ` DERUMIER, Alexandre via pve-devel
0 siblings, 1 reply; 27+ messages in thread
From: Fabian Grünbichler @ 2024-10-24 7:42 UTC (permalink / raw)
To: DERUMIER, Alexandre, pve-devel
> DERUMIER, Alexandre <alexandre.derumier@groupe-cyllene.com> hat am 23.10.2024 15:45 CEST geschrieben:
>
>
> >>I am not yet convinced this is somehow a good idea, but maybe you can
> >>convince me otherwise ;)
I maybe judged this too quickly - I thought this was combining LVM + a dir-based storage, but this is putting the qcow2 overlays on LVs, which I missed on the first pass!
> >>variant A: this is just useful for very short-lived snapshots
> >>variant B: these snapshots are supposed to be long-lived
>
> Can you defined "short "/ "long" ? and the different usecase ?
>
> because for me, a snapshot is a snapshot. Sometime I take a snapshot
> before doing some critical changes, but I can't known if I need to
> rollback in next 2h, or next month.
yes, this would be an example of a short-lived snapshot
> I think that "long-lived" usecase is backup (but we don't need it),
> or replication (this could apply here, if we want to add replication
> for disaster recovery)
backup would also be short-lived usually (the snapshot is just to take the backup, not to keep a backup). long-lived usually is something like "take daily snapshot and keep for a few weeks for file recovery", in addition to regular backups. or "snapshot because we just had an incident and might need this for forensics in a few months" (can also be solved with backups, of course ;)).
the main difference between the two is - for short-lived snapshots performance implications related to snapshots existing are not that important. I can live with a few hours of degraded performance, if the snapshot is part of some important process/work flow. with long-lived snapshots there is a requirement for them to not hurt performance just by existing, because otherwise you can't use them. there is a second reason why long-lived snapshots can be impossible - if you need to decide up-front how "big" the delta of that snapshot can grow at most, then in PVE context, you always need to allocate the full volume size (regular thick LVM had both issues - bad performance, and new writes going into a thick snapshot volume).
if you can support long-lived snapshots, then you automatically also support short-lived snapshots. the other direction doesn't hold. since PVE only has one kind of snapshots, they need to be useful for long-lived snapshots.
> >>A is not something we want. we intentionally don't have non-thin LVM
> >>snapshots for example.
>
> AFAIK, we never had implemented it because LVM snasphot is slow as
> hell.(as a lvm extent are around 4MB, if you want 4k on a snapshot, you
> need to reread and rewrite the 4MB, so around 1000x over-
> amplification and slow iops)
see above - there's two issues, one is performance, the other is that you need to either
- make the snapshot smaller than the original volume (and risk running out of space)
- make the snapshot as big as the original volume (and blow up space requirements)
(thick) LVM snapshots basically barely work for the "take a consistent backup during quiet periods" use case, and not much else.
> This is really the main blocker for my customers migrating from vmware
> (and to be honest I have some of them going to oracle ovlm (with
> ovirt), because ovirt support it this way).
> >>B once I create a single snapshot, the "original" storage only
> >>contains the data written up to that point, anything else is stored
> >>on the "snapshot" storage. this means my snapshot storage must be at
> >>least as fast/good/shared/.. as my original storage. in that case, I
> >>can just use the snapshot storage directly and ditch the original
> >>storage?
>
> Sorry, but I don't understand why you are talking about
> original/snapshot storage ? I never have thinked to use another storage
> for external snapshot.
>
> The patch is really to add external snapshot on same lvm storage,
> through lvm additional volume, but with qcow2 format to have good
> performance (vs slow lvm snapshot)
see above - I misread and answered too quickly.
I took a closer look and replied inline below with some comments - much of it mimics the comments for the dir plugin..
> > Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-
> > cyllene.com>
> > ---
> > src/PVE/Storage.pm | 2 +
> > src/PVE/Storage/LvmQcow2Plugin.pm | 460
> > ++++++++++++++++++++++++++++++
> > src/PVE/Storage/Makefile | 3 +-
> > 3 files changed, 464 insertions(+), 1 deletion(-)
> > create mode 100644 src/PVE/Storage/LvmQcow2Plugin.pm
> >
> > diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm
> > index 57b2038..119998f 100755
> > --- a/src/PVE/Storage.pm
> > +++ b/src/PVE/Storage.pm
> > @@ -28,6 +28,7 @@ use PVE::Storage::Plugin;
> > use PVE::Storage::DirPlugin;
> > use PVE::Storage::LVMPlugin;
> > use PVE::Storage::LvmThinPlugin;
> > +use PVE::Storage::LvmQcow2Plugin;
> > use PVE::Storage::NFSPlugin;
> > use PVE::Storage::CIFSPlugin;
> > use PVE::Storage::ISCSIPlugin;
> > @@ -54,6 +55,7 @@ our $KNOWN_EXPORT_FORMATS = ['raw+size',
> > 'tar+size', 'qcow2+size', 'vmdk+size',
> > PVE::Storage::DirPlugin->register();
> > PVE::Storage::LVMPlugin->register();
> > PVE::Storage::LvmThinPlugin->register();
> > +PVE::Storage::LvmQcow2Plugin->register();
> > PVE::Storage::NFSPlugin->register();
> > PVE::Storage::CIFSPlugin->register();
> > PVE::Storage::ISCSIPlugin->register();
> > diff --git a/src/PVE/Storage/LvmQcow2Plugin.pm
> > b/src/PVE/Storage/LvmQcow2Plugin.pm
> > new file mode 100644
> > index 0000000..68c8686
> > --- /dev/null
> > +++ b/src/PVE/Storage/LvmQcow2Plugin.pm
> > @@ -0,0 +1,460 @@
> > +package PVE::Storage::LvmQcow2Plugin;
> > +
> > +use strict;
> > +use warnings;
> > +
> > +use IO::File;
> > +
> > +use PVE::Tools qw(run_command trim);
> > +use PVE::Storage::Plugin;
> > +use PVE::Storage::LVMPlugin;
> > +use PVE::JSONSchema qw(get_standard_option);
> > +
> > +use base qw(PVE::Storage::LVMPlugin);
could we integrate this into the LVM plugin if we implement it? basically add the "snapext" option, which is fixed, and if it is set, disallow rootdir?
probably snapext should also be fixed for dir(-based) storages, since toggling it when snapshots exist would break a ton of stuff?
> > +
> > +# Configuration
> > +
> > +sub type {
> > + return 'lvmqcow2';
> > +}
> > +
> > +sub plugindata {
> > + return {
> > + #container not yet implemented #need to implemented dm-qcow2
> > + content => [ {images => 1, rootdir => 1}, { images => 1 }],
then rootdir shouldn't be mentioned here at all? the first member contains the possible content types, the second the default if no explicit ones are set..
> > + };
> > +}
> > +
> > +sub properties {
> > + return {
> > + };
> > +}
> > +
> > +sub options {
> > + return {
> > + vgname => { fixed => 1 },
> > + nodes => { optional => 1 },
> > + shared => { optional => 1 },
> > + disable => { optional => 1 },
> > + saferemove => { optional => 1 },
> > + saferemove_throughput => { optional => 1 },
> > + content => { optional => 1 },
> > + base => { fixed => 1, optional => 1 },
> > + tagged_only => { optional => 1 },
> > + bwlimit => { optional => 1 },
> > + snapext => { fixed => 1 },
> > + };
> > +}
> > +
> > +# Storage implementation
> > +
> > +sub parse_volname {
> > + my ($class, $volname) = @_;
> > +
> > + PVE::Storage::Plugin::parse_lvm_name($volname);
> > + my $format = $volname =~ m/^(.*)-snap-/ ? 'qcow2' : 'raw';
> > +
> > + if ($volname =~ m/^((vm|base)-(\d+)-\S+)$/) {
> > + return ('images', $1, $3, undef, undef, $2 eq 'base',
> > $format);
> > + }
I wonder if here we also want to keep the volid/volname like it is, but name the LVs for snapshots differently?
> > +
> > + die "unable to parse lvm volume name '$volname'\n";
> > +}
> > +
> > +sub filesystem_path {
> > + my ($class, $scfg, $volname, $snapname, $current_snap) = @_;
> > +
> > + my ($vtype, $name, $vmid) = $class->parse_volname($volname);
> > +
> > + my $vg = $scfg->{vgname};
> > +
> > + my $path = "/dev/$vg/$name";
> > +
> > + if($snapname) {
> > + $path = get_snap_volname($path, $snapname);
> > + } elsif ($current_snap) {
> > + $path = $current_snap->{file};
> > + }
see comment for the dir storage ;)
> > +
> > + return wantarray ? ($path, $vmid, $vtype) : $path;
> > +}
> > +
> > +sub create_base {
> > + my ($class, $storeid, $scfg, $volname) = @_;
> > +
> > + my $vg = $scfg->{vgname};
nit: this could move below, closer to where it is used..
> > +
> > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> > + $class->parse_volname($volname);
> > +
> > + die "create_base not possible with base image\n" if $isBase;
> > +
> > + die "unable to create base volume - found snapshot" if $class-
> > >snapshot_exist($scfg, $storeid, $volname);
> > +
> > + my $newname = $name;
> > + $newname =~ s/^vm-/base-/;
> > +
> > + my $cmd = ['/sbin/lvrename', $vg, $volname, $newname];
> > + run_command($cmd, errmsg => "lvrename '$vg/$volname' =>
> > '$vg/$newname' error");
> > +
> > + # set inactive, read-only flags
> > + $cmd = ['/sbin/lvchange', '-an', '-pr', "$vg/$newname"];
> > + eval { run_command($cmd); };
> > + warn $@ if $@;
> > +
> > + my $newvolname = $newname;
> > +
> > + return $newvolname;
> > +}
> > +
> > +sub clone_image {
> > + my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;
> > +
> > + die "can't clone images in lvm storage\n";
> > +}
> > +
> > +sub alloc_image {
> > + my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;
> > +
> > + die "unsupported format '$fmt'" if $fmt ne 'raw';
here I also wonder whether it wouldn't be "easier" to only allocate qcow2 formatted LVs (also for the initial allocation)?
otherwise, this is basically alloc_image from the LVMPlugin, just with the extra tags added, which could also be done where the snapshots are handled further below..
> > +
> > + die "illegal name '$name' - should be 'vm-$vmid-*'\n"
> > + if $name && $name !~ m/^vm-$vmid-/;
> > +
> > + my $vgs = PVE::Storage::LVMPlugin::lvm_vgs();
> > +
> > + my $vg = $scfg->{vgname};
> > +
> > + die "no such volume group '$vg'\n" if !defined ($vgs->{$vg});
> > +
> > + my $free = int($vgs->{$vg}->{free});
> > +
> > + die "not enough free space ($free < $size)\n" if $free < $size;
> > +
> > + $name = $class->find_free_diskname($storeid, $scfg, $vmid)
> > + if !$name;
> > +
> > + my $tags = ["pve-vm-$vmid"];
> > + if ($name =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/) {
> > + push @$tags, "\@pve-$1";
> > + }
I don't like this part for two reasons:
1. without this, alloc_image is identical to LVMPlugin's (see above/below)
2. the way the snapshot is encoded in the volname means I can "pvesm alloc" something with a snapshot volname and break things..
I think this should be refactored:
- alloc_image should allow (only?) qcow2
- it should use a custom helper for the actual lvcreate, but be restricted to "proper" volume names
- it should use another helper for the qcow2 formatting
- volume_snapshot should use the same helpers, but call them with a different LV name
the same also applies to free_image, for similar reasons (don't allow to call free_image with a snapshot directly, but use a common helper for free_image and volume_snapshot_delete)
> > +
> > + PVE::Storage::LVMPlugin::lvcreate($vg, $name, $size, $tags);
> > +
> > + return $name;
> > +}
> > +
> > +sub volume_snapshot_info {
> > + my ($class, $scfg, $storeid, $volname) = @_;
> > +
> > + return $class->list_snapshots($scfg, $storeid, $volname);
why have two public subs for this? the $current_only would not be needed if the volume itself would also be the current snapshot..
> > +}
> > +
> > +sub activate_volume {
> > + my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
> > +
> > + my $lvm_activate_mode = 'ey';
> > + my $tag = undef;
$tag is undef
> > +
> > + #activate volume && all volumes snapshots by tag
> > + if($volname =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/)
> > {
> > + $tag = "\@pve-vm-$4-disk-$5";
here only the disk itself is put into the tag, the optional snap part isn't.. and $snapname is ignored as well
> > + }
and what if the regex didn't match?
> > +
> > + my $cmd = ['/sbin/lvchange', "-a$lvm_activate_mode", $tag];
so this will only ever activate the "main" volume?
> > + run_command($cmd, errmsg => "can't activate LV '$tag'");
> > +
> > + $cmd = ['/sbin/lvchange', '--refresh', $tag];
> > + run_command($cmd, errmsg => "can't refresh LV '$tag' for
> > activation");
this should
- not use $volname to transfer $snapname, but pass in the volname contained in the volid
- use $snapname ;)
> > +}
> > +
> > +sub deactivate_volume {
> > + my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
> > +
> > + my $tag = undef;
> > + #deactivate volume && all volumes snasphots by tag
> > + if($volname =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/)
> > {
> > + $tag = "\@pve-vm-$4-disk-$5";
> > + }
same as for activate_volume applies here as well..
> > +
> > + my $cmd = ['/sbin/lvchange', '-aln', $tag];
> > + run_command($cmd, errmsg => "can't deactivate LV '$tag'");
> > +}
> > +
> > +sub volume_resize {
> > + my ($class, $scfg, $storeid, $volname, $size, $running) = @_;
> > +
> > + #we should resize the base image and parents snapshots,
> > + #but how to manage rollback ?
> > +
> > + die "can't resize if snasphots exist" if $class-
> > >snapshot_exist($scfg, $storeid, $volname);
I don't think qemu requires backing file and overlay sizes to agree - just that if you write back (commit) up the chain, you might need to resize the backing file to accommodate the additional data/space. so resizing should be fine (in theory at least)? also see the docs for `qemu-img commit`.
> > +
> > + return 1;
> > +}
> > +
> > +sub volume_snapshot {
> > + my ($class, $scfg, $storeid, $volname, $snap) = @_;
> > +
> > + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> > +
> > + my $current_path = $class->path($scfg, $volname, $storeid);
> > + my $current_format =
> > (PVE::Storage::Plugin::file_size_info($current_path))[1];
> > + my $snappath = get_snap_volname($current_path, $snap);
> > +
> > + my $snapvolname = get_snap_volname($volname, $snap);
> > + #allocate lvm snapshot volume
> > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> > + $class->parse_volname($volname);
> > + my $size = $class->volume_size_info($scfg, $storeid, $volname,
> > 5);
> > + #add 100M for qcow2 headers
> > + $size = int($size/1024) + (100*1024);
a pointer where that 100M comes from would be nice ;)
> > +
> > + $class->alloc_image($storeid, $scfg, $vmid, 'raw', $snapvolname,
> > $size);
so this could instead use the regular alloc_image from stock LVM, and then set the tags here if we want to keep alloc_image as raw-only.. which I don't think we really want ;)
> > +
> > + # create the qcow2 fs
> > + eval {
> > + my $cmd = ['/usr/bin/qemu-img', 'create', '-b',
> > $current_path,
> > + '-F', $current_format, '-f', 'qcow2', $snappath];
> > + my $options = "extended_l2=on,";
> > + $options .=
> > PVE::Storage::Plugin::preallocation_cmd_option($scfg, 'qcow2');
> > + push @$cmd, '-o', $options;
> > + run_command($cmd);
> > + };
see comment for alloc_image above..
> > + if ($@) {
> > + eval { $class->free_image($storeid, $scfg, $snapvolname, 0) };
I guess this is okay, but it would read a bit cleaner if this would call volume_snapshot_delete..
> > + warn $@ if $@;
> > + }
> > +}
> > +
> > +# Asserts that a rollback to $snap on $volname is possible.
> > +# If certain snapshots are preventing the rollback and $blockers is
> > an array
> > +# reference, the snapshot names can be pushed onto $blockers prior
> > to dying.
> > +sub volume_rollback_is_possible {
> > + my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
> > +
> > + my $path = $class->filesystem_path($scfg, $volname);
> > + my $snappath = get_snap_volname($path, $snap);
> > + my $currentpath = $class->path($scfg, $volname, $storeid);
> > + return 1 if $currentpath eq $snappath;
> > +
> > + die "can't rollback, '$snap' is not most recent snapshot on
> > '$volname'\n";
> > +
> > + return 1;
> > +}
same comments as for the dir-based patches apply here as well - if at all possible, having a 1:1 mapping of snapshot name to LV name would be great.. other than LVs not being hardlinkable, I think the same considerations apply there as well..
> > +
> > +sub volume_snapshot_rollback {
> > + my ($class, $scfg, $storeid, $volname, $snap) = @_;
> > +
> > + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> > + #simply delete the current snapshot and recreate it
> > +
> > + my $snapvolname = get_snap_volname($volname, $snap);
> > +
> > + $class->free_image($storeid, $scfg, $snapvolname, 0);
> > + $class->volume_snapshot($scfg, $storeid, $volname, $snap);
> > +}
> > +
> > +sub volume_snapshot_delete {
> > + my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
> > +
> > + return 1 if $running;
> > +
> > + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> > +
> > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> > $volname);
> > + my $snappath = $snapshots->{$snap}->{file};
> > + if(!$snappath) {
> > + warn "$snap already deleted. skip\n";
> > + return;
> > + }
how can this happen? the snapshot info is generated by querying LVM for a list of LVs..
> > +
> > + my $snapvolname = $snapshots->{$snap}->{volname};
> > + my $parentsnap = $snapshots->{$snap}->{parent};
> > + my $childsnap = $snapshots->{$snap}->{child};
> > + die "error: can't find a parent for this snapshot" if
> > !$parentsnap;
but the first snapshot doesn't have a parent?
> > +
> > + my $parentpath = $snapshots->{$parentsnap}->{file};
> > + my $parentformat = $snapshots->{$parentsnap}->{'format'} if
> > $parentsnap;
> > + my $childpath = $snapshots->{$childsnap}->{file} if $childsnap;
unless someone manually messed with the snapshot tree, in the current scheme any "snapshot" has a child?
> > + my $childformat = $snapshots->{$childsnap}->{'format'} if
> > $childsnap;
> > +
> > + print "merge snapshot $snap to $parentsnap\n";
> > + my $cmd = ['/usr/bin/qemu-img', 'commit', $snappath];
> > + run_command($cmd);
> > +
> > + #if we delete an intermediate snapshot, we need to link upper
> > snapshot to base snapshot
> > + if($childpath && -e $childpath) {
> > + die "missing parentsnap snapshot to rebase child $childpath\n" if
> > !$parentpath;
> > + print "link $childsnap to $parentsnap\n";
> > + $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b', $parentpath, '-
> > F', $parentformat, '-f', $childformat, $childpath];
> > + run_command($cmd);
> > + }
same here, commit + rebase -u should be the same as rebase ?
> > +
> > + #delete the snapshot
> > + $class->free_image($storeid, $scfg, $snapvolname, 0);
> > +
> > + return;
> > +}
> > +
> > +sub volume_has_feature {
> > + my ($class, $scfg, $feature, $storeid, $volname, $snapname,
> > $running) = @_;
> > +
> > + my $features = {
> > + snapshot => { current => 1 },
> > +# clone => { base => 1, snap => 1}, #don't allow to clone as we
> > can't activate the base between different host ?
that's only true for shared LVM though, and the rest would also work for local LVM?
> > + template => { current => 1},
> > + copy => { base => 1, current => 1, snap => 1},
> > + sparseinit => { base => 1, current => 1},
> > + rename => {current => 1},
> > + };
> > +
> > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> > + $class->parse_volname($volname);
> > +
> > + my $key = undef;
> > + if($snapname){
> > + $key = 'snap';
> > + }else{
> > + $key = $isBase ? 'base' : 'current';
> > + }
> > + return 1 if $features->{$feature}->{$key};
> > +
> > + return undef;
> > +}
> > +
> > +sub get_snap_volname {
> > + my ($path, $snap) = @_;
> > +
> > + my $basepath = "";
> > + my $baseformat = "";
> > + if ($path =~ m/^((.*)((vm|base)-(\d+)-disk-(\d+)))(-snap-([a-zA-
> > Z0-9]+))?(\.(raw|qcow2))?/) {
> > + $basepath = $1;
> > + $baseformat = $8;
> > + }
> > + my $snapvolname = $basepath."-snap-$snap.qcow2";
> > + return $snapvolname;
> > +}
> > +
> > +sub get_snapname_from_path {
> > + my ($path) = @_;
> > +
> > + if ($path =~ m/^((.*)((vm|base)-(\d+)-disk-(\d+)))(-snap-([a-zA-
> > Z0-9]+))?(\.(raw|qcow2))?/) {
> > + my $snapname = $7;
> > + return $snapname;
> > + }
> > + die "can't parse snapname from path $path";
> > +}
> > +
> > +sub get_current_snapshot {
> > + my ($class, $scfg, $storeid, $volname) = @_;
> > +
> > + #get more recent ctime volume
> > + return $class->list_snapshots($scfg, $storeid, $volname, 1);
> > +}
> > +my $check_tags = sub {
> > + my ($tags) = @_;
> > +
> > + return defined($tags) && $tags =~ /(^|,)pve-vm-\d+(,|$)/;
> > +};
> > +
> > +sub list_images {
> > + my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;
> > +
> > + my $vgname = $scfg->{vgname};
> > +
> > + $cache->{lvs} = PVE::Storage::LVMPlugin::lvm_list_volumes() if
> > !$cache->{lvs};
> > +
> > + my $res = [];
> > +
> > + if (my $dat = $cache->{lvs}->{$vgname}) {
> > +
> > + foreach my $volname (keys %$dat) {
> > +
> > + next if $volname !~ m/^(vm|base)-(\d+)-/;
> > + my $owner = $2;
> > +
> > + my $info = $dat->{$volname};
> > +
> > + next if $scfg->{tagged_only} && !&$check_tags($info->{tags});
> > +
> > + # Allow mirrored and RAID LVs
> > + next if $info->{lv_type} !~ m/^[-mMrR]$/;
> > +
> > + my $volid = "$storeid:$volname";
> > +
> > + if ($vollist) {
> > + my $found = grep { $_ eq $volid } @$vollist;
> > + next if !$found;
> > + } else {
> > + next if defined($vmid) && ($owner ne $vmid);
> > + }
> > +
> > + push @$res, {
> > + volid => $volid, format => 'raw', size => $info->{lv_size}, vmid =>
> > $owner,
> > + ctime => $info->{ctime},
> > + };
but doesn't this now include all snapshot LVs as well? while pretending they are raw?
> > + }
> > + }
> > +
> > + return $res;
> > +}
> > +
> > +sub list_snapshots {
> > + my ($class, $scfg, $storeid, $volname, $current_only) = @_;
> > +
> > + my $vgname = $scfg->{vgname};
> > +
> > + my $basevolname = $volname;
> > + my $lvs = PVE::Storage::LVMPlugin::lvm_list_volumes($vgname);
this
> > +
> > + my $vg = $lvs->{$vgname};
and this seem to be unused?
> > +
> > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase,
> > $format) = $class->parse_volname($volname);
> > + my $snapshots = $class->list_images($storeid, $scfg, $vmid);
> > +
> > + my $info = {};
> > + for my $snap (@$snapshots) {
> > + my $snap_volid = $snap->{volid};
> > + next if ($snap_volid !~ m/$basevolname/);
same issue as with the dir patch - this allows partial matching if two volumes share a name prefix
> > +
> > + my $snapname = get_snapname_from_path($snap_volid);
> > + my (undef, $snap_volname) =
> > PVE::Storage::parse_volume_id($snap_volid);
> > + my $snapfile = $class->filesystem_path($scfg, $snap_volname,
> > $snapname);
> > + $snapname = 'base' if !$snapname;
> > + $info->{$snapname}->{file} = $snapfile;
> > + $info->{$snapname}->{volname} = $snap_volname;
> > + $info->{$snapname}->{volid} = $snap_volid;
> > + $info->{$snapname}->{ctime} = $snap->{ctime};
> > +
> > + if (!$current_only) {
> > + my (undef, $format, undef, $parentfile, undef) =
> > PVE::Storage::Plugin::file_size_info($snapfile);
> > + next if !$parentfile && $snapname ne 'base'; #bad unlinked
> > snasphot
> > +
> > + my $parentname = get_snapname_from_path($parentfile) if
> > $parentfile;
> > + $parentname = 'base' if !$parentname && $parentfile;
> > +
> > + $info->{$snapname}->{'format'} = $format;
> > + $info->{$snapname}->{parent} = $parentname if $parentname;
> > + $info->{$parentname}->{child} = $snapname if $parentname;
> > + }
> > + }
> > +
> > + my @snapshots_sorted = sort { $info->{$b}{ctime} <=> $info-
> > >{$a}{ctime} } keys %$info;
> > + my $current_snapname = $snapshots_sorted[0];
> > + my $current_snapshot = $info->{$current_snapname};
> > + return $current_snapshot if $current_only;
this (returning two hashes with different structure depending on $current_only) is easy to miss and get wrong..
> > +
> > + $info->{current} = { %$current_snapshot };
especially if this is done anyway, so the caller can just look at that if they only want the current snapshot..
> > + return $info;
> > +}
> > +
> > +sub snapshot_exist {
> > + my ($class, $scfg, $storeid, $volname) = @_;
> > +
> > + my $basepath = $class->filesystem_path($scfg, $volname);
> > + my $currentpath = $class->path($scfg, $volname, $storeid);
> > +
> > + die "can't resize if snasphots exist" if $currentpath ne
> > $basepath;
I think something here is wrong ;)
> > +
> > +}
> > +1;
> > diff --git a/src/PVE/Storage/Makefile b/src/PVE/Storage/Makefile
> > index d5cc942..1af8aab 100644
> > --- a/src/PVE/Storage/Makefile
> > +++ b/src/PVE/Storage/Makefile
> > @@ -14,7 +14,8 @@ SOURCES= \
> > PBSPlugin.pm \
> > BTRFSPlugin.pm \
> > LvmThinPlugin.pm \
> > - ESXiPlugin.pm
> > + ESXiPlugin.pm \
> > + LvmQcow2Plugin.pm
> >
> > .PHONY: install
> > install:
> > --
> > 2.39.2
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot
[not found] ` <aeb9b8ea34826483eabe7fec5e2c12b1e22e132f.camel@groupe-cyllene.com>
@ 2024-10-24 7:43 ` Fabian Grünbichler
0 siblings, 0 replies; 27+ messages in thread
From: Fabian Grünbichler @ 2024-10-24 7:43 UTC (permalink / raw)
To: DERUMIER, Alexandre, pve-devel
> DERUMIER, Alexandre <alexandre.derumier@groupe-cyllene.com> hat am 23.10.2024 16:31 CEST geschrieben:
>
>
> >>if we want the current volume to keep its name, and the snapshot
> >>volume to actually contain *that* snapshot's data, we need some sort
> >>of rename dance here as well.. i.e., rename the current volume to
> >>have the snapshot volume name, then snapshot it back into the
> >>"current" name. not sure what the proper qmp runes would be to
> >>achieve that?
>
> I really to check that, but I would like to keep it as most atomic than
> possible (avoid stream, double snapshot, and all fancy stuff just to
> take a snapshot. because shit happen ^_^ , and generally it'll happen
> when you'll take a snapshot with multiple disk, you'll to manage
> recovery and current state of differents volumes)
>
> Another stupid way is to use generic name for snapfile (maybe with
> ctime inside the name), and create symlinks when snapshot is taken.(
> qemu follow symlink and use realpaths for backing chain).
> (and for lvm, it can be done with metadatas).
>
> I'll really try to find a way with renaming volume, I'll keep you in
> touch.
yes, that might work as well. it's a bit less "clean", since looking at the storage itself then requires this extra knowledge to know what's what, but it shouldn't be too bad hopefully? curious to see what you come up with :)
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
2024-10-23 10:12 ` Fabian Grünbichler
2024-10-23 12:59 ` DERUMIER, Alexandre via pve-devel
[not found] ` <f066c13a25b30e3107a9dec8091b456ce2852293.camel@groupe-cyllene.com>
@ 2024-10-24 7:50 ` Fabian Grünbichler
2 siblings, 0 replies; 27+ messages in thread
From: Fabian Grünbichler @ 2024-10-24 7:50 UTC (permalink / raw)
To: Proxmox VE development discussion
> Fabian Grünbichler <f.gruenbichler@proxmox.com> hat am 23.10.2024 12:12 CEST geschrieben:
>
>
> some high level comments:
>
> I am not sure how much we gain here with the raw support? it's a bit confusing to have a volid ending with raw, with the current volume and all but the first snapshot actually being stored in qcow2 files, with the raw file being the "oldest" snapshot in the chain..
>
> if possible, I'd be much happier with the snapshot name in the snapshot file being a 1:1 match, see comments inline
> - makes it a lot easier to understand (admin wants to manually remove snapshot "foo", if "foo" was the last snapshot then right now the volume called "foo" is actually the current contents!)
> - means we don't have to do lookups via the full snapshot list all the time (e.g., if I want to do a full clone from a snapshot "foo", I can just pass the snap-foo volume to qemu-img)
>
> the naming scheme for snapshots needs to be adapted to not clash with regular volumes:
>
> $ pvesm alloc extsnap 131314 vm-131314-disk-foobar.qcow2 2G
> Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-foobar.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=off compression_type=zlib size=2147483648 lazy_refcounts=off refcount_bits=16
> successfully created 'extsnap:131314/vm-131314-disk-foobar.qcow2'
> $ qm rescan --vmid 131314
> rescan volumes...
> can't parse snapname from path at /usr/share/perl5/PVE/Storage/Plugin.pm line 1934.
>
> storage_migrate needs to handle external snapshots, or at least error out. I haven't tested that part or linked clones or a lot of other advanced related actions at all ;)
I'll add some more high-level comments (the threading seems to be broken for some reason, so I'll use this as "entrypoint"):
- snapext should probably be fixed for dir-type storages as well
- the volume ID should be static for both plugins, snapshots should be encoded on the storage layer in a fashion that doesn't "break through" to the API layers and makes it impossible to confuse the "main" volname with snapshots:
-- alloc_image shouldn't be able to allocate a volume that is then interpreted as snapshot
-- free_image shouldn't be able to free a snapshot volume directly
-- listing images should never return snapshots
-- ..
- for the LVM part, snapshots still require allocation a full-sized volume+some overhead for the qcow2 each. should we attempt to shrink them once they become read-only? in practice, only the LV backing the top image needs to be full-sized.. how do we ensure the underlying storage doesn't waste all that "empty" space?
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
2024-10-24 6:42 ` Fabian Grünbichler
@ 2024-10-24 7:59 ` Giotta Simon RUAGH via pve-devel
2024-10-24 9:48 ` Fabian Grünbichler
2024-10-25 5:52 ` DERUMIER, Alexandre via pve-devel
1 sibling, 1 reply; 27+ messages in thread
From: Giotta Simon RUAGH via pve-devel @ 2024-10-24 7:59 UTC (permalink / raw)
To: Proxmox VE development discussion, DERUMIER, Alexandre; +Cc: Giotta Simon RUAGH
[-- Attachment #1: Type: message/rfc822, Size: 39436 bytes --]
From: Giotta Simon RUAGH <Simon.Giotta@ruag.ch>
To: Proxmox VE development discussion <pve-devel@lists.proxmox.com>, "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
Subject: RE: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
Date: Thu, 24 Oct 2024 07:59:13 +0000
Message-ID: <c6544b5f333c4e2289f62b2c32f06134@ruag.ch>
Hi Everyone
> I mean, if we don't allow .raw files to be snapshotted then this problem doesn't exist ;)
Quick comment from the bleacher; Adding a mechanism to shapshot raw disks might solve the TPM (tpmstate) snapshotting issue, as well as allowing containers to be snapshot.
For context:
When using a storage that does not natively support snapshotting (NFS on NetApp or similar enterprise storage, in particular), raw disks cannot be snapshot.
Since tpmstate disks can only be stored as raw (as I understand they are just a binary blob?), this makes it impossible to snapshot or (link-)clone any VMs that have a TPM. This especially is an issue for current Windows clients.
Same issue for LXC containers, as their storage format is raw only as well.
https://bugzilla.proxmox.com/show_bug.cgi?id=4693
Beste Grüsse
Simon Giotta
Systemadministrator
simon.giotta@ruag.ch
RUAG AG
Schaffhauserstrasse 580 | 8052 Zürich
-----Original Message-----
From: pve-devel <pve-devel-bounces@lists.proxmox.com> On Behalf Of Fabian Grünbichler
Sent: Donnerstag, 24. Oktober 2024 08:42
To: DERUMIER, Alexandre <alexandre.derumier@groupe-cyllene.com>; pve-devel@lists.proxmox.com
Subject: Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
> DERUMIER, Alexandre <alexandre.derumier@groupe-cyllene.com> hat am 23.10.2024 14:59 CEST geschrieben:
>
>
> Hi Fabian,
>
> thanks for the review !
>
> >>-------- Message initial --------
> >>De: Fabian Grünbichler <f.gruenbichler@proxmox.com>
> >>À: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
> >>Cc: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
> >>Objet: Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external
> >>snasphot support
> >>Date: 23/10/2024 12:12:46
> >>
> >>some high level comments:
> >>
> >>I am not sure how much we gain here with the raw support?
>
> There is real qcow2 overhead, mostly with big disks.
> For good performance, the qcow2 l2-cache-size needs to be kept in
> memory (and it's 1MB per disk)
> https://events.static.linuxfound.org/sites/events/files/slides/kvm-for
> um-2017-slides.pdf
>
> Hopefully, they are improvments with the "new" sub-cluster feature
> https://people.igalia.com/berto/files/kvm-forum-2020-slides.pdf
> I'm already using it at snapshot create, but I think we should also
> use it for main qcow2 volume.
>
>
> But even with that, you can still have performance impact.
> So yes, I think there are real use cases for workloads where you only
> need a snapshot from time to time (before an upgrade, for example), but
> want maximum performance when no snapshot exists.
my main point here is - all other storages treat snapshots as "cheap". if you combine raw+qcow2 snapshot overlays, suddenly performance will get worse if you keep a snapshot around for whatever reason..
> >> it's a bit confusing to have a volid ending with raw, with the
> >>current volume and all but the first snapshot actually being stored
> >>in qcow2 files, with the raw file being the "oldest" snapshot in the
> >>chain..
> if it's too confusing, we could use for example an .snap extension.
> (as we known that it's qcow2 behind)
I haven't thought yet about how to encode the snapshot name into the snapshot file name, but yeah, maybe something like that would be good. or maybe snap-VMID-disk-DISK.qcow2 ?
> if possible, I'd be much happier with the snapshot name in the
> snapshot file being a 1:1 match, see comments inline
>
> >>- makes it a lot easier to understand (admin wants to manually
> >>remove snapshot "foo", if "foo" was the last snapshot then right now
> >>the volume called "foo" is actually the current contents!)
>
> This part is really difficult, because you can't know in advance the
> name of the snapshot you'll take in the future. The only way could be
> to create a "current" volume, then rename it when you take another
> snapshot (I'm not sure it's possible to do it live, and this could
> break the link chain too)
>
> Also, I don't know how to manage the main volume when you take the
> first snapshot ? we should rename it too.
I mean, if we don't allow .raw files to be snapshotted then this problem doesn't exist ;)
> so "vm-disk-100-disk-0.raw|qcow2" , become "vm-disk-100-disk-0-
> snap1.(raw|qcow2)" + new "vm-disk-100-disk-0-current.qcow2" ?
the volid changing on snapshot seems like it would require a lot of adaption.. OTOH, the volid containing a wrong format might also break things.
> I'll try to do test again to see what is possible.
>
>
>
>
> >>- means we don't have to do lookups via the full snapshot list all
> >>the time (e.g., if I want to do a full clone from a snapshot "foo",
> >>I can just pass the snap-foo volume to qemu-img)
>
> ok got it
>
>
>
> >>the naming scheme for snapshots needs to be adapted to not clash
> >>with regular volumes:
>
> >>$ pvesm alloc extsnap 131314 vm-131314-disk-foobar.qcow2 2G
> >>Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-
> >>foobar.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off
> >>preallocation=off compression_type=zlib size=2147483648
> >>lazy_refcounts=off refcount_bits=16 successfully created
> >>'extsnap:131314/vm-131314-disk-foobar.qcow2'
> >>$ qm rescan --vmid 131314
> >>rescan volumes...
> >>can't parse snapname from path at
> >>/usr/share/perl5/PVE/Storage/Plugin.pm line 1934.
>
> any preference for naming scheme ? for lvm external snap, I have used
> "vm-131314-disk-0-snap-<foobar>";
see above
> >>storage_migrate needs to handle external snapshots, or at least
> >>error out.
> it should already work. (I have tested move_disk, and live migration +
> storage migration). qemu_img_convert offline and qemu block job for
> live.
but don't all of those lose the snapshots? did you test it with snapshots and rollback afterwards?
> >>I haven't tested that part or linked clones or a lot of other
> >>advanced related actions at all ;)
>
> For linked clone, we can't have a base image with snapshots (other
> than _base_). so It'll be safe.
ack
> > Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat
> > am
> > 30.09.2024 13:31 CEST geschrieben:
> > Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-
> > cyllene.com>
> > ---
> > src/PVE/Storage/DirPlugin.pm | 1 +
> > src/PVE/Storage/Plugin.pm | 225
> > +++++++++++++++++++++++++++++++--
> > --
> > 2 files changed, 201 insertions(+), 25 deletions(-)
> >
> > diff --git a/src/PVE/Storage/DirPlugin.pm
> > b/src/PVE/Storage/DirPlugin.pm index 2efa8d5..2bef673 100644
> > --- a/src/PVE/Storage/DirPlugin.pm
> > +++ b/src/PVE/Storage/DirPlugin.pm
> > @@ -80,6 +80,7 @@ sub options {
> > is_mountpoint => { optional => 1 },
> > bwlimit => { optional => 1 },
> > preallocation => { optional => 1 },
> > + snapext => { optional => 1 },
> > };
> > }
> >
> > diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm
> > index 6444390..5e5197a 100644
> > --- a/src/PVE/Storage/Plugin.pm
> > +++ b/src/PVE/Storage/Plugin.pm
> > @@ -214,6 +214,11 @@ my $defaultData = {
> > maximum => 65535,
> > optional => 1,
> > },
> > + 'snapext' => {
> > + type => 'boolean',
> > + description => 'enable external snapshot.',
> > + optional => 1,
> > + },
> > },
> > };
> >
> > @@ -695,7 +700,7 @@ sub get_subdir {
> > }
> >
> > sub filesystem_path {
> > - my ($class, $scfg, $volname, $snapname) = @_;
> > + my ($class, $scfg, $volname, $snapname, $current_snap) = @_;
>
> see comment below
>
> >
> > my ($vtype, $name, $vmid, undef, undef, $isBase, $format) =
> > $class->parse_volname($volname);
> > @@ -703,7 +708,7 @@ sub filesystem_path {
> > # Note: qcow2/qed has internal snapshot, so path is always
> > # the same (with or without snapshot => same file).
> > die "can't snapshot this image format\n"
> > - if defined($snapname) && $format !~ m/^(qcow2|qed)$/;
> > + if defined($snapname) && !$scfg->{snapext} && $format !~
> > m/^(qcow2|qed)$/;
> >
> > my $dir = $class->get_subdir($scfg, $vtype);
> >
> > @@ -711,13 +716,22 @@ sub filesystem_path {
> >
> > my $path = "$dir/$name";
> >
> > + if($scfg->{snapext}) {
> > + my $snappath = get_snap_path($path, $snapname);
> > + if($snapname) {
> > + $path = $snappath;
> > + } elsif ($current_snap) {
> > + $path = $current_snap->{file}; }
> > + }
>
> see commente below
>
> > return wantarray ? ($path, $vmid, $vtype) : $path;
> > }
> >
> > sub path {
> > my ($class, $scfg, $volname, $storeid, $snapname) = @_;
> >
> > - return $class->filesystem_path($scfg, $volname, $snapname);
> > + my $current_snapshot = $class->get_current_snapshot($scfg,
> > $storeid, $volname);
>
> >>this is pretty expensive, and would only be needed if $snapname is
> >>not set..
>
> The main problem is that when you start a VM on a specific snapshot, we
> don't send the $snapname param.
>
> One way could be that qemu-server checks the current snapshot from the
> config when doing a specific action like start.
if we manage to find a way to make the volid always point at the top overlay, then that wouldn't be needed..
> > + return $class->filesystem_path($scfg, $volname, $snapname,
> > $current_snapshot);
>
> >>couldn't we avoid extending the signature of filesystem_path and
> >>just
> pass the name of the current snapshot as $snapname?
>
> I need to redo test, I don't remember why I have splitted them, but
> you are right, it should be cleaner.
>
> > }
> >
> > sub create_base {
> > @@ -1074,13 +1088,31 @@ sub volume_resize {
> > sub volume_snapshot {
> > my ($class, $scfg, $storeid, $volname, $snap) = @_;
> >
> > - die "can't snapshot this image format\n" if $volname !~
> > m/\.(qcow2|qed)$/;
> > + die "can't snapshot this image format\n" if $volname !~
> > m/\.(raw|qcow2|qed)$/;
> >
> > - my $path = $class->filesystem_path($scfg, $volname);
> > + die "external snapshot need to be enabled to snapshot .raw
> > volumes\n" if !$scfg->{snapext};
>
> >>this condition is definitely wrong - it means no more snapshotting
> >>unless external snapshot support is enabled..
>
> oops, sorry.
>
> >
> > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> > + if($scfg->{snapext}) {
> >
> > - run_command($cmd);
> > + my $path = $class->path($scfg, $volname, $storeid);
> > +
> > + my $snappath = get_snap_path($path, $snap); my $format =
> > + ($class->parse_volname($volname))[6];
> > +
> > + my $cmd = ['/usr/bin/qemu-img', 'create', '-b', $path,
> > + '-F', $format, '-f', 'qcow2', $snappath];
>
> >>see comments on qemu-server, but.. wouldn't it be better if the file
> >>with $snap in its name would be the one storing that snapshot's data?
> >>i.e., rename the "current" volume to be called ...-$snap... , and
> >>then create a new "current" file without a suffix with the renamed
> >>volume as backing file?
>
> I'll try it !
>
> > +
> > + my $options = "extended_l2=on,";
> > + $options .= preallocation_cmd_option($scfg, 'qcow2'); push @$cmd,
> > + '-o', $options; run_command($cmd);
> > +
> > + } else {
> > +
> > + my $path = $class->filesystem_path($scfg, $volname); my $cmd =
> > +['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> > +run_command($cmd);
> > + }
> >
> > return undef;
> > }
> > @@ -1091,19 +1123,39 @@ sub volume_snapshot {
> > sub volume_rollback_is_possible {
> > my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
> >
> > + if ($scfg->{snapext}) {
> > + #technically, we could manage multibranch, we it need lot more
> > +work
> > for snapshot delete
>
> >>would multibranch be easier if there is a simple 1:1 correspondence
> >>between snapshots and their filenames?
> >>
> >>switching to a different part of the "hierarchy" is then just
> >>- delete current volume
> >>- create new current volume using rollback target as backing file
> the rollback/branch switch is not too difficult, maybe 1:1 naming
> could help.
>
> >>I guess deletion does become harder then, since it potentially
> >>requires multiple rebases..
>
> yes, the biggest difficulty is snapshot delete, as you need to create
> a block-stream job, merging/writing to each branch child, and you need
> to do it atomically with a transaction with multiple jobs.
> So yes, it's possible, but I wanted to keep it easy for now.
sure, this restriction could be lifted in a follow-up!
> > + my $path = $class->filesystem_path($scfg, $volname); my $snappath
> > + = get_snap_path($path, $snap);
> > +
> > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> > $volname);
> > + my $currentpath = $snapshots->{current}->{file}; return 1 if !-e
> > + $snappath || $currentpath eq $snappath;
> > +
> > + die "can't rollback, '$snap' is not most recent snapshot on
> > '$volname'\n";
> > + }
> > +
> > return 1;
> > }
> >
> > sub volume_snapshot_rollback {
> > my ($class, $scfg, $storeid, $volname, $snap) = @_;
> >
> > - die "can't rollback snapshot this image format\n" if $volname
> > !~ m/\.(qcow2|qed)$/;
> > + die "can't rollback snapshot this image format\n" if $volname
> > +!~
> > m/\.(raw|qcow2|qed)$/;
> >
> > - my $path = $class->filesystem_path($scfg, $volname);
> > + die "external snapshot need to be enabled to rollback snapshot
> > .raw volumes\n" if $volname =~ m/\.(raw)$/ && !$scfg->{snapext};
> >
> > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> > + my $path = $class->filesystem_path($scfg, $volname);
> >
> > - run_command($cmd);
> > + if ($scfg->{snapext}) {
> > + #simply delete the current snapshot and recreate it my $snappath
> > += get_snap_path($path, $snap); unlink($snappath);
> > +$class->volume_snapshot($scfg, $storeid, $volname, $snap);
>
> this *reads* so weird ;) it is right given the current semantics
> (current snapshot == live image, snapshot data actually stored in
> parent snapshot)
>
> > + } else {
> > + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> > +run_command($cmd);
> > + }
> >
> > return undef;
> > }
> > @@ -1111,17 +1163,50 @@ sub volume_snapshot_rollback {
> > sub volume_snapshot_delete {
> > my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
> >
> > - die "can't delete snapshot for this image format\n" if $volname
> > !~ m/\.(qcow2|qed)$/;
> > + die "can't delete snapshot for this image format\n" if $volname
> > !~ m/\.(raw|qcow2|qed)$/;
> > +
> > + die "external snapshot need to be enabled to delete snapshot of
> > .raw volumes\n" if !$scfg->{snapext};
> >
> > return 1 if $running;
> >
> > - my $path = $class->filesystem_path($scfg, $volname);
> > + my $cmd = "";
> > + if ($scfg->{snapext}) {
> > +
> > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> > $volname);
> > + my $snappath = $snapshots->{$snap}->{file}; return if !-e
> > + $snappath; #already deleted ?
> > +
> > + my $parentsnap = $snapshots->{$snap}->{parent}; my $childsnap =
> > +$snapshots->{$snap}->{child};
> > + die "error: can't find a parent for this snapshot" if
> > !$parentsnap;
> >
> > - $class->deactivate_volume($storeid, $scfg, $volname, $snap,
> > {});
> > + my $parentpath = $snapshots->{$parentsnap}->{file};
> > + my $parentformat = $snapshots->{$parentsnap}->{'format'} if
> > $parentsnap;
> > + my $childpath = $snapshots->{$childsnap}->{file} if $childsnap; my
> > + $childformat = $snapshots->{$childsnap}->{'format'} if
> > $childsnap;
> >
> > - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> > + print "merge snapshot $snap to $parentsnap\n"; $cmd =
> > + ['/usr/bin/qemu-img', 'commit', $snappath]; run_command($cmd);
> > +
> > + #if we delete an intermediate snapshot, we need to link upper
> > snapshot to base snapshot
> > + if($childpath && -e $childpath) {
> > + die "missing parentsnap snapshot to rebase child $childpath\n"
> > if !$parentpath;
> > + print "link $childsnap to $parentsnap\n";
> > + $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b',
> > + $parentpath,
> > '-F', $parentformat, '-f', $childformat, $childpath];
> > + run_command($cmd);
> > + }
>
> >>wouldn't a regular safe rebase work just as well, instead of commit
> >>+ unsafe rebase? if there is no parent, passing in "" as "new"
> >>backing file should work..
>
> I'll test it, but I'm pretty sure this is the correct way.
>
> > +
> > + #delete the snapshot
> > + unlink($snappath);
> > + } else {
> > + my $path = $class->filesystem_path($scfg, $volname);
> >
> > - run_command($cmd);
> > + $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> > +
> > + $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> > +run_command($cmd);
> > + }
> >
> > return undef;
> > }
> > @@ -1140,10 +1225,6 @@ sub volume_has_feature {
> > my ($class, $scfg, $feature, $storeid, $volname, $snapname,
> > $running, $opts) = @_;
> >
> > my $features = {
> > - snapshot => {
> > - current => { qcow2 => 1 },
> > - snap => { qcow2 => 1 },
> > - },
> > clone => {
> > base => { qcow2 => 1, raw => 1, vmdk => 1 },
> > },
> > @@ -1159,11 +1240,23 @@ sub volume_has_feature {
> > base => { qcow2 => 1, raw => 1, vmdk => 1 },
> > current => { qcow2 => 1, raw => 1, vmdk => 1 },
> > },
> > - rename => {
> > - current => {qcow2 => 1, raw => 1, vmdk => 1},
> > - },
> > + 'rename' => {
> > + current => { qcow2 => 1, raw => 1, vmdk => 1}, }
> > };
> >
> > + if ($scfg->{snapext}) {
> > + $features->{snapshot} = {
> > + current => { raw => 1, qcow2 => 1 },
> > + snap => { raw => 1, qcow2 => 1 },
> > + }
> > + } else {
> > + $features->{snapshot} = {
> > + current => { qcow2 => 1 },
> > + snap => { qcow2 => 1 },
> > + };
> > + }
> > +
>
> >>this could just leave $features as it is, and add the "raw" bits:
> >>
> >>if ($scfg->{snapext}) {
> >> $features->{snapshot}->{current}->{raw} = 1;
> >> $features->{snapshot}->{snap}->{raw} = 1;
> >>}
>
> ok !
> > if ($feature eq 'clone') {
> > if (
> > defined($opts->{valid_target_formats})
> > @@ -1222,7 +1315,9 @@ sub list_images {
> > }
> >
> > if ($vollist) {
> > - my $found = grep { $_ eq $volid } @$vollist;
> > + my $search_volid = $volid;
> > + $search_volid =~ s/-snap-.*\./\./;
> > + my $found = grep { $_ eq $search_volid } @$vollist;
> > next if !$found;
> > }
> >
> > @@ -1380,7 +1475,53 @@ sub status {
> > sub volume_snapshot_info {
> > my ($class, $scfg, $storeid, $volname) = @_;
> >
> > - die "volume_snapshot_info is not implemented for $class";
> > + die "volume_snapshot_info is not implemented for $class" if
> > !$scfg->{snapext};
> > +
> > + my $path = $class->filesystem_path($scfg, $volname);
> > +
> > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase,
> > $format) = $class->parse_volname($volname);
> > +
> > + my $basevolname = $volname;
> > + $basevolname =~ s/\.(raw|qcow2)$//;
> > +
> > + my $snapshots = $class->list_images($storeid, $scfg, $vmid);
> > + my $info = {};
> > + for my $snap (@$snapshots) {
> > +
> > + my $volid = $snap->{volid};
> > + next if ($volid !~ m/$basevolname/);
>
> >>this regex is broken w.r.t. partial matching!
> >>
> >>e.g., if a VM has both a disk -1.qcow2 and -11.qcow2 and I attempt to
> >>snapshot it using external snapshots:
> ok !
>
>
> snapshotting 'drive-scsi0' (extsnap:131314/vm-131314-disk-0.raw)
> Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-0-snap-
> test2.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=on
> preallocation=off compression_type=zlib size=200704
> backing_file=/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-0-snap-
> test.qcow2 backing_fmt=raw lazy_refcounts=off refcount_bits=16
> snapshotting 'drive-scsi1' (extsnap:131314/vm-131314-disk-1.qcow2)
> Formatting '/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-
> test2.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=on
> preallocation=off compression_type=zlib size=2147483648
> backing_file=/mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-
> 11.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
> snapshotting 'drive-scsi2' (extsnap:131314/vm-131314-disk-11.qcow2)
> qemu-img: /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-
> test2.qcow2: Error: Trying to create an image with the same filename as
> the backing file
> snapshot create failed: starting cleanup
> merge snapshot test2 to test
> Image committed.
> merge snapshot test2 to base
> Image committed.
> TASK ERROR: command '/usr/bin/qemu-img create -b
> /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-11-snap-test2.qcow2
> -F qcow2 -f qcow2 /mnt/pve/ext4/extsnap/images/131314/vm-131314-disk-
> 11-snap-test2.qcow2 -o 'extended_l2=on,preallocation=off'' failed: exit
> code 1
>
> > +
> > + my (undef, $snapvolname) = parse_volume_id($volid);
> > + my $snapname = get_snapname_from_path($volid);
> > + my $snapfile = $class->filesystem_path($scfg, $snapvolname,
> > $snapname);
> > + $snapname = 'base' if !$snapname;
> > +
> > + my $format = $snap->{'format'};
> > + my $parentfile = $snap->{parent};
> > + my $parentname = get_snapname_from_path($parentfile) if
> > $parentfile;
> > + $parentname = 'base' if !$parentname && $parentfile;
> > +
> > + $info->{$snapname}->{file} = $snapfile;
> > + $info->{$snapname}->{volid} = $volid;
> > + $info->{$snapname}->{'format'} = $format;
> > + $info->{$snapname}->{parent} = $parentname if $parentname;
> > + $info->{$parentname}->{child} = $snapname if $parentname;
> > + }
> > +
> > + my $current = undef;
> > + for my $id (keys %$info) {
> > + my $snap = $info->{$id};
> > + die "error: snap $id: you can't have multiple current snapshot:
> > current:$current\n" if !$snap->{child} && $current;
> > + $current = $id if !$snap->{child};
> > + }
> > +
> > + if ($current) {
> > + $info->{current}->{file} = $info->{$current}->{file};
> > + $info->{current}->{'format'} = $info->{$current}->{'format'};
> > + $info->{current}->{parent} = $info->{$current}->{parent};
> > + }
> > +
> > + return $info;
> > }
> >
> > sub activate_storage {
> > @@ -1764,4 +1905,38 @@ sub config_aware_base_mkdir {
> > }
> > }
> >
> > +sub get_snap_path {
> > + my ($path, $snap) = @_;
> > +
> > + my $basepath = "";
> > + my $baseformat = "";
> > + if ($path =~ m/^((.*)(vm-(\d+)-disk-(\d+)))(-snap-
> > (.*))?\.(raw|qcow2)/) {
>
> >>this regex is wrong - volumes can have arbitrary names after the -
> >>disk- part..
>
> ah sorry. do you have some example where it's used ? (maybe for efi or
> other specific disk ?)
no, any vdisk can have (almost) anything after the -disk- part. you can allocate such volumes using `pvesm alloc` or the API (we just are not very good at keeping those custom suffixes when moving/migrating/.. ;))
> > + $basepath = $1;
> > + $baseformat = $8;
> > + }
> > + my $format = $snap ? 'qcow2' : $baseformat;
> > + my $snappath = $snap ? $basepath."-snap-$snap.$format" : undef;
> > +
> > + return $snappath;
> > +}
> > +
> > +sub get_snapname_from_path {
> > + my ($path) = @_;
> > +
> > + if ($path =~ m/^((.*)(vm-(\d+)-disk-(\d+)))(-snap-
> > (.*))?\.(raw|qcow2)/) {
>
> >>here as well.. and this whole helper is just used twice in
> >>volume_snapshot_info, maybe it could be inlined or made private
> ok !
>
>
> > + my $snapname = $7;
> > + return $snapname;
> > + }
> > + die "can't parse snapname from path";
> > +}
> > +
> > +sub get_current_snapshot {
> > + my ($class, $scfg, $storeid, $volname) = @_;
> > + #IMPROVE ME: faster way to find current snapshot? (search the
> > most recent created snapshot file ? need to works with lvm volume
> > too)
> > +
> > + return if !$scfg->{snapext};
> > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> > $volname);
> > + return $snapshots->{current};
> > +}
> > +
> > 1;
> > --
> > 2.39.2
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
2024-10-24 7:59 ` Giotta Simon RUAGH via pve-devel
@ 2024-10-24 9:48 ` Fabian Grünbichler
2024-10-25 20:04 ` DERUMIER, Alexandre via pve-devel
[not found] ` <7974c74b2d3a85086e8eda76e52d7a2c58d1dcb9.camel@groupe-cyllene.com>
0 siblings, 2 replies; 27+ messages in thread
From: Fabian Grünbichler @ 2024-10-24 9:48 UTC (permalink / raw)
To: Proxmox VE development discussion, DERUMIER, Alexandre
> Giotta Simon RUAGH via pve-devel <pve-devel@lists.proxmox.com> hat am 24.10.2024 09:59 CEST geschrieben:
> > I mean, if we don't allow .raw files to be snapshotted then this problem doesn't exist ;)
>
> Quick comment from the bleacher; Adding a mechanism to shapshot raw disks might solve the TPM (tpmstate) snapshotting issue, as well as allowing containers to be snapshot.
>
> For context:
> When using a storage that does not natively support snapshotting (NFS on NetApp or similar enterprise storage, in particular), raw disks cannot be snapshot.
> Since tpmstate disks can only be stored as raw (as I understand they are just a binary blob?), this makes it impossible to snapshot or (link-)clone any VMs that have a TPM. This especially is an issue for current Windows clients.
> Same issue for LXC containers, as their storage format is raw only as well.
>
> https://bugzilla.proxmox.com/show_bug.cgi?id=4693
no it does not - with the mechanisms proposed in this patch series, only the initial volume can be raw, if it is snapshotted, the overlays are qcow2. so anything reading from the volume needs qcow2 support, including swtpm. that's why containers are not on the table for now either..
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot)
2024-10-24 7:42 ` Fabian Grünbichler
@ 2024-10-24 11:01 ` DERUMIER, Alexandre via pve-devel
0 siblings, 0 replies; 27+ messages in thread
From: DERUMIER, Alexandre via pve-devel @ 2024-10-24 11:01 UTC (permalink / raw)
To: pve-devel, f.gruenbichler; +Cc: DERUMIER, Alexandre
[-- Attachment #1: Type: message/rfc822, Size: 53155 bytes --]
From: "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
To: "pve-devel@lists.proxmox.com" <pve-devel@lists.proxmox.com>, "f.gruenbichler@proxmox.com" <f.gruenbichler@proxmox.com>
Subject: Re: [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot)
Date: Thu, 24 Oct 2024 11:01:03 +0000
Message-ID: <d3e6af3b66d993263e1163f47cbc3173d3184f33.camel@groupe-cyllene.com>
Thanks Fabian for your time !
I have tried to respond as much as possible. (I'm going to Holiday for
1 week tomorrow, so sorry if I don't reply to all your comments)
-------- Message initial --------
De: Fabian Grünbichler <f.gruenbichler@proxmox.com>
À: "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>, pve-
devel@lists.proxmox.com <pve-devel@lists.proxmox.com>
Objet: Re: [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin:
(lvm with external qcow2 snapshot)
Date: 24/10/2024 09:42:00
> DERUMIER, Alexandre <alexandre.derumier@groupe-cyllene.com> hat am
> 23.10.2024 15:45 CEST geschrieben:
>
>
> > > I am not yet convinced this is somehow a good idea, but maybe you
> > > can
> > > convince me otherwise ;)
>>I maybe judged this too quickly - I thought this was combining LVM +
>>a dir-based storage, but this is putting the qcow2 overlays on LVs,
>>which I missed on the first pass!
Ah ok ! ^_^ yes, this is really 100% lvm (for shared lvm)
> > > variant A: this is just useful for very short-lived snapshots
> > > variant B: these snapshots are supposed to be long-lived
>
> Can you defined "short "/ "long" ? and the different usecase ?
>
> because for me, a snapshot is a snapshot. Sometime I take a snapshot
> before doing some critical changes, but I can't known if I need to
> rollback in next 2h, or next month.
>>yes, this would be an example of a short-lived snapshot
ok
> I think that "long-lived" usecase is backup (but we don't need it),
> or replication (this could apply here, if we want to add replication
> for disaster recovery)
>>backup would also be short-lived usually (the snapshot is just to
>>take the backup, not to keep a backup). long-lived usually is
>>something like "take daily snapshot and keep for a few weeks for file
>>recovery", in addition to regular backups. or "snapshot because we
>>just had an incidence and might need this for forensics in a few
>>months" (can also be solved with backups, of course ;)).
>>the main difference between the two is - for short-lived snapshots
>>performance implications related to snapshots existing are not that
>>important. I can live with a few hours of degraded performance, if
>>the snapshot is part of some important process/work flow. with long-
>>lived snapshots there is a requirement for them to not hurt
>>performance just by existing, because otherwise you can't use them.
>>there is a second reason why long-lived snapshots can be impossible
ok, so here, with the qcow2 format, performance shouldn't be a problem.
(That's the whole point of this patch, using the qcow2 format instead of
the basic slow lvm snapshot)
>>if you need to decide up-front how "big" the delta of that snapshot
>>can grow at most, then in PVE context, you always need to allocate
>>the full volume size (regular thick LVM had both issues - bad
>>performance, and new writes going into a thick snapshot volume).
about thick snapshot volumes: technically, it could be possible to create
a smaller lvm volume than the qcow2 virtual-size, and dynamically extend
it. ovirt is doing it like this. (I have sent a preliminary patch in
september, but for now, I'd like to keep it simple with thick
snapshot volumes).
>>if you can support long-lived snapshots, then you automatically also
>>support short-lived snapshots. the other direction doesn't hold.
>>since PVE only has one kind of snapshots, they need to be useful for
>>long-lived snapshots.
ok got it.
> > > A is not something we want. we intentionally don't have non-thin
> > > LVM
> > > snapshots for example.
>
> AFAIK, we never implemented it because LVM snapshots are slow as
> hell. (as an lvm extent is around 4MB, if you want to write 4k on a
> snapshot, you
> need to reread and rewrite the 4MB, so around 1000x write
> amplification and slow iops)
>>see above - there's two issues, one is performance, the other is that
>>you need to either
>>- make the snapshot smaller than the original volume (and risk
>>running out of space)
>>- make the snapshot as big as the original volume (and blow up space
>>requirements)
>>
>>(thick) LVM snapshots basically barely work for the "take a
>>consistent backup during quiet periods" use case, and not much else.
> This is really the main blocker for my customers migrating from
> vmware
> (and to be honest I have some of them going to oracle olvm (with
> ovirt), because ovirt supports it this way).
> > > B once I create a single snapshot, the "original" storage only
> > > contains the data written up to that point, anything else is
> > > stored
> > > on the "snapshot" storage. this means my snapshot storage must be
> > > at
> > > least as fast/good/shared/.. as my original storage. in that
> > > case, I
> > > can just use the snapshot storage directly and ditch the original
> > > storage?
>
> Sorry, but I don't understand why you are talking about
> original/snapshot storage ? I never have thinked to use another
> storage
> for external snapshot.
>
> The patch is really to add external snapshot on same lvm storage,
> through lvm additional volume, but with qcow2 format to have good
> performance (vs slow lvm snapshot)
see above - I misread and answered too quickly.
>>I took a closer look and replied inline below with some comments -
>>much of it mimics the comments for the dir plugin..
(I'll do a full rework with snapshotname like for dir plugin)
> > Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-
> > cyllene.com>
> > ---
> > src/PVE/Storage.pm | 2 +
> > src/PVE/Storage/LvmQcow2Plugin.pm | 460
> > ++++++++++++++++++++++++++++++
> > src/PVE/Storage/Makefile | 3 +-
> > 3 files changed, 464 insertions(+), 1 deletion(-)
> > create mode 100644 src/PVE/Storage/LvmQcow2Plugin.pm
> >
> > diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm
> > index 57b2038..119998f 100755
> > --- a/src/PVE/Storage.pm
> > +++ b/src/PVE/Storage.pm
> > @@ -28,6 +28,7 @@ use PVE::Storage::Plugin;
> > use PVE::Storage::DirPlugin;
> > use PVE::Storage::LVMPlugin;
> > use PVE::Storage::LvmThinPlugin;
> > +use PVE::Storage::LvmQcow2Plugin;
> > use PVE::Storage::NFSPlugin;
> > use PVE::Storage::CIFSPlugin;
> > use PVE::Storage::ISCSIPlugin;
> > @@ -54,6 +55,7 @@ our $KNOWN_EXPORT_FORMATS = ['raw+size',
> > 'tar+size', 'qcow2+size', 'vmdk+size',
> > PVE::Storage::DirPlugin->register();
> > PVE::Storage::LVMPlugin->register();
> > PVE::Storage::LvmThinPlugin->register();
> > +PVE::Storage::LvmQcow2Plugin->register();
> > PVE::Storage::NFSPlugin->register();
> > PVE::Storage::CIFSPlugin->register();
> > PVE::Storage::ISCSIPlugin->register();
> > diff --git a/src/PVE/Storage/LvmQcow2Plugin.pm
> > b/src/PVE/Storage/LvmQcow2Plugin.pm
> > new file mode 100644
> > index 0000000..68c8686
> > --- /dev/null
> > +++ b/src/PVE/Storage/LvmQcow2Plugin.pm
> > @@ -0,0 +1,460 @@
> > +package PVE::Storage::LvmQcow2Plugin;
> > +
> > +use strict;
> > +use warnings;
> > +
> > +use IO::File;
> > +
> > +use PVE::Tools qw(run_command trim);
> > +use PVE::Storage::Plugin;
> > +use PVE::Storage::LVMPlugin;
> > +use PVE::JSONSchema qw(get_standard_option);
> > +
> > +use base qw(PVE::Storage::LVMPlugin);
>>could we integrate this into the LVM plugin if we implement it?
>>basically add the "snapext" option, which is fixed, and if it is set,
>>disallow rootdir?
yes sure. I wanted to keep it separated for now to avoid putting "if
snapext" everywhere, and also the lvmthin plugin inherits from lvmplugin,
but I can merge it, no problem.
>>probably snapext should also be fixed for dir(-based) storages, since
>>toggling it when snapshots exist would break a ton of stuff?
yes, indeed.
> > +
> > +# Configuration
> > +
> > +sub type {
> > + return 'lvmqcow2';
> > +}
> > +
> > +sub plugindata {
> > + return {
> > + #container not yet implemented #need to implemented dm-qcow2
> > + content => [ {images => 1, rootdir => 1}, { images => 1 }],
>>then rootdir shouldn't be mentioned here at all? the first member
>>contains the possible content types, the second the default if no
>>explicit ones are set..
ah ok, sorry.
> > + };
> > +}
> > +
> > +sub properties {
> > + return {
> > + };
> > +}
> > +
> > +sub options {
> > + return {
> > + vgname => { fixed => 1 },
> > + nodes => { optional => 1 },
> > + shared => { optional => 1 },
> > + disable => { optional => 1 },
> > + saferemove => { optional => 1 },
> > + saferemove_throughput => { optional => 1 },
> > + content => { optional => 1 },
> > + base => { fixed => 1, optional => 1 },
> > + tagged_only => { optional => 1 },
> > + bwlimit => { optional => 1 },
> > + snapext => { fixed => 1 },
> > + };
> > +}
> > +
> > +# Storage implementation
> > +
> > +sub parse_volname {
> > + my ($class, $volname) = @_;
> > +
> > + PVE::Storage::Plugin::parse_lvm_name($volname);
> > + my $format = $volname =~ m/^(.*)-snap-/ ? 'qcow2' : 'raw';
> > +
> > + if ($volname =~ m/^((vm|base)-(\d+)-\S+)$/) {
> > + return ('images', $1, $3, undef, undef, $2 eq 'base',
> > $format);
> > + }
>>I wonder if here we also want to keep the volid/volname like it is,
>>but name the LVs for snapshots differently?
maybe like the zfs|rbd|btrfs snapshots, with volume@snapX for example ?
> > +
> > + die "unable to parse lvm volume name '$volname'\n";
> > +}
> > +
> > +sub filesystem_path {
> > + my ($class, $scfg, $volname, $snapname, $current_snap) = @_;
> > +
> > + my ($vtype, $name, $vmid) = $class->parse_volname($volname);
> > +
> > + my $vg = $scfg->{vgname};
> > +
> > + my $path = "/dev/$vg/$name";
> > +
> > + if($snapname) {
> > + $path = get_snap_volname($path, $snapname);
> > + } elsif ($current_snap) {
> > + $path = $current_snap->{file};
> > + }
see comment for the dir storage ;)
> > +
> > + return wantarray ? ($path, $vmid, $vtype) : $path;
> > +}
> > +
> > +sub create_base {
> > + my ($class, $storeid, $scfg, $volname) = @_;
> > +
> > + my $vg = $scfg->{vgname};
nit: this could move below, closer to where it is used..
> > +
> > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> > + $class->parse_volname($volname);
> > +
> > + die "create_base not possible with base image\n" if $isBase;
> > +
> > + die "unable to create base volume - found snapshot" if $class-
> > > snapshot_exist($scfg, $storeid, $volname);
> > +
> > + my $newname = $name;
> > + $newname =~ s/^vm-/base-/;
> > +
> > + my $cmd = ['/sbin/lvrename', $vg, $volname, $newname];
> > + run_command($cmd, errmsg => "lvrename '$vg/$volname' =>
> > '$vg/$newname' error");
> > +
> > + # set inactive, read-only flags
> > + $cmd = ['/sbin/lvchange', '-an', '-pr', "$vg/$newname"];
> > + eval { run_command($cmd); };
> > + warn $@ if $@;
> > +
> > + my $newvolname = $newname;
> > +
> > + return $newvolname;
> > +}
> > +
> > +sub clone_image {
> > + my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;
> > +
> > + die "can't clone images in lvm storage\n";
> > +}
> > +
> > +sub alloc_image {
> > + my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;
> > +
> > + die "unsupported format '$fmt'" if $fmt ne 'raw';
>>here I also wonder whether it wouldn't be "easier" to only allocate
>>qcow2 formatted LVs (also for the initial allocation)?
>>
>>otherwise, this is basically alloc_image from the LVMPlugin, just
>>with the extra tags added, which could also be done where the
>>snapshots are handled further below..
If possible, I would like to try to have raw support too.
(to be able to easily enable snapshots on an existing storage without
needing to convert TB of data) and for performance too.
I'll do tests with/without to see if the code is really more complex.
> > +
> > + die "illegal name '$name' - should be 'vm-$vmid-*'\n"
> > + if $name && $name !~ m/^vm-$vmid-/;
> > +
> > + my $vgs = PVE::Storage::LVMPlugin::lvm_vgs();
> > +
> > + my $vg = $scfg->{vgname};
> > +
> > + die "no such volume group '$vg'\n" if !defined ($vgs->{$vg});
> > +
> > + my $free = int($vgs->{$vg}->{free});
> > +
> > + die "not enough free space ($free < $size)\n" if $free <
> > $size;
> > +
> > + $name = $class->find_free_diskname($storeid, $scfg, $vmid)
> > + if !$name;
> > +
> > + my $tags = ["pve-vm-$vmid"];
> > + if ($name =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-(.*))?/)
> > {
> > + push @$tags, "\@pve-$1";
> > + }
>>I don't like this part for two reasons:
>>
>>1. without this, alloc_image is identical to LVMPlugin's (see
>>above/below)
>>2. the way the snapshot is encoded in the volname means I can "pvesm
>>alloc" something with a snapshot volname and break things..
>>
>>I think this should be refactored:
>>- alloc_image should allow (only?) qcow2
>>- it should use a custom helper for the actual lvcreate, but be
>>restricted to "proper" volume names
>>- it should use another helper for the qcow2 formatting
>>- volume_snapshot should use the same helpers, but call them with a
>>different LV name
>>
>>the same also applies to free_image, for similar reasons (don't allow
>>to call free_image with a snapshot directly, but use a common helper
>>for free_image and volume_snapshot_delete)
ok !
> > +
> > + PVE::Storage::LVMPlugin::lvcreate($vg, $name, $size, $tags);
> > +
> > + return $name;
> > +}
> > +
> > +sub volume_snapshot_info {
> > + my ($class, $scfg, $storeid, $volname) = @_;
> > +
> > + return $class->list_snapshots($scfg, $storeid, $volname);
why have two public subs for this? the $current_only would not be
needed if the volume itself would also be the current snapshot..
> > +}
> > +
> > +sub activate_volume {
> > + my ($class, $storeid, $scfg, $volname, $snapname, $cache) =
> > @_;
> > +
> > + my $lvm_activate_mode = 'ey';
> > + my $tag = undef;
$tag is undef
> > +
> > + #activate volume && all volumes snapshots by tag
> > + if($volname =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-
> > (.*))?/)
> > {
> > + $tag = "\@pve-vm-$4-disk-$5";
here only the disk itself is put into the tag, the optional snap part
isn't.. and $snapname is ignored as well
> > + }
and what if the regex didn't match?
> > +
> > + my $cmd = ['/sbin/lvchange', "-a$lvm_activate_mode", $tag];
>>so this will only ever activate the "main" volume?
all snapshots have the same tag (the main volume name), so this will
activate every snapshot.
> > + run_command($cmd, errmsg => "can't activate LV '$tag'");
> > +
> > + $cmd = ['/sbin/lvchange', '--refresh', $tag];
> > + run_command($cmd, errmsg => "can't refresh LV '$tag' for
> > activation");
this should
- not use $volname to transfer $snapname, but pass in the volname
contained in the volid
- use $snapname ;)
> > +}
> > +
> > +sub deactivate_volume {
> > + my ($class, $storeid, $scfg, $volname, $snapname, $cache) =
> > @_;
> > +
> > + my $tag = undef;
> > + #deactivate volume && all volumes snasphots by tag
> > + if($volname =~ m/^(((vm|base)-(\d+)-disk-(\d+)))(-snap-
> > (.*))?/)
> > {
> > + $tag = "\@pve-vm-$4-disk-$5";
> > + }
same as for activate_volume applies here as well..
> > +
> > + my $cmd = ['/sbin/lvchange', '-aln', $tag];
> > + run_command($cmd, errmsg => "can't deactivate LV '$tag'");
> > +}
> > +
> > +sub volume_resize {
> > + my ($class, $scfg, $storeid, $volname, $size, $running) = @_;
> > +
> > + #we should resize the base image and parents snapshots,
> > + #but how to manage rollback ?
> > +
> > + die "can't resize if snasphots exist" if $class-
> > > snapshot_exist($scfg, $storeid, $volname);
>>I don't think qemu requires backing file and overlay sizes to agree -
>>just that if you write back (commit) up the chain, you might need to
>>resize the backing file to accommodate the additional data/space. so
>>resizing should be fine (in theory at least)? also see the docs for
>>`qemu-img commit`.
I'll try thanks
> > +
> > + return 1;
> > +}
> > +
> > +sub volume_snapshot {
> > + my ($class, $scfg, $storeid, $volname, $snap) = @_;
> > +
> > + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> > +
> > + my $current_path = $class->path($scfg, $volname, $storeid);
> > + my $current_format =
> > (PVE::Storage::Plugin::file_size_info($current_path))[1];
> > + my $snappath = get_snap_volname($current_path, $snap);
> > +
> > + my $snapvolname = get_snap_volname($volname, $snap);
> > + #allocate lvm snapshot volume
> > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> > + $class->parse_volname($volname);
> > + my $size = $class->volume_size_info($scfg, $storeid, $volname,
> > 5);
> > + #add 100M for qcow2 headers
> > + $size = int($size/1024) + (100*1024);
a pointer where that 100M comes from would be nice ;)
> > +
> > + $class->alloc_image($storeid, $scfg, $vmid, 'raw',
> > $snapvolname,
> > $size);
so this could instead use the regular alloc_image from stock LVM, and
then set the tags here if we want to keep alloc_image as raw-only..
which I don't think we really want ;)
> > +
> > + # create the qcow2 fs
> > + eval {
> > + my $cmd = ['/usr/bin/qemu-img', 'create', '-b',
> > $current_path,
> > + '-F', $current_format, '-f', 'qcow2',
> > $snappath];
> > + my $options = "extended_l2=on,";
> > + $options .=
> > PVE::Storage::Plugin::preallocation_cmd_option($scfg, 'qcow2');
> > + push @$cmd, '-o', $options;
> > + run_command($cmd);
> > + };
see comment for alloc_image above..
> > + if ($@) {
> > + eval { $class->free_image($storeid, $scfg, $snapvolname, 0) };
I guess this is okay, but it would read a bit cleaner if this would
call volume_snapshot_delete..
> > + warn $@ if $@;
> > + }
> > +}
> > +
> > +# Asserts that a rollback to $snap on $volname is possible.
> > +# If certain snapshots are preventing the rollback and $blockers
> > is
> > an array
> > +# reference, the snapshot names can be pushed onto $blockers prior
> > to dying.
> > +sub volume_rollback_is_possible {
> > + my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
> > +
> > + my $path = $class->filesystem_path($scfg, $volname);
> > + my $snappath = get_snap_volname($path, $snap);
> > + my $currentpath = $class->path($scfg, $volname, $storeid);
> > + return 1 if $currentpath eq $snappath;
> > +
> > + die "can't rollback, '$snap' is not most recent snapshot on
> > '$volname'\n";
> > +
> > + return 1;
> > +}
same comments as for the dir-based patches apply here as well - if at
all possible, having a 1:1 mapping of snapshot name to LV name would be
great.. other than LVs not being hardlinkable, I think the same
considerations apply there as well..
> > +
> > +sub volume_snapshot_rollback {
> > + my ($class, $scfg, $storeid, $volname, $snap) = @_;
> > +
> > + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> > + #simply delete the current snapshot and recreate it
> > +
> > + my $snapvolname = get_snap_volname($volname, $snap);
> > +
> > + $class->free_image($storeid, $scfg, $snapvolname, 0);
> > + $class->volume_snapshot($scfg, $storeid, $volname, $snap);
> > +}
> > +
> > +sub volume_snapshot_delete {
> > + my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
> > +
> > + return 1 if $running;
> > +
> > + $class->activate_volume($storeid, $scfg, $volname, undef, {});
> > +
> > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid,
> > $volname);
> > + my $snappath = $snapshots->{$snap}->{file};
> > + if(!$snappath) {
> > + warn "$snap already deleted. skip\n";
> > + return;
> > + }
how can this happen? the snapshot info is generated by querying LVM for
a list of LVs..
> > +
> > + my $snapvolname = $snapshots->{$snap}->{volname};
> > + my $parentsnap = $snapshots->{$snap}->{parent};
> > + my $childsnap = $snapshots->{$snap}->{child};
> > + die "error: can't find a parent for this snapshot" if
> > !$parentsnap;
but the first snapshot doesn't have a parent?
> > +
> > + my $parentpath = $snapshots->{$parentsnap}->{file};
> > + my $parentformat = $snapshots->{$parentsnap}->{'format'} if
> > $parentsnap;
> > + my $childpath = $snapshots->{$childsnap}->{file} if
> > $childsnap;
>>unless someone manually messed with the snapshot tree, in the current
>>scheme any "snapshot" has a child?
the $snapshots have the full chain, including the base image.
I think this why I check it
> > + my $childformat = $snapshots->{$childsnap}->{'format'} if
> > $childsnap;
> > +
> > + print "merge snapshot $snap to $parentsnap\n";
> > + my $cmd = ['/usr/bin/qemu-img', 'commit', $snappath];
> > + run_command($cmd);
> > +
> > + #if we delete an intermediate snapshot, we need to link upper
> > snapshot to base snapshot
> > + if($childpath && -e $childpath) {
> > + die "missing parentsnap snapshot to rebase child $childpath\n" if
> > !$parentpath;
> > + print "link $childsnap to $parentsnap\n";
> > + $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b', $parentpath,
> > '-
> > F', $parentformat, '-f', $childformat, $childpath];
> > + run_command($cmd);
> > + }
>>same here, commit + rebase -u should be the same as rebase ?
AFAIK, rebase is more used with multi-branch snapshots, when you delete
a snapshot and you need to merge the snapshot content into
multiple child snapshots.
and commit is when you need to merge into the parent
https://lists.nongnu.org/archive/html/qemu-devel/2019-08/msg04043.html
from what I have read in different place, rebase is slower
https://lists.defectivebydesign.org/archive/html/qemu-discuss/2019-08/msg00041.html
"Generally, rebase is going to be slower because it reads some clusters
and compares the old with the new backing file to see whether they are
the same. commit will not do that. (OTOH, if there are many clusters
in the old backing chain that happen to contain the same data as the
new
one, this will save space, because it won’t copy those clusters from
the
old backing chain.)
"
> > +
> > + #delete the snapshot
> > + $class->free_image($storeid, $scfg, $snapvolname, 0);
> > +
> > + return;
> > +}
> > +
> > +sub volume_has_feature {
> > + my ($class, $scfg, $feature, $storeid, $volname, $snapname,
> > $running) = @_;
> > +
> > + my $features = {
> > + snapshot => { current => 1 },
> > +# clone => { base => 1, snap => 1}, #don't allow to clone as we
> > can't activate the base between different host ?
>>that's only true for shared LVM though, and the rest would also work
>>for local LVM?
yes, for local lvm , it'll work. I'll check the shared option.
> > + template => { current => 1},
> > + copy => { base => 1, current => 1, snap => 1},
> > + sparseinit => { base => 1, current => 1},
> > + rename => {current => 1},
> > + };
> > +
> > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> > + $class->parse_volname($volname);
> > +
> > + my $key = undef;
> > + if($snapname){
> > + $key = 'snap';
> > + }else{
> > + $key = $isBase ? 'base' : 'current';
> > + }
> > + return 1 if $features->{$feature}->{$key};
> > +
> > + return undef;
> > +}
> > +
> > +sub get_snap_volname {
> > + my ($path, $snap) = @_;
> > +
> > + my $basepath = "";
> > + my $baseformat = "";
> > + if ($path =~ m/^((.*)((vm|base)-(\d+)-disk-(\d+)))(-snap-([a-
> > zA-
> > Z0-9]+))?(\.(raw|qcow2))?/) {
> > + $basepath = $1;
> > + $baseformat = $8;
> > + }
> > + my $snapvolname = $basepath."-snap-$snap.qcow2";
> > + return $snapvolname;
> > +}
> > +
> > +sub get_snapname_from_path {
> > + my ($path) = @_;
> > +
> > + if ($path =~ m/^((.*)((vm|base)-(\d+)-disk-(\d+)))(-snap-([a-
> > zA-
> > Z0-9]+))?(\.(raw|qcow2))?/) {
> > + my $snapname = $7;
> > + return $snapname;
> > + }
> > + die "can't parse snapname from path $path";
> > +}
> > +
> > +sub get_current_snapshot {
> > + my ($class, $scfg, $storeid, $volname) = @_;
> > +
> > + #get more recent ctime volume
> > + return $class->list_snapshots($scfg, $storeid, $volname, 1);
> > +}
> > +my $check_tags = sub {
> > + my ($tags) = @_;
> > +
> > + return defined($tags) && $tags =~ /(^|,)pve-vm-\d+(,|$)/;
> > +};
> > +
> > +sub list_images {
> > + my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;
> > +
> > + my $vgname = $scfg->{vgname};
> > +
> > + $cache->{lvs} = PVE::Storage::LVMPlugin::lvm_list_volumes() if
> > !$cache->{lvs};
> > +
> > + my $res = [];
> > +
> > + if (my $dat = $cache->{lvs}->{$vgname}) {
> > +
> > + foreach my $volname (keys %$dat) {
> > +
> > + next if $volname !~ m/^(vm|base)-(\d+)-/;
> > + my $owner = $2;
> > +
> > + my $info = $dat->{$volname};
> > +
> > + next if $scfg->{tagged_only} && !&$check_tags($info->{tags});
> > +
> > + # Allow mirrored and RAID LVs
> > + next if $info->{lv_type} !~ m/^[-mMrR]$/;
> > +
> > + my $volid = "$storeid:$volname";
> > +
> > + if ($vollist) {
> > + my $found = grep { $_ eq $volid } @$vollist;
> > + next if !$found;
> > + } else {
> > + next if defined($vmid) && ($owner ne $vmid);
> > + }
> > +
> > + push @$res, {
> > + volid => $volid, format => 'raw', size => $info->{lv_size}, vmid
> > =>
> > $owner,
> > + ctime => $info->{ctime},
> > + };
>>but doesn't this now include all snapshot LVs as well? while
>>pretending they are raw?
yes, I was not sure here whether we want to display snapshot volumes or not ?
> > + }
> > + }
> > +
> > + return $res;
> > +}
> > +
> > +sub list_snapshots {
> > + my ($class, $scfg, $storeid, $volname, $current_only) = @_;
> > +
> > + my $vgname = $scfg->{vgname};
> > +
> > + my $basevolname = $volname;
> > + my $lvs = PVE::Storage::LVMPlugin::lvm_list_volumes($vgname);
this
> > +
> > + my $vg = $lvs->{$vgname};
and this seem to be unused?
> > +
> > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase,
> > $format) = $class->parse_volname($volname);
> > + my $snapshots = $class->list_images($storeid, $scfg, $vmid);
> > +
> > + my $info = {};
> > + for my $snap (@$snapshots) {
> > + my $snap_volid = $snap->{volid};
> > + next if ($snap_volid !~ m/$basevolname/);
same issue as with the dir patch - this allows partial matching if two
volumes share a name prefix
> > +
> > + my $snapname = get_snapname_from_path($snap_volid);
> > + my (undef, $snap_volname) =
> > PVE::Storage::parse_volume_id($snap_volid);
> > + my $snapfile = $class->filesystem_path($scfg, $snap_volname,
> > $snapname);
> > + $snapname = 'base' if !$snapname;
> > + $info->{$snapname}->{file} = $snapfile;
> > + $info->{$snapname}->{volname} = $snap_volname;
> > + $info->{$snapname}->{volid} = $snap_volid;
> > + $info->{$snapname}->{ctime} = $snap->{ctime};
> > +
> > + if (!$current_only) {
> > + my (undef, $format, undef, $parentfile, undef) =
> > PVE::Storage::Plugin::file_size_info($snapfile);
> > + next if !$parentfile && $snapname ne 'base'; #bad unlinked
> > snasphot
> > +
> > + my $parentname = get_snapname_from_path($parentfile) if
> > $parentfile;
> > + $parentname = 'base' if !$parentname && $parentfile;
> > +
> > + $info->{$snapname}->{'format'} = $format;
> > + $info->{$snapname}->{parent} = $parentname if $parentname;
> > + $info->{$parentname}->{child} = $snapname if $parentname;
> > + }
> > + }
> > +
> > + my @snapshots_sorted = sort { $info->{$b}{ctime} <=> $info-
> > > {$a}{ctime} } keys %$info;
> > + my $current_snapname = $snapshots_sorted[0];
> > + my $current_snapshot = $info->{$current_snapname};
> > + return $current_snapshot if $current_only;
this (returning to hashes with different structure) is easy to miss and
get wrong..
> > +
> > + $info->{current} = { %$current_snapshot };
especially if this is done anyway, so the caller can just look at that
if they only want the current snapshot..
> > + return $info;
> > +}
> > +
> > +sub snapshot_exist {
> > + my ($class, $scfg, $storeid, $volname) = @_;
> > +
> > + my $basepath = $class->filesystem_path($scfg, $volname);
> > + my $currentpath = $class->path($scfg, $volname, $storeid);
> > +
> > + die "can't resize if snasphots exist" if $currentpath ne
> > $basepath;
I think something here is wrong ;)
> > +
> > +}
> > +1;
> > diff --git a/src/PVE/Storage/Makefile b/src/PVE/Storage/Makefile
> > index d5cc942..1af8aab 100644
> > --- a/src/PVE/Storage/Makefile
> > +++ b/src/PVE/Storage/Makefile
> > @@ -14,7 +14,8 @@ SOURCES= \
> > PBSPlugin.pm \
> > BTRFSPlugin.pm \
> > LvmThinPlugin.pm \
> > - ESXiPlugin.pm
> > + ESXiPlugin.pm \
> > + LvmQcow2Plugin.pm
> >
> > .PHONY: install
> > install:
> > --
> > 2.39.2
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
2024-10-24 6:42 ` Fabian Grünbichler
2024-10-24 7:59 ` Giotta Simon RUAGH via pve-devel
@ 2024-10-25 5:52 ` DERUMIER, Alexandre via pve-devel
1 sibling, 0 replies; 27+ messages in thread
From: DERUMIER, Alexandre via pve-devel @ 2024-10-25 5:52 UTC (permalink / raw)
To: pve-devel; +Cc: DERUMIER, Alexandre
[-- Attachment #1: Type: message/rfc822, Size: 16973 bytes --]
From: "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
To: "pve-devel@lists.proxmox.com" <pve-devel@lists.proxmox.com>
Subject: Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
Date: Fri, 25 Oct 2024 05:52:09 +0000
Message-ID: <100d9cf0211db39a5d94eb596bbca9f7760b2f7c.camel@groupe-cyllene.com>
>
>
> But even with that, you can still have performance impact.
> So yes, I think there is a real usecase for workloads where you only
> need
> snapshots from time to time (before an upgrade for example), but maximum
> performance when no snapshot exists.
>>my main point here is - all other storages treat snapshots as
>>"cheap". if you combine raw+qcow2 snapshot overlays, suddenly
>>performance will get worse if you keep a snapshot around for whatever
>>reason..
Ok, I redone a lot of bench yesterday, with a real san storage, and I
don't see too much difference between qcow2 and raw. (something like
30000iops on raw and 28000~29000 iops on qcow2).
I have tested with 2TB qcow2 file to be sure, and with new qcow2 sub-
cluster feature with l2_extended, it's not too much.
The difference is a little more big on a local nvme (I think because of
low latency), but as the usecase is for network storage, it's ok.
Let's go for full .qcow2, it'll be easier ;)
> > > it's a bit confusing to have a volid ending with raw, with the
> > > current volume and all but the first snapshot actually being
> > > stored
> > > in qcow2 files, with the raw file being the "oldest" snapshot in
> > > the
> > > chain..
> if it's too confusing, we could use for example an .snap extension.
> (as we known that it's qcow2 behind)
>>I haven't thought yet about how to encode the snapshot name into the
>>snapshot file name, but yeah, maybe something like that would be
>>good. or maybe snap-VMID-disk-DISK.qcow2 ?
ok we can use snap-VMID-disk-DISK.qcow2 , it'll be easier for the regex :p
> > > storage_migrate needs to handle external snapshots, or at least
> > > error
> > > out.
> it should already work. (I have tested move_disk, and live migration
> +
> storage migration). qemu_img_convert offline and qemu block job for
> live.
>>but don't all of those lose the snapshots? did you test it with
>>snapshots and rollback afterwards?
ok, sorry, I have tested cloning a new vm from a snapshot. (which uses the
same code). I don't remember how it works with move disk of a running
vm when snapshots exist.
>
> The main problem is when you start a vm on a specific snapshot,
> we don't send the $snapname param.
>
> One way could be that qemu-server check the current snapshot from
> config when doing specific action like start.
>>if we manage to find a way to make the volid always point at the top
>>overlay, then that wouldn't be needed..
yes, indeed if we are able to rename the current snapshot file to vm-
100-disk-0.qcow2 , it's super easy :)
I need to do more test, because drive-reopen only seem to work if the
original drive is defined with -blockdev syntax. (It seem to clash on
nodename if it's not defined with -drive ).
I have begin to look to implement blockdev, it don't seem too much
difficult for the start command line, but I need check the hotplug
part.
Maybe for pve9 ? (it could open door to features like luks encryption
too or or)
I'll rework all the patches after my holiday, with both the renaming of
the current snapshot and only using the .qcow2 format; it should be a lot
cleaner and KISS.
Thanks again for the review !
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
2024-10-24 9:48 ` Fabian Grünbichler
@ 2024-10-25 20:04 ` DERUMIER, Alexandre via pve-devel
[not found] ` <7974c74b2d3a85086e8eda76e52d7a2c58d1dcb9.camel@groupe-cyllene.com>
1 sibling, 0 replies; 27+ messages in thread
From: DERUMIER, Alexandre via pve-devel @ 2024-10-25 20:04 UTC (permalink / raw)
To: pve-devel, f.gruenbichler; +Cc: DERUMIER, Alexandre
[-- Attachment #1: Type: message/rfc822, Size: 15517 bytes --]
From: "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
To: "pve-devel@lists.proxmox.com" <pve-devel@lists.proxmox.com>, "f.gruenbichler@proxmox.com" <f.gruenbichler@proxmox.com>
Subject: Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
Date: Fri, 25 Oct 2024 20:04:10 +0000
Message-ID: <7974c74b2d3a85086e8eda76e52d7a2c58d1dcb9.camel@groupe-cyllene.com>
-------- Message initial --------
De: Fabian Grünbichler <f.gruenbichler@proxmox.com>
À: Proxmox VE development discussion <pve-devel@lists.proxmox.com>,
"DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
Cc: Giotta Simon RUAGH <Simon.Giotta@ruag.ch>
Objet: Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot
support
Date: 24/10/2024 11:48:03
> Giotta Simon RUAGH via pve-devel <pve-devel@lists.proxmox.com> hat am
> 24.10.2024 09:59 CEST geschrieben:
> > I mean, if we don't allow .raw files to be snapshotted then this
> > problem doesn't exist ;)
>
> Quick comment from the bleachers; adding a mechanism to snapshot raw
> disks might solve the TPM (tpmstate) snapshotting issue, as well as
> allowing containers to be snapshot.
>
> For context:
> When using a storage that does not natively support snapshotting (NFS
> on NetApp or similar enterprise storage, in particular), raw disks
> cannot be snapshot.
> Since tpmstate disks can only be stored as raw (as I understand they
> are just a binary blob?), this makes it impossible to snapshot or
> (link-)clone any VMs that have a TPM. This especially is an issue for
> current Windows clients.
> Same issue for LXC containers, as their storage format is raw only as
> well.
>
> https://antiphishing.vadesecure.com/v4?f=OVFyc3FkSEdWUWx0QkZXZpBaFZH9
> xbUoQi0GpC0KVIU1UWG2AZ7f_MrrmMArnShL&i=Sm1YaTk1OUR6bzFoY3JtMLa1y1UZBH
> RmExEJw6jsROc&k=Hbsl&r=dmh0RHJVSG1CUXhDTmJ3UlzJQNCs3CJCbvk0g2AF56AIGO
> 1hR25I2pdFPY1trx1rDP3XHfwmNmQ-
> fWda_VoksA&s=d330b0a625b7cfcbde904428642b953a712c1a40b54a60918ac39b62
> f8ca6535&u=https%3A%2F%2Fbugzilla.proxmox.com%2Fshow_bug.cgi%3Fid%3D4
> 693
>>no it does not - with the mechanisms proposed in this patch series,
>>only the initial volume can be raw, if it is snapshotted, the
>>overlays are qcow2. so anything reading from the volume needs qcow2
>>support, including swtpm.
>>
>>that's why containers are not on the table for now either..
Hi, I really don't know how swtpm works, but for containers maybe
it could be possible.
not yet merged to kernel, but a dm-qcow2 driver is on the roadmap :)
https://www.youtube.com/watch?v=Z7jPpWydEC8
another possibility is qemu-storage-daemon + nbd or vdpa export:
https://blog.deckhouse.io/lvm-qcow-csi-driver-shared-san-kubernetes-81455201590e
About vtpm, is it really a problem not to be able to snapshot it? (I
mean, does the content change regularly? can't we just skip the disk?
I really don't know how it works, I don't use tpm :p)
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support
[not found] ` <7974c74b2d3a85086e8eda76e52d7a2c58d1dcb9.camel@groupe-cyllene.com>
@ 2024-10-28 11:12 ` Fabian Grünbichler
0 siblings, 0 replies; 27+ messages in thread
From: Fabian Grünbichler @ 2024-10-28 11:12 UTC (permalink / raw)
To: DERUMIER, Alexandre, pve-devel
On October 25, 2024 10:04 pm, DERUMIER, Alexandre wrote:
> -------- Message initial --------
> De: Fabian Grünbichler <f.gruenbichler@proxmox.com>
> À: Proxmox VE development discussion <pve-devel@lists.proxmox.com>,
> "DERUMIER, Alexandre" <alexandre.derumier@groupe-cyllene.com>
> Cc: Giotta Simon RUAGH <Simon.Giotta@ruag.ch>
> Objet: Re: [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot
> support
> Date: 24/10/2024 11:48:03
>
>
>> Giotta Simon RUAGH via pve-devel <pve-devel@lists.proxmox.com> hat am
>> 24.10.2024 09:59 CEST geschrieben:
>> > I mean, if we don't allow .raw files to be snapshotted then this
>> > problem doesn't exist ;)
>>
>> Quick comment from the bleachers; adding a mechanism to snapshot raw
>> disks might solve the TPM (tpmstate) snapshotting issue, as well as
>> allowing containers to be snapshot.
>>
>> For context:
>> When using a storage that does not natively support snapshotting (NFS
>> on NetApp or similar enterprise storage, in particular), raw disks
>> cannot be snapshot.
>> Since tpmstate disks can only be stored as raw (as I understand they
>> are just a binary blob?), this makes it impossible to snapshot or
>> (link-)clone any VMs that have a TPM. This especially is an issue for
>> current Windows clients.
>> Same issue for LXC containers, as their storage format is raw only as
>> well.
>>
>> https://antiphishing.vadesecure.com/v4?f=OVFyc3FkSEdWUWx0QkZXZpBaFZH9
>> xbUoQi0GpC0KVIU1UWG2AZ7f_MrrmMArnShL&i=Sm1YaTk1OUR6bzFoY3JtMLa1y1UZBH
>> RmExEJw6jsROc&k=Hbsl&r=dmh0RHJVSG1CUXhDTmJ3UlzJQNCs3CJCbvk0g2AF56AIGO
>> 1hR25I2pdFPY1trx1rDP3XHfwmNmQ-
>> fWda_VoksA&s=d330b0a625b7cfcbde904428642b953a712c1a40b54a60918ac39b62
>> f8ca6535&u=https%3A%2F%2Fbugzilla.proxmox.com%2Fshow_bug.cgi%3Fid%3D4
>> 693
>
>>>no it does not - with the mechanisms proposed in this patch series,
>>>only the initial volume can be raw, if it is snapshotted, the
>>>overlays are qcow2. so anything reading from the volume needs qcow2
>>>support, including swtpm.
>>>
>>>that's why containers are not on the table for now either..
>
> Hi, I really don't know how swtpm works, but for containers maybe
> it could be possible
anything that works for containers should probably also be applicable
for swtpm (the other direction depends on how exactly it is made to work
- e.g., if swtpm gets patched to read directly from a qcow2 file, that
doesn't transfer to qcow2 support for containers ;))
> not yet merged to kernel, but a dm-qcow2 driver is on the roadmap :)
> https://www.youtube.com/watch?v=Z7jPpWydEC8
>
> another possibility is qemu-storage-daemon + nbd or vdpa export:
> https://blog.deckhouse.io/lvm-qcow-csi-driver-shared-san-kubernetes-81455201590e
the issue with nbd is that it requires setting up ahead how many devices
are exposed that way, and that means it doesn't really scale well:
https://bugzilla.proxmox.com/show_bug.cgi?id=4693
> About vtpm, is it really a problem not to be able to snapshot it? (I
> mean, does the content change regularly? can't we just skip the disk?
> I really don't know how it works, I don't use tpm :p)
see the above BZ - they are small enough that we could potentially use a
"poor" variant of external snapshots by just copying the image (it is
rather small after all, so keeping a full copy per snapshot isn't that
bad). or we teach swtpm to read qcow2 files ;)
it is important for windows VMs, since those use/require a TPM in modern
versions. and depending how you set up the VM, your disk encryption keys
might rely on the TPM state being correct, so not snapshotting it is not
really an option ;)
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
end of thread, other threads:[~2024-10-28 11:13 UTC | newest]
Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20240930113153.2896648-1-alexandre.derumier@groupe-cyllene.com>
2024-09-30 11:31 ` [pve-devel] [PATCH v2 pve-storage 1/2] add external snasphot support Alexandre Derumier via pve-devel
2024-10-23 10:12 ` Fabian Grünbichler
2024-10-23 12:59 ` DERUMIER, Alexandre via pve-devel
[not found] ` <f066c13a25b30e3107a9dec8091b456ce2852293.camel@groupe-cyllene.com>
2024-10-24 6:42 ` Fabian Grünbichler
2024-10-24 7:59 ` Giotta Simon RUAGH via pve-devel
2024-10-24 9:48 ` Fabian Grünbichler
2024-10-25 20:04 ` DERUMIER, Alexandre via pve-devel
[not found] ` <7974c74b2d3a85086e8eda76e52d7a2c58d1dcb9.camel@groupe-cyllene.com>
2024-10-28 11:12 ` Fabian Grünbichler
2024-10-25 5:52 ` DERUMIER, Alexandre via pve-devel
2024-10-24 7:50 ` Fabian Grünbichler
2024-09-30 11:31 ` [pve-devel] [PATCH v2 qemu-server 1/1] implement external snapshot Alexandre Derumier via pve-devel
2024-10-23 10:14 ` Fabian Grünbichler
2024-10-23 14:31 ` DERUMIER, Alexandre via pve-devel
2024-10-23 18:09 ` DERUMIER, Alexandre via pve-devel
[not found] ` <aeb9b8ea34826483eabe7fec5e2c12b1e22e132f.camel@groupe-cyllene.com>
2024-10-24 7:43 ` Fabian Grünbichler
2024-09-30 11:31 ` [pve-devel] [PATCH v2 pve-storage 2/2] add lvmqcow2 plugin: (lvm with external qcow2 snapshot) Alexandre Derumier via pve-devel
2024-10-23 10:13 ` Fabian Grünbichler
2024-10-23 13:45 ` DERUMIER, Alexandre via pve-devel
[not found] ` <e976104d8ed7c365d8a482fa320a0691456e69c1.camel@groupe-cyllene.com>
2024-10-24 7:42 ` Fabian Grünbichler
2024-10-24 11:01 ` DERUMIER, Alexandre via pve-devel
2024-10-20 13:03 ` [pve-devel] [PATCH SERIES v2 pve-storage/qemu-server] add external qcow2 snapshot support DERUMIER, Alexandre via pve-devel
2024-10-20 17:34 ` Roland privat via pve-devel
2024-10-20 19:08 ` Esi Y via pve-devel
[not found] ` <CABtLnHqZVhDKnog6jaUBP4HcSwfanyEzWeLdUXnzJs2esJQQkA@mail.gmail.com>
2024-10-22 6:39 ` Thomas Lamprecht
2024-10-22 9:51 ` Esi Y via pve-devel
2024-10-22 14:54 ` DERUMIER, Alexandre via pve-devel
[not found] ` <2f07646b51c85ffe01089c2481dbb9680d75cfcb.camel@groupe-cyllene.com>
2024-10-24 3:37 ` Esi Y via pve-devel
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox