* [pve-devel] [PATCH storage] rbd: fix #3969: add rbd dev paths with cluster info
@ 2022-04-12 13:40 Aaron Lauterer
0 siblings, 0 replies; only message in thread
From: Aaron Lauterer @ 2022-04-12 13:40 UTC (permalink / raw)
To: pve-devel; +Cc: Thomas Lamprecht
By adding our own customized rbd udev rules and ceph-rbdnamer we can
create device paths that include the cluster fsid and avoid any
ambiguity if the same pool and namespace combination is used in
multiple clusters we connect to.
In addition to the '/dev/rbd/<pool>/...' paths, we now have
'/dev/rbd-pve/<cluster fsid>/<pool>/...' paths.
The other half of the patch makes use of the new device paths in the RBD
plugin.
The cluster fsid is read from the 'ceph.conf' file in the case of a
hyperconverged setup. In the case of an external Ceph cluster we need to
fetch it via a rados api call.
Co-authored-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Aaron Lauterer <a.lauterer@proxmox.com>
---
Makefile | 1 +
PVE/Storage/RBDPlugin.pm | 64 ++++++++++++++++++++++++--------------
udev-rbd/50-rbd-pve.rules | 2 ++
udev-rbd/Makefile | 21 +++++++++++++
udev-rbd/ceph-rbdnamer-pve | 24 ++++++++++++++
5 files changed, 89 insertions(+), 23 deletions(-)
create mode 100644 udev-rbd/50-rbd-pve.rules
create mode 100644 udev-rbd/Makefile
create mode 100755 udev-rbd/ceph-rbdnamer-pve
diff --git a/Makefile b/Makefile
index 431db16..029b586 100644
--- a/Makefile
+++ b/Makefile
@@ -41,6 +41,7 @@ install: PVE pvesm.1 pvesm.bash-completion pvesm.zsh-completion
install -d ${DESTDIR}${SBINDIR}
install -m 0755 pvesm ${DESTDIR}${SBINDIR}
make -C PVE install
+ make -C udev-rbd install
install -d ${DESTDIR}/usr/share/man/man1
install -m 0644 pvesm.1 ${DESTDIR}/usr/share/man/man1/
gzip -9 -n ${DESTDIR}/usr/share/man/man1/pvesm.1
diff --git a/PVE/Storage/RBDPlugin.pm b/PVE/Storage/RBDPlugin.pm
index e287e28..c6f0afb 100644
--- a/PVE/Storage/RBDPlugin.pm
+++ b/PVE/Storage/RBDPlugin.pm
@@ -8,6 +8,7 @@ use JSON;
use Net::IP;
use PVE::CephConfig;
+use PVE::Cluster qw(cfs_read_file);;
use PVE::JSONSchema qw(get_standard_option);
use PVE::ProcFSTools;
use PVE::RADOS;
@@ -22,9 +23,36 @@ my $get_parent_image_name = sub {
return $parent->{image} . "@" . $parent->{snapshot};
};
+my $librados_connect = sub {
+ my ($scfg, $storeid, $options) = @_; # NOTE(review): $options is accepted but never used here
+ # Whatever ceph_connect_option() returns for this storage is passed on as the PVE::RADOS options.
+ my $librados_config = PVE::CephConfig::ceph_connect_option($scfg, $storeid);
+
+ my $rados = PVE::RADOS->new(%$librados_config);
+
+ return $rados;
+};
+
my sub get_rbd_path {
- my ($scfg, $volume) = @_;
- my $path = $scfg->{pool} ? $scfg->{pool} : 'rbd';
+ my ($scfg, $storeid, $volume) = @_;
+
+ my $cluster_id = '';
+ if ($scfg->{monhost}) {
+ my $rados = $librados_connect->($scfg, $storeid);
+ $cluster_id = $rados->mon_command({ prefix => 'fsid', format => 'json' })->{fsid};
+ } else {
+ $cluster_id = cfs_read_file('ceph.conf')->{global}->{fsid};
+ }
+
+ my $uuid_pattern = "([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})";
+ if ($cluster_id =~ qr/^${uuid_pattern}$/is) {
+ $cluster_id = $1; # use untainted value
+ } else {
+ die "cluster fsid has invalid format\n";
+ }
+
+ my $path = "${cluster_id}";
+ $path .= $scfg->{pool} ? "/$scfg->{pool}" : '/rbd';
$path .= "/$scfg->{namespace}" if defined($scfg->{namespace});
$path .= "/$volume" if defined($volume);
return $path;
@@ -70,16 +98,6 @@ my $rados_cmd = sub {
return $build_cmd->('/usr/bin/rados', $scfg, $storeid, $op, @options);
};
-my $librados_connect = sub {
- my ($scfg, $storeid, $options) = @_;
-
- my $librados_config = PVE::CephConfig::ceph_connect_option($scfg, $storeid);
-
- my $rados = PVE::RADOS->new(%$librados_config);
-
- return $rados;
-};
-
# needed for volumes created using ceph jewel (or higher)
my $krbd_feature_update = sub {
my ($scfg, $storeid, $name) = @_;
@@ -380,8 +398,8 @@ sub path {
my ($vtype, $name, $vmid) = $class->parse_volname($volname);
$name .= '@'.$snapname if $snapname;
- my $rbd_path = get_rbd_path($scfg, $name);
- return ("/dev/rbd/${rbd_path}", $vmid, $vtype) if $scfg->{krbd};
+ my $rbd_path = get_rbd_path($scfg, $storeid, $name);
+ return ("/dev/rbd-pve/${rbd_path}", $vmid, $vtype) if $scfg->{krbd};
my $path = "rbd:${rbd_path}";
@@ -449,8 +467,8 @@ sub create_base {
$scfg,
$storeid,
'rename',
- get_rbd_path($scfg, $name),
- get_rbd_path($scfg, $newname),
+ get_rbd_path($scfg, $storeid, $name),
+ get_rbd_path($scfg, $storeid, $newname),
);
run_rbd_command($cmd, errmsg => "rbd rename '$name' error");
@@ -498,12 +516,12 @@ sub clone_image {
$newvol = $name if length($snapname);
my @options = (
- get_rbd_path($scfg, $basename),
+ get_rbd_path($scfg, $storeid, $basename),
'--snap', $snap,
);
push @options, ('--data-pool', $scfg->{'data-pool'}) if $scfg->{'data-pool'};
- my $cmd = $rbd_cmd->($scfg, $storeid, 'clone', @options, get_rbd_path($scfg, $name));
+ my $cmd = $rbd_cmd->($scfg, $storeid, 'clone', @options, get_rbd_path($scfg, $storeid, $name));
run_rbd_command($cmd, errmsg => "rbd clone '$basename' error");
return $newvol;
@@ -561,7 +579,7 @@ sub list_images {
$cache->{rbd} = rbd_ls($scfg, $storeid) if !$cache->{rbd};
- my $dat = $cache->{rbd}->{get_rbd_path($scfg)};
+ my $dat = $cache->{rbd}->{get_rbd_path($scfg, $storeid)};
return [] if !$dat; # nothing found
my $res = [];
@@ -619,8 +637,8 @@ sub deactivate_storage {
}
my sub get_kernel_device_path {
- my ($scfg, $name) = @_;
- return "/dev/rbd/" . get_rbd_path($scfg, $name);
+ my ($scfg, $storeid, $name) = @_;
+ return "/dev/rbd-pve/" . get_rbd_path($scfg, $storeid, $name);
};
sub map_volume {
@@ -631,7 +649,7 @@ sub map_volume {
my $name = $img_name;
$name .= '@'.$snapname if $snapname;
- my $kerneldev = get_kernel_device_path($scfg, $name);
+ my $kerneldev = get_kernel_device_path($scfg, $storeid, $name);
return $kerneldev if -b $kerneldev; # already mapped
@@ -650,7 +668,7 @@ sub unmap_volume {
my ($vtype, $name, $vmid) = $class->parse_volname($volname);
$name .= '@'.$snapname if $snapname;
- my $kerneldev = get_kernel_device_path($scfg, $name);
+ my $kerneldev = get_kernel_device_path($scfg, $storeid, $name);
if (-b $kerneldev) {
my $cmd = $rbd_cmd->($scfg, $storeid, 'unmap', $kerneldev);
diff --git a/udev-rbd/50-rbd-pve.rules b/udev-rbd/50-rbd-pve.rules
new file mode 100644
index 0000000..79432df
--- /dev/null
+++ b/udev-rbd/50-rbd-pve.rules
@@ -0,0 +1,2 @@
+KERNEL=="rbd[0-9]*", ENV{DEVTYPE}=="disk", PROGRAM="/usr/libexec/ceph-rbdnamer-pve %k", SYMLINK+="rbd-pve/%c"
+KERNEL=="rbd[0-9]*", ENV{DEVTYPE}=="partition", PROGRAM="/usr/libexec/ceph-rbdnamer-pve %k", SYMLINK+="rbd-pve/%c-part%n"
diff --git a/udev-rbd/Makefile b/udev-rbd/Makefile
new file mode 100644
index 0000000..065933b
--- /dev/null
+++ b/udev-rbd/Makefile
@@ -0,0 +1,21 @@
+PACKAGE=libpve-storage-perl
+# Installs the rbd udev rule file and the ceph-rbdnamer-pve helper below ${DESTDIR}.
+DESTDIR=
+PREFIX=/usr
+LIBEXECDIR=${PREFIX}/libexec
+LIBDIR=${PREFIX}/lib
+# Nothing to build: 'all' has neither prerequisites nor a recipe.
+all:
+# Helper goes to ${LIBEXECDIR} (mode 0755), rule file to ${LIBDIR}/udev/rules.d (mode 0644).
+.PHONY: install
+install: 50-rbd-pve.rules ceph-rbdnamer-pve
+ install -d ${DESTDIR}${LIBEXECDIR}
+ install -m 0755 ceph-rbdnamer-pve ${DESTDIR}${LIBEXECDIR}
+ install -d ${DESTDIR}${LIBDIR}/udev/rules.d
+ install -m 0644 50-rbd-pve.rules ${DESTDIR}${LIBDIR}/udev/rules.d
+
+.PHONY: clean
+clean:
+# distclean only depends on clean, which has no recipe of its own.
+.PHONY: distclean
+distclean: clean
diff --git a/udev-rbd/ceph-rbdnamer-pve b/udev-rbd/ceph-rbdnamer-pve
new file mode 100755
index 0000000..23dd626
--- /dev/null
+++ b/udev-rbd/ceph-rbdnamer-pve
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+DEV=$1
+NUM=`echo $DEV | sed 's#p.*##g; s#[a-z]##g'`
+POOL=`cat /sys/devices/rbd/$NUM/pool`
+CLUSTER_FSID=`cat /sys/devices/rbd/$NUM/cluster_fsid`
+
+if [ -f /sys/devices/rbd/$NUM/pool_ns ]; then
+ NAMESPACE=`cat /sys/devices/rbd/$NUM/pool_ns`
+else
+ NAMESPACE=""
+fi
+IMAGE=`cat /sys/devices/rbd/$NUM/name`
+SNAP=`cat /sys/devices/rbd/$NUM/current_snap`
+
+echo -n "/$CLUSTER_FSID/$POOL"
+
+if [ -n "$NAMESPACE" ]; then
+ echo -n "/$NAMESPACE"
+fi
+echo -n "/$IMAGE"
+if [ "$SNAP" != "-" ]; then
+ echo -n "@$SNAP"
+fi
--
2.30.2
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-04-12 13:40 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-12 13:40 [pve-devel] [PATCH storage] rbd: fix #3969: add rbd dev paths with cluster info Aaron Lauterer
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox