From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <a.lauterer@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 57CA0BF58
 for <pve-devel@lists.proxmox.com>; Fri,  8 Apr 2022 12:14:51 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id DB829D9C3
 for <pve-devel@lists.proxmox.com>; Fri,  8 Apr 2022 12:14:20 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS id D9A09D8BE
 for <pve-devel@lists.proxmox.com>; Fri,  8 Apr 2022 12:14:17 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id AFDB645A0C
 for <pve-devel@lists.proxmox.com>; Fri,  8 Apr 2022 12:14:17 +0200 (CEST)
From: Aaron Lauterer <a.lauterer@proxmox.com>
To: pve-devel@lists.proxmox.com
Date: Fri,  8 Apr 2022 12:14:16 +0200
Message-Id: <20220408101416.165312-5-a.lauterer@proxmox.com>
X-Mailer: git-send-email 2.30.2
In-Reply-To: <20220408101416.165312-1-a.lauterer@proxmox.com>
References: <20220408101416.165312-1-a.lauterer@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.029 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 T_SCC_BODY_TEXT_LINE    -0.01 -
Subject: [pve-devel] [PATCH manager 4/4] ceph pools: allow to create erasure
 code pools
X-BeenThere: pve-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Fri, 08 Apr 2022 10:14:51 -0000

When using erasure coded pools for RBD storages, the main use case in
this patch, we need a replicated pool that will hold the RBD omap and
other metadata. The EC pool itself will only hold the data objects.

The coupling happens when an RBD image is created by adding the
--data-pool parameter. This is why we have the 'data-pool' parameter in
the storage configuration.

To follow already established semantics, once the 'ecprofile' parameter
is provided, we will create a 'X-metadata' and 'X-data' pool. The
storage configuration is always added as it is the only thing that links
the two together (besides naming schemes).

Different pg_num defaults are chosen for the replicated metadata pool as
it will not hold a lot of data.

Signed-off-by: Aaron Lauterer <a.lauterer@proxmox.com>
---
At first I thought that we should add another API endpoint just to create
EC pools, but that brings the problem that we would need a new
(sub)path for the new POST endpoint.

Since we do not actually change that much in the existing endpoint to
support EC pools, I went for extending it instead. We do need to copy over
the pool params for the EC pool and change defaults a bit for the metadata
and data pool.


 PVE/API2/Ceph/Pools.pm | 46 ++++++++++++++++++++++++++++++++++++++----
 PVE/Ceph/Tools.pm      | 11 +++++++---
 2 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/PVE/API2/Ceph/Pools.pm b/PVE/API2/Ceph/Pools.pm
index 05855e15..1a6a346b 100644
--- a/PVE/API2/Ceph/Pools.pm
+++ b/PVE/API2/Ceph/Pools.pm
@@ -280,7 +280,7 @@ my $ceph_pool_common_options = sub {
 
 
 my $add_storage = sub {
-    my ($pool, $storeid) = @_;
+    my ($pool, $storeid, $data_pool) = @_;
 
     my $storage_params = {
 	type => 'rbd',
@@ -290,6 +290,8 @@ my $add_storage = sub {
 	content => 'rootdir,images',
     };
 
+    $storage_params->{'data-pool'} = $data_pool if $data_pool;
+
     PVE::API2::Storage::Config->create($storage_params);
 };
 
@@ -334,6 +336,13 @@ __PACKAGE__->register_method ({
 		type => 'boolean',
 		optional => 1,
 	    },
+	    ecprofile => {
+		description => "Erasure code profile to use. This will create a replicated ".
+			       "metadata pool, an erasure coded data pool and the storage ".
+			       "configuration.",
+		type => 'string',
+		optional => 1,
+	    },
 	    %{ $ceph_pool_common_options->() },
 	},
     },
@@ -344,10 +353,17 @@ __PACKAGE__->register_method ({
 	PVE::Cluster::check_cfs_quorum();
 	PVE::Ceph::Tools::check_ceph_configured();
 
-	my $pool = extract_param($param, 'name');
+	my $name = extract_param($param, 'name');
+	my $pool = $name;
 	my $node = extract_param($param, 'node');
 	my $add_storages = extract_param($param, 'add_storages');
 
+	my $ecprofile = extract_param($param, 'ecprofile');
+	die "Erasure code profile '$ecprofile' does not exist.\n"
+	    if $ecprofile && !PVE::Ceph::Tools::ecprofile_exists($ecprofile);
+
+	$add_storages = 1 if $ecprofile;
+
 	my $rpcenv = PVE::RPCEnvironment::get();
 	my $user = $rpcenv->get_user();
 
@@ -370,13 +386,35 @@ __PACKAGE__->register_method ({
 	$param->{application} //= 'rbd';
 	$param->{pg_autoscale_mode} //= 'warn';
 
+	my $data_param = {};
+	my $data_pool = '';
+
+	if ($ecprofile) {
+	    # copy all params, should be a flat hash
+	    $data_param = { map { $_ => $param->{$_} } keys %$param };
+
+	    $data_param->{pool_type} = 'erasure';
+	    $data_param->{allow_ec_overwrites} = 'true';
+	    $data_param->{erasure_code_profile} = $ecprofile;
+	    delete $data_param->{size};
+	    delete $data_param->{min_size};
+
+	    # metadata pool should be ok with 32 PGs
+	    $param->{pg_num} = 32;
+
+	    $pool = "${name}-metadata";
+	    $data_pool = "${name}-data";
+	}
+
 	my $worker = sub {
 
 	    PVE::Ceph::Tools::create_pool($pool, $param);
 
+	    PVE::Ceph::Tools::create_pool($data_pool, $data_param) if $ecprofile;
+
 	    if ($add_storages) {
-		eval { $add_storage->($pool, "${pool}") };
-		die "adding PVE storage for ceph pool '$pool' failed: $@\n" if $@;
+		eval { $add_storage->($pool, "${name}", $data_pool) };
+		die "adding PVE storage for ceph pool '$name' failed: $@\n" if $@;
 	    }
 	};
 
diff --git a/PVE/Ceph/Tools.pm b/PVE/Ceph/Tools.pm
index 91aa6ce5..18051e06 100644
--- a/PVE/Ceph/Tools.pm
+++ b/PVE/Ceph/Tools.pm
@@ -8,7 +8,7 @@ use File::Basename;
 use IO::File;
 use JSON;
 
-use PVE::Tools qw(run_command dir_glob_foreach);
+use PVE::Tools qw(run_command dir_glob_foreach extract_param);
 use PVE::Cluster qw(cfs_read_file);
 use PVE::RADOS;
 use PVE::Ceph::Services;
@@ -264,12 +264,17 @@ sub create_pool {
 
     my $pg_num = $param->{pg_num} || 128;
 
-    $rados->mon_command({
+    my $mon_params = {
 	prefix => "osd pool create",
 	pool => $pool,
 	pg_num => int($pg_num),
 	format => 'plain',
-    });
+    };
+    $mon_params->{pool_type} = extract_param($param, 'pool_type') if $param->{pool_type};
+    $mon_params->{erasure_code_profile} = extract_param($param, 'erasure_code_profile')
+	if $param->{erasure_code_profile};
+
+    $rados->mon_command($mon_params);
 
     set_pool($pool, $param);
 
-- 
2.30.2