From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <n.ullreich@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id EC330F4E8
 for <pve-devel@lists.proxmox.com>; Fri, 21 Jul 2023 14:23:19 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id D39571EB0A
 for <pve-devel@lists.proxmox.com>; Fri, 21 Jul 2023 14:23:19 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pve-devel@lists.proxmox.com>; Fri, 21 Jul 2023 14:23:18 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 376E3421C5
 for <pve-devel@lists.proxmox.com>; Fri, 21 Jul 2023 14:23:18 +0200 (CEST)
From: Noel Ullreich <n.ullreich@proxmox.com>
To: pve-devel@lists.proxmox.com
Date: Fri, 21 Jul 2023 14:23:11 +0200
Message-Id: <20230721122314.80427-2-n.ullreich@proxmox.com>
X-Mailer: git-send-email 2.39.2
In-Reply-To: <20230721122314.80427-1-n.ullreich@proxmox.com>
References: <20230721122314.80427-1-n.ullreich@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.059 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 T_SCC_BODY_TEXT_LINE    -0.01 -
Subject: [pve-devel] [PATCH pve-storage v4 1/3] recursively go through
 subdirs to find files
X-BeenThere: pve-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Fri, 21 Jul 2023 12:23:20 -0000

This patch lets `get_subdir_files` call itself recursively, so that
subdirectories can be searched up to a configurable depth. Recursive
search is supported for ISO images, container templates (vztmpl) and
snippets, but not for backups.

As a security measure, volume names that contain a parent-directory
component (`..`, e.g. `/../`) are rejected when they are parsed.

The feature is opt-in: the scan depth (`scan-depth`) defaults to 0, i.e.
no subdirectories are searched. It can be changed via the API, the web
interface and `pvesm` (see the other patches in this series).

Signed-off-by: Noel Ullreich <n.ullreich@proxmox.com>
---
 src/PVE/Storage.pm        | 11 ++++++++
 src/PVE/Storage/Plugin.pm | 54 ++++++++++++++++++++++++---------------
 2 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm
index b99ed35..02abacf 100755
--- a/src/PVE/Storage.pm
+++ b/src/PVE/Storage.pm
@@ -113,6 +113,17 @@ our $VZTMPL_EXT_RE_1 = qr/\.tar\.(gz|xz|zst)/i;
 
 our $BACKUP_EXT_RE_2 = qr/\.(tgz|(?:tar|vma)(?:\.(${\PVE::Storage::Plugin::COMPRESSOR_RE}))?)/;
 
+# optional subdirs ([0-9A-Za-z_.-] only; 'A-z' would also allow [\]^`), then a name without '/'
+our $INTERMEDIATE_SUBDIR_EXT_RE_3 = qr/(?:[0-9A-Za-z\_\-\.]+\/)*[^\/]+/i;
+# same subdir prefix, but the remainder may be any non-empty run of characters (no newline)
+our $SUBDIR_ANY_FILEEXTENSION_EXT_RE_4 = qr/(?:[0-9A-Za-z\_\-\.]+\/)*.+/i;
+# '..' is forbidden as a path component: at the beginning, between two '/' and at the end
+my $dots = quotemeta('..');
+my $beginning = qr!^$dots/!;
+my $between = qr!/$dots/!;
+my $end = qr!/$dots$!;
+our $forbidden_double_dots_re = qr!(?:$beginning|$between|$end)!;
+
 # FIXME remove with PVE 8.0, add versioned breaks for pve-manager
 our $vztmpl_extension_re = $VZTMPL_EXT_RE_1;
 
diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm
index 9d3b1ae..8831fbb 100644
--- a/src/PVE/Storage/Plugin.pm
+++ b/src/PVE/Storage/Plugin.pm
@@ -621,6 +621,8 @@ sub parse_name_dir {
 sub parse_volname {
     my ($class, $volname) = @_;
 
+    die "volname must not contain parent directories '/../'\n" if $volname =~ $PVE::Storage::forbidden_double_dots_re;
+
     if ($volname =~ m!^(\d+)/(\S+)/(\d+)/(\S+)$!) {
 	my ($basedvmid, $basename) = ($1, $2);
 	parse_name_dir($basename);
@@ -631,9 +633,9 @@ sub parse_volname {
 	my ($vmid, $name) = ($1, $2);
 	my (undef, $format, $isBase) = parse_name_dir($name);
 	return ('images', $name, $vmid, undef, undef, $isBase, $format);
-    } elsif ($volname =~ m!^iso/([^/]+$PVE::Storage::ISO_EXT_RE_0)$!) {
+    } elsif ($volname =~ m!^iso/($PVE::Storage::INTERMEDIATE_SUBDIR_EXT_RE_3$PVE::Storage::ISO_EXT_RE_0)$!) {
 	return ('iso', $1);
-    } elsif ($volname =~ m!^vztmpl/([^/]+$PVE::Storage::VZTMPL_EXT_RE_1)$!) {
+    } elsif ($volname =~ m!^vztmpl/($PVE::Storage::INTERMEDIATE_SUBDIR_EXT_RE_3$PVE::Storage::VZTMPL_EXT_RE_1)$!) {
 	return ('vztmpl', $1);
     } elsif ($volname =~ m!^rootdir/(\d+)$!) {
 	return ('rootdir', $1, $1);
@@ -643,7 +645,7 @@ sub parse_volname {
 	    return ('backup', $fn, $2);
 	}
 	return ('backup', $fn);
-    } elsif ($volname =~ m!^snippets/([^/]+)$!) {
+    } elsif ($volname =~ m!^snippets/($PVE::Storage::INTERMEDIATE_SUBDIR_EXT_RE_3)$!) {
 	return ('snippets', $1);
     }
 
@@ -1212,28 +1214,33 @@ sub list_images {
 }
 
 # list templates ($tt = <iso|vztmpl|backup|snippets>)
-my $get_subdir_files = sub {
-    my ($sid, $path, $tt, $vmid) = @_;
+sub get_subdir_files {
+    my ($sid, $path, $tt, $scfg, $vmid, $remaining_depth) = @_;
+    my $storage_path = $scfg->{path};
+    my $content_dir = $scfg->{"content-dirs"}->{$tt} // $vtype_subdirs->{$tt};
 
     my $res = [];
 
     foreach my $fn (<$path/*>) {
 	my $st = File::stat::stat($fn);
 
-	next if (!$st || S_ISDIR($st->mode));
+	next if (!$st);
+
+	if (S_ISDIR($st->mode)) {
+	    if ($remaining_depth) {
+		push @$res, get_subdir_files($sid, $fn, $tt, $scfg, $vmid, $remaining_depth-1);
+	    }
+	    next;
+	}
 
 	my $info;
 
 	if ($tt eq 'iso') {
-	    next if $fn !~ m!/([^/]+$PVE::Storage::ISO_EXT_RE_0)$!i;
-
+	    next if $fn !~ m/(?:^$storage_path\/$content_dir\/)($PVE::Storage::INTERMEDIATE_SUBDIR_EXT_RE_3$PVE::Storage::ISO_EXT_RE_0)/;
 	    $info = { volid => "$sid:iso/$1", format => 'iso' };
-
 	} elsif ($tt eq 'vztmpl') {
-	    next if $fn !~ m!/([^/]+$PVE::Storage::VZTMPL_EXT_RE_1)$!;
-
+	    next if $fn !~ m/(?:^$storage_path\/$content_dir\/)($PVE::Storage::INTERMEDIATE_SUBDIR_EXT_RE_3$PVE::Storage::VZTMPL_EXT_RE_1)/;
 	    $info = { volid => "$sid:vztmpl/$1", format => "t$2" };
-
 	} elsif ($tt eq 'backup') {
 	    next if $fn !~ m!/([^/]+$PVE::Storage::BACKUP_EXT_RE_2)$!;
 	    my $original = $fn;
@@ -1262,9 +1269,9 @@ my $get_subdir_files = sub {
 
 	    $info->{protected} = 1 if -e PVE::Storage::protection_file_path($original);
 	} elsif ($tt eq 'snippets') {
-
+	    next if $fn !~ m/(?:^$storage_path\/$content_dir\/)($PVE::Storage::SUBDIR_ANY_FILEEXTENSION_EXT_RE_4)/;
 	    $info = {
-		volid => "$sid:snippets/". basename($fn),
+		volid => "$sid:snippets/$1", #basename($fn),
 		format => 'snippet',
 	    };
 	}
@@ -1274,14 +1281,18 @@ my $get_subdir_files = sub {
 
 	push @$res, $info;
     }
-
     return $res;
 };
 
+sub flatten { # expand (possibly nested) array refs in @_ into one flat list
+    return map { ref($_) eq 'ARRAY' ? flatten(@$_) : $_ } @_;
+}
+
 # If attributes are set on a volume, they should be included in the result.
 # See get_volume_attribute for a list of possible attributes.
 sub list_volumes {
     my ($class, $storeid, $scfg, $vmid, $content_types) = @_;
+    my $max_depth = $scfg->{'scan-depth'} // 0;
 
     my $res = [];
     my $vmlist = PVE::Cluster::get_vmlist();
@@ -1294,17 +1305,19 @@ sub list_volumes {
 	    my $path = $class->get_subdir($scfg, $type);
 
 	    if ($type eq 'iso' && !defined($vmid)) {
-		$data = $get_subdir_files->($storeid, $path, 'iso');
+		$data = get_subdir_files($storeid, $path, 'iso', $scfg, undef, $max_depth);
 	    } elsif ($type eq 'vztmpl'&& !defined($vmid)) {
-		$data = $get_subdir_files->($storeid, $path, 'vztmpl');
+		$data = get_subdir_files($storeid, $path , 'vztmpl', $scfg, undef, $max_depth);
 	    } elsif ($type eq 'backup') {
-		$data = $get_subdir_files->($storeid, $path, 'backup', $vmid);
+		$data = get_subdir_files($storeid, $path, 'backup', $scfg, $vmid, $max_depth);
 	    } elsif ($type eq 'snippets') {
-		$data = $get_subdir_files->($storeid, $path, 'snippets');
+		$data = get_subdir_files($storeid, $path, 'snippets', $scfg, undef, $max_depth);
 	    }
 	}
 
-	next if !$data;
+	$data = [flatten($data)];
+
+	next if !@$data[0];
 
 	foreach my $item (@$data) {
 	    if ($type eq 'images' || $type eq 'rootdir') {
@@ -1322,7 +1335,6 @@ sub list_volumes {
 	    } else {
 		$item->{content} = $type;
 	    }
-
 	    push @$res, $item;
 	}
     }
-- 
2.39.2