From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 7CC6A1FF185 for ; Mon, 21 Jul 2025 14:54:07 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 644ABEFF0; Mon, 21 Jul 2025 14:55:18 +0200 (CEST) Mime-Version: 1.0 Date: Mon, 21 Jul 2025 14:55:14 +0200 Message-Id: From: "Hannes Laimer" To: "Christian Ebner" , "Proxmox Backup Server development discussion" , "Hannes Laimer" X-Mailer: aerc 0.20.1-112-gd31995f1e20b References: <20250719125035.9926-1-c.ebner@proxmox.com> <20250719125035.9926-5-c.ebner@proxmox.com> <68faffd5-eb4e-4618-8f83-5d239b5ccea2@proxmox.com> In-Reply-To: <68faffd5-eb4e-4618-8f83-5d239b5ccea2@proxmox.com> X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1753102507247 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.100 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_LOTSOFHASH 0.25 Emails with lots of hash-like gibberish SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH proxmox-backup v9 01/46] datastore: add helpers for path/digest to s3 object key conversion X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Cc: pbs-devel Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" On Mon Jul 21, 2025 at 2:51 PM CEST, Christian Ebner wrote: > On 7/21/25 2:29 PM, 
Hannes Laimer wrote: >> On Sat Jul 19, 2025 at 2:49 PM CEST, Christian Ebner wrote: >>> Adds helper methods to generate the s3 object keys given a relative >>> path and filename for datastore contents or digest in case of chunk >>> files. >>> >>> Regular datastore contents are stored by grouping them with a content >>> prefix in the object key. In order to keep the object key length >>> small, given the max limit of 1024 bytes [0], `.cnt` is used as >>> content prefix. Chunks on the other hand are prefixed by `.chunks`, >>> same as on regular datastores. >>> >>> The prefix allows for selective listing of either contents or chunks >>> by providing the prefix to the respective api calls. >>> >>> [0] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html >>> >>> Signed-off-by: Christian Ebner >>> --- >>> changes since version 8: >>> - added unit tests for helper functions >>> >>> Cargo.toml | 1 + >>> pbs-datastore/Cargo.toml | 1 + >>> pbs-datastore/src/lib.rs | 1 + >>> pbs-datastore/src/s3.rs | 114 +++++++++++++++++++++++++++++++++++++++ >>> 4 files changed, 117 insertions(+) >>> create mode 100644 pbs-datastore/src/s3.rs >>> >>> diff --git a/Cargo.toml b/Cargo.toml >>> index adfa427d1..97783ddd5 100644 >>> --- a/Cargo.toml >>> +++ b/Cargo.toml >>> @@ -77,6 +77,7 @@ proxmox-rest-server = { version = "1", features = [ "templates" ] } >>> proxmox-router = { version = "3.2.2", default-features = false } >>> proxmox-rrd = "1" >>> proxmox-rrd-api-types = "1.0.2" >>> +proxmox-s3-client = "1.0.0" >>> # everything but pbs-config and pbs-client use "api-macro" >>> proxmox-schema = "4" >>> proxmox-section-config = "3" >>> diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml >>> index 56f6e9094..c42eff165 100644 >>> --- a/pbs-datastore/Cargo.toml >>> +++ b/pbs-datastore/Cargo.toml >>> @@ -34,6 +34,7 @@ proxmox-borrow.workspace = true >>> proxmox-human-byte.workspace = true >>> proxmox-io.workspace = true >>> proxmox-lang.workspace=true >>> 
+proxmox-s3-client = { workspace = true, features = [ "impl" ] } >>> proxmox-schema = { workspace = true, features = [ "api-macro" ] } >>> proxmox-serde = { workspace = true, features = [ "serde_json" ] } >>> proxmox-sys.workspace = true >>> diff --git a/pbs-datastore/src/lib.rs b/pbs-datastore/src/lib.rs >>> index 5014b6c09..ffd0d91b2 100644 >>> --- a/pbs-datastore/src/lib.rs >>> +++ b/pbs-datastore/src/lib.rs >>> @@ -182,6 +182,7 @@ pub mod manifest; >>> pub mod paperkey; >>> pub mod prune; >>> pub mod read_chunk; >>> +pub mod s3; >>> pub mod store_progress; >>> pub mod task_tracking; >>> >>> diff --git a/pbs-datastore/src/s3.rs b/pbs-datastore/src/s3.rs >>> new file mode 100644 >>> index 000000000..79e7548fb >>> --- /dev/null >>> +++ b/pbs-datastore/src/s3.rs >>> @@ -0,0 +1,114 @@ >>> +use std::path::{Path, PathBuf}; >>> + >>> +use anyhow::{bail, format_err, Error}; >>> + >>> +use proxmox_s3_client::S3ObjectKey; >>> + >>> +/// Object key prefix to group regular datastore contents (not chunks) >>> +pub const S3_CONTENT_PREFIX: &str = ".cnt"; >>> + >>> +/// Generate a relative object key with content prefix from given path and filename >>> +pub fn object_key_from_path(path: &Path, filename: &str) -> Result { >>> + // Force the use of relative paths, otherwise this would loose the content prefix >>> + if path.is_absolute() { >>> + bail!("cannot generate object key from absolute path"); >>> + } >>> + if filename.contains('/') { >>> + bail!("invalid filename containing slashes"); >>> + } >>> + let mut object_path = PathBuf::from(S3_CONTENT_PREFIX); >>> + object_path.push(path); >>> + object_path.push(filename); >>> + >>> + let object_key_str = object_path >>> + .to_str() >>> + .ok_or_else(|| format_err!("unexpected object key path"))?; >>> + Ok(S3ObjectKey::from(object_key_str)) >>> +} >>> + >>> +/// Generate a relative object key with chunk prefix from given digest >>> +pub fn object_key_from_digest(digest: &[u8; 32]) -> Result { >>> + let object_key = 
hex::encode(digest); >>> + let digest_prefix = &object_key[..4]; >>> + let object_key_string = format!(".chunks/{digest_prefix}/{object_key}"); >> >> I just skimmed the S3 key specs, but I was wondering if having the >> `digest_prefix` in the key actually adds anything. For FSs sure, but S3? >> They say this is just chars for them, they don't infer hierarchy on `/`s, >> so whatever optimisation they do with the prefix present, they should >> also do without it, no? > > Yes, however the intention was to keep this analogous to the filesystem-based > datastores in order to be able to fetch the contents by external > tooling without the need to have a running PBS instance. So you could > recreate a datastore locally if needed. > Makes sense :) I just didn't know if longer/shorter keys have any performance implications (probably not, I assume). >> >>> + Ok(S3ObjectKey::from(object_key_string.as_str())) >>> +} >>> + >>> +/// Generate a relative object key with chunk prefix from given digest, extended by suffix >>> +pub fn object_key_from_digest_with_suffix( >>> + digest: &[u8; 32], >>> + suffix: &str, >>> +) -> Result { >>> + if suffix.contains('/') { >>> + bail!("invalid suffix containing slashes"); >>> + } >>> + let object_key = hex::encode(digest); >>> + let digest_prefix = &object_key[..4]; >>> + let object_key_string = format!(".chunks/{digest_prefix}/{object_key}{suffix}"); >>> + Ok(S3ObjectKey::from(object_key_string.as_str())) >>> +} >>> + >>> +#[test] >>> +fn test_object_key_from_path() { >>> + let path = Path::new("vm/100/2025-07-14T14:20:02Z"); >>> + let filename = "drive-scsci0.img.fidx"; >>> + assert_eq!( >>> + object_key_from_path(path, filename).unwrap().to_string(), >>> + ".cnt/vm/100/2025-07-14T14:20:02Z/drive-scsci0.img.fidx", >>> + ); >>> +} >>> + >>> +#[test] >>> +fn test_object_key_from_empty_path() { >>> + let path = Path::new(""); >>> + let filename = ".marker"; >>> + assert_eq!( >>> + object_key_from_path(path, filename).unwrap().to_string(), >>> + 
".cnt/.marker", >>> + ); >>> +} >>> + >>> +#[test] >>> +fn test_object_key_from_absolute_path() { >>> + assert!(object_key_from_path(Path::new("/"), ".marker").is_err()); >>> +} >>> + >>> +#[test] >>> +fn test_object_key_from_path_incorrect_filename() { >>> + assert!(object_key_from_path(Path::new(""), "/.marker").is_err()); >>> +} >>> + >>> +#[test] >>> +fn test_object_key_from_digest() { >>> + use hex::FromHex; >>> + let digest = >>> + <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8") >>> + .unwrap(); >>> + assert_eq!( >>> + object_key_from_digest(&digest).unwrap().to_string(), >>> + ".chunks/bb9f/bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8", >>> + ); >>> +} >>> + >>> +#[test] >>> +fn test_object_key_from_digest_with_suffix() { >>> + use hex::FromHex; >>> + let digest = >>> + <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8") >>> + .unwrap(); >>> + assert_eq!( >>> + object_key_from_digest_with_suffix(&digest, ".0.bad") >>> + .unwrap() >>> + .to_string(), >>> + ".chunks/bb9f/bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8.0.bad", >>> + ); >>> +} >>> + >>> +#[test] >>> +fn test_object_key_from_digest_with_invalid_suffix() { >>> + use hex::FromHex; >>> + let digest = >>> + <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8") >>> + .unwrap(); >>> + assert!(object_key_from_digest_with_suffix(&digest, "/.0.bad").is_err()); >>> +} >> >> >> >> _______________________________________________ >> pbs-devel mailing list >> pbs-devel@lists.proxmox.com >> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel >> >> _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel