From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 2B7CA1FF185 for ; Mon, 21 Jul 2025 14:28:41 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 8931DDD0E; Mon, 21 Jul 2025 14:29:51 +0200 (CEST) Mime-Version: 1.0 Date: Mon, 21 Jul 2025 14:29:18 +0200 Message-Id: Cc: "pbs-devel" From: "Hannes Laimer" To: "Proxmox Backup Server development discussion" X-Mailer: aerc 0.20.1-112-gd31995f1e20b References: <20250719125035.9926-1-c.ebner@proxmox.com> <20250719125035.9926-5-c.ebner@proxmox.com> In-Reply-To: <20250719125035.9926-5-c.ebner@proxmox.com> X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1753100950935 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.102 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_LOTSOFHASH 0.25 Emails with lots of hash-like gibberish RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. 
SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH proxmox-backup v9 01/46] datastore: add helpers for path/digest to s3 object key conversion X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" On Sat Jul 19, 2025 at 2:49 PM CEST, Christian Ebner wrote: > Adds helper methods to generate the s3 object keys given a relative > path and filename for datastore contents or digest in case of chunk > files. > > Regular datastore contents are stored by grouping them with a content > prefix in the object key. In order to keep the object key length > small, given the max limit of 1024 bytes [0], `.cnt` is used as > content prefix. Chunks on the other hand are prefixed by `.chunks`, > same as on regular datastores. > > The prefix allows for selective listing of either contents or chunks > by providing the prefix to the respective api calls. 
> > [0] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html > > Signed-off-by: Christian Ebner > --- > changes since version 8: > - added unit tests for helper functions > > Cargo.toml | 1 + > pbs-datastore/Cargo.toml | 1 + > pbs-datastore/src/lib.rs | 1 + > pbs-datastore/src/s3.rs | 114 +++++++++++++++++++++++++++++++++++++++ > 4 files changed, 117 insertions(+) > create mode 100644 pbs-datastore/src/s3.rs > > diff --git a/Cargo.toml b/Cargo.toml > index adfa427d1..97783ddd5 100644 > --- a/Cargo.toml > +++ b/Cargo.toml > @@ -77,6 +77,7 @@ proxmox-rest-server = { version = "1", features = [ "templates" ] } > proxmox-router = { version = "3.2.2", default-features = false } > proxmox-rrd = "1" > proxmox-rrd-api-types = "1.0.2" > +proxmox-s3-client = "1.0.0" > # everything but pbs-config and pbs-client use "api-macro" > proxmox-schema = "4" > proxmox-section-config = "3" > diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml > index 56f6e9094..c42eff165 100644 > --- a/pbs-datastore/Cargo.toml > +++ b/pbs-datastore/Cargo.toml > @@ -34,6 +34,7 @@ proxmox-borrow.workspace = true > proxmox-human-byte.workspace = true > proxmox-io.workspace = true > proxmox-lang.workspace=true > +proxmox-s3-client = { workspace = true, features = [ "impl" ] } > proxmox-schema = { workspace = true, features = [ "api-macro" ] } > proxmox-serde = { workspace = true, features = [ "serde_json" ] } > proxmox-sys.workspace = true > diff --git a/pbs-datastore/src/lib.rs b/pbs-datastore/src/lib.rs > index 5014b6c09..ffd0d91b2 100644 > --- a/pbs-datastore/src/lib.rs > +++ b/pbs-datastore/src/lib.rs > @@ -182,6 +182,7 @@ pub mod manifest; > pub mod paperkey; > pub mod prune; > pub mod read_chunk; > +pub mod s3; > pub mod store_progress; > pub mod task_tracking; > > diff --git a/pbs-datastore/src/s3.rs b/pbs-datastore/src/s3.rs > new file mode 100644 > index 000000000..79e7548fb > --- /dev/null > +++ b/pbs-datastore/src/s3.rs > @@ -0,0 +1,114 @@ > +use std::path::{Path, 
PathBuf}; > + > +use anyhow::{bail, format_err, Error}; > + > +use proxmox_s3_client::S3ObjectKey; > + > +/// Object key prefix to group regular datastore contents (not chunks) > +pub const S3_CONTENT_PREFIX: &str = ".cnt"; > + > +/// Generate a relative object key with content prefix from given path and filename > +pub fn object_key_from_path(path: &Path, filename: &str) -> Result { > + // Force the use of relative paths, otherwise this would loose the content prefix > + if path.is_absolute() { > + bail!("cannot generate object key from absolute path"); > + } > + if filename.contains('/') { > + bail!("invalid filename containing slashes"); > + } > + let mut object_path = PathBuf::from(S3_CONTENT_PREFIX); > + object_path.push(path); > + object_path.push(filename); > + > + let object_key_str = object_path > + .to_str() > + .ok_or_else(|| format_err!("unexpected object key path"))?; > + Ok(S3ObjectKey::from(object_key_str)) > +} > + > +/// Generate a relative object key with chunk prefix from given digest > +pub fn object_key_from_digest(digest: &[u8; 32]) -> Result { > + let object_key = hex::encode(digest); > + let digest_prefix = &object_key[..4]; > + let object_key_string = format!(".chunks/{digest_prefix}/{object_key}"); I just skimmed the S3 key specs, but I was wondering if having the `digest_prefix` in the key actually adds anything. For FSs sure, but S3? They say this is just chars for them, they don't infer hierarchy on `/`s, so whatever optimisation they do with the prefix present, they should also do without it, no? 
> + Ok(S3ObjectKey::from(object_key_string.as_str())) > +} > + > +/// Generate a relative object key with chunk prefix from given digest, extended by suffix > +pub fn object_key_from_digest_with_suffix( > + digest: &[u8; 32], > + suffix: &str, > +) -> Result { > + if suffix.contains('/') { > + bail!("invalid suffix containing slashes"); > + } > + let object_key = hex::encode(digest); > + let digest_prefix = &object_key[..4]; > + let object_key_string = format!(".chunks/{digest_prefix}/{object_key}{suffix}"); > + Ok(S3ObjectKey::from(object_key_string.as_str())) > +} > + > +#[test] > +fn test_object_key_from_path() { > + let path = Path::new("vm/100/2025-07-14T14:20:02Z"); > + let filename = "drive-scsci0.img.fidx"; > + assert_eq!( > + object_key_from_path(path, filename).unwrap().to_string(), > + ".cnt/vm/100/2025-07-14T14:20:02Z/drive-scsci0.img.fidx", > + ); > +} > + > +#[test] > +fn test_object_key_from_empty_path() { > + let path = Path::new(""); > + let filename = ".marker"; > + assert_eq!( > + object_key_from_path(path, filename).unwrap().to_string(), > + ".cnt/.marker", > + ); > +} > + > +#[test] > +fn test_object_key_from_absolute_path() { > + assert!(object_key_from_path(Path::new("/"), ".marker").is_err()); > +} > + > +#[test] > +fn test_object_key_from_path_incorrect_filename() { > + assert!(object_key_from_path(Path::new(""), "/.marker").is_err()); > +} > + > +#[test] > +fn test_object_key_from_digest() { > + use hex::FromHex; > + let digest = > + <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8") > + .unwrap(); > + assert_eq!( > + object_key_from_digest(&digest).unwrap().to_string(), > + ".chunks/bb9f/bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8", > + ); > +} > + > +#[test] > +fn test_object_key_from_digest_with_suffix() { > + use hex::FromHex; > + let digest = > + <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8") > + .unwrap(); > + assert_eq!( > + 
object_key_from_digest_with_suffix(&digest, ".0.bad") > + .unwrap() > + .to_string(), > + ".chunks/bb9f/bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8.0.bad", > + ); > +} > + > +#[test] > +fn test_object_key_from_digest_with_invalid_suffix() { > + use hex::FromHex; > + let digest = > + <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8") > + .unwrap(); > + assert!(object_key_from_digest_with_suffix(&digest, "/.0.bad").is_err()); > +} _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel