From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id A3DB51FF185 for ; Mon, 21 Jul 2025 14:51:07 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id D9283ED1E; Mon, 21 Jul 2025 14:52:17 +0200 (CEST) Message-ID: <68faffd5-eb4e-4618-8f83-5d239b5ccea2@proxmox.com> Date: Mon, 21 Jul 2025 14:51:43 +0200 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird To: Proxmox Backup Server development discussion , Hannes Laimer References: <20250719125035.9926-1-c.ebner@proxmox.com> <20250719125035.9926-5-c.ebner@proxmox.com> Content-Language: en-US, de-DE From: Christian Ebner In-Reply-To: X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1753102297124 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.081 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_LOTSOFHASH 0.25 Emails with lots of hash-like gibberish RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. 
SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH proxmox-backup v9 01/46] datastore: add helpers for path/digest to s3 object key conversion X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Cc: pbs-devel Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="us-ascii"; Format="flowed" Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" On 7/21/25 2:29 PM, Hannes Laimer wrote: > On Sat Jul 19, 2025 at 2:49 PM CEST, Christian Ebner wrote: >> Adds helper methods to generate the s3 object keys given a relative >> path and filename for datastore contents or digest in case of chunk >> files. >> >> Regular datastore contents are stored by grouping them with a content >> prefix in the object key. In order to keep the object key length >> small, given the max limit of 1024 bytes [0], `.cnt` is used as >> content prefix. Chunks on the other hand are prefixed by `.chunks`, >> same as on regular datastores. >> >> The prefix allows for selective listing of either contents or chunks >> by providing the prefix to the respective api calls. 
>> >> [0] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html >> >> Signed-off-by: Christian Ebner >> --- >> changes since version 8: >> - added unit tests for helper functions >> >> Cargo.toml | 1 + >> pbs-datastore/Cargo.toml | 1 + >> pbs-datastore/src/lib.rs | 1 + >> pbs-datastore/src/s3.rs | 114 +++++++++++++++++++++++++++++++++++++++ >> 4 files changed, 117 insertions(+) >> create mode 100644 pbs-datastore/src/s3.rs >> >> diff --git a/Cargo.toml b/Cargo.toml >> index adfa427d1..97783ddd5 100644 >> --- a/Cargo.toml >> +++ b/Cargo.toml >> @@ -77,6 +77,7 @@ proxmox-rest-server = { version = "1", features = [ "templates" ] } >> proxmox-router = { version = "3.2.2", default-features = false } >> proxmox-rrd = "1" >> proxmox-rrd-api-types = "1.0.2" >> +proxmox-s3-client = "1.0.0" >> # everything but pbs-config and pbs-client use "api-macro" >> proxmox-schema = "4" >> proxmox-section-config = "3" >> diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml >> index 56f6e9094..c42eff165 100644 >> --- a/pbs-datastore/Cargo.toml >> +++ b/pbs-datastore/Cargo.toml >> @@ -34,6 +34,7 @@ proxmox-borrow.workspace = true >> proxmox-human-byte.workspace = true >> proxmox-io.workspace = true >> proxmox-lang.workspace=true >> +proxmox-s3-client = { workspace = true, features = [ "impl" ] } >> proxmox-schema = { workspace = true, features = [ "api-macro" ] } >> proxmox-serde = { workspace = true, features = [ "serde_json" ] } >> proxmox-sys.workspace = true >> diff --git a/pbs-datastore/src/lib.rs b/pbs-datastore/src/lib.rs >> index 5014b6c09..ffd0d91b2 100644 >> --- a/pbs-datastore/src/lib.rs >> +++ b/pbs-datastore/src/lib.rs >> @@ -182,6 +182,7 @@ pub mod manifest; >> pub mod paperkey; >> pub mod prune; >> pub mod read_chunk; >> +pub mod s3; >> pub mod store_progress; >> pub mod task_tracking; >> >> diff --git a/pbs-datastore/src/s3.rs b/pbs-datastore/src/s3.rs >> new file mode 100644 >> index 000000000..79e7548fb >> --- /dev/null >> +++ 
b/pbs-datastore/src/s3.rs >> @@ -0,0 +1,114 @@ >> +use std::path::{Path, PathBuf}; >> + >> +use anyhow::{bail, format_err, Error}; >> + >> +use proxmox_s3_client::S3ObjectKey; >> + >> +/// Object key prefix to group regular datastore contents (not chunks) >> +pub const S3_CONTENT_PREFIX: &str = ".cnt"; >> + >> +/// Generate a relative object key with content prefix from given path and filename >> +pub fn object_key_from_path(path: &Path, filename: &str) -> Result { >> + // Force the use of relative paths, otherwise this would loose the content prefix >> + if path.is_absolute() { >> + bail!("cannot generate object key from absolute path"); >> + } >> + if filename.contains('/') { >> + bail!("invalid filename containing slashes"); >> + } >> + let mut object_path = PathBuf::from(S3_CONTENT_PREFIX); >> + object_path.push(path); >> + object_path.push(filename); >> + >> + let object_key_str = object_path >> + .to_str() >> + .ok_or_else(|| format_err!("unexpected object key path"))?; >> + Ok(S3ObjectKey::from(object_key_str)) >> +} >> + >> +/// Generate a relative object key with chunk prefix from given digest >> +pub fn object_key_from_digest(digest: &[u8; 32]) -> Result { >> + let object_key = hex::encode(digest); >> + let digest_prefix = &object_key[..4]; >> + let object_key_string = format!(".chunks/{digest_prefix}/{object_key}"); > > I just skimmed of the S3 key specs, but I was wondering if having the > `digest_prefix` in the key actually adds anything. For FSs sure, but S3? > They say this is just chars for them, they don't infer hierarchy on `/`s, > so whatever optimisation they do with the prefix present, they should > also do without it, no? Yes, however the intention was to keep this analogous to the layout of the filesystem-based datastore, so that the contents can be fetched by external tooling without the need for a running PBS instance. That way you could recreate a datastore locally if needed. 
> >> + Ok(S3ObjectKey::from(object_key_string.as_str())) >> +} >> + >> +/// Generate a relative object key with chunk prefix from given digest, extended by suffix >> +pub fn object_key_from_digest_with_suffix( >> + digest: &[u8; 32], >> + suffix: &str, >> +) -> Result { >> + if suffix.contains('/') { >> + bail!("invalid suffix containing slashes"); >> + } >> + let object_key = hex::encode(digest); >> + let digest_prefix = &object_key[..4]; >> + let object_key_string = format!(".chunks/{digest_prefix}/{object_key}{suffix}"); >> + Ok(S3ObjectKey::from(object_key_string.as_str())) >> +} >> + >> +#[test] >> +fn test_object_key_from_path() { >> + let path = Path::new("vm/100/2025-07-14T14:20:02Z"); >> + let filename = "drive-scsci0.img.fidx"; >> + assert_eq!( >> + object_key_from_path(path, filename).unwrap().to_string(), >> + ".cnt/vm/100/2025-07-14T14:20:02Z/drive-scsci0.img.fidx", >> + ); >> +} >> + >> +#[test] >> +fn test_object_key_from_empty_path() { >> + let path = Path::new(""); >> + let filename = ".marker"; >> + assert_eq!( >> + object_key_from_path(path, filename).unwrap().to_string(), >> + ".cnt/.marker", >> + ); >> +} >> + >> +#[test] >> +fn test_object_key_from_absolute_path() { >> + assert!(object_key_from_path(Path::new("/"), ".marker").is_err()); >> +} >> + >> +#[test] >> +fn test_object_key_from_path_incorrect_filename() { >> + assert!(object_key_from_path(Path::new(""), "/.marker").is_err()); >> +} >> + >> +#[test] >> +fn test_object_key_from_digest() { >> + use hex::FromHex; >> + let digest = >> + <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8") >> + .unwrap(); >> + assert_eq!( >> + object_key_from_digest(&digest).unwrap().to_string(), >> + ".chunks/bb9f/bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8", >> + ); >> +} >> + >> +#[test] >> +fn test_object_key_from_digest_with_suffix() { >> + use hex::FromHex; >> + let digest = >> + <[u8; 
32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8") >> + .unwrap(); >> + assert_eq!( >> + object_key_from_digest_with_suffix(&digest, ".0.bad") >> + .unwrap() >> + .to_string(), >> + ".chunks/bb9f/bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8.0.bad", >> + ); >> +} >> + >> +#[test] >> +fn test_object_key_from_digest_with_invalid_suffix() { >> + use hex::FromHex; >> + let digest = >> + <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8") >> + .unwrap(); >> + assert!(object_key_from_digest_with_suffix(&digest, "/.0.bad").is_err()); >> +} > > > > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel > > _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel