From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Thu, 3 Jul 2025 15:18:29 +0200
Message-ID: <20250703131837.786811-42-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.47.2
In-Reply-To: <20250703131837.786811-1-c.ebner@proxmox.com>
References: <20250703131837.786811-1-c.ebner@proxmox.com>
Subject: [pbs-devel] [PATCH proxmox-backup v5 38/46] datastore: add local datastore cache for network attached storages

Use the local datastore as a cache with an LRU replacement policy for
operations on a datastore backed by a network layer, e.g. an S3 object
store backend. The goal is to reduce the number of requests to the
backend and thereby save costs (monetary as well as time).

The cacher allows fetching cache items from the backend on cache misses
via the access method.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
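For reviewers, a rough sketch of how the new pieces are intended to fit
together on the read path (a hypothetical call site, not part of this
patch; types and error handling as in pbs-datastore):

  // Hypothetical helper, for illustration only: read a chunk through the
  // local datastore cache, fetching it from the S3 backend on a cache miss.
  async fn read_chunk_cached(
      datastore: &DataStore,
      digest: &[u8; 32],
  ) -> Result<Option<DataBlob>, Error> {
      // cacher() yields Some(S3Cacher) only for datastores with an S3 backend
      if let (Some(cache), Some(mut cacher)) = (datastore.cache(), datastore.cacher()?) {
          // access() returns the cached chunk, fetching it via the cacher and
          // inserting it into the local chunk store on a cache miss
          return cache.access(digest, &mut cacher).await;
      }
      // No cache configured (filesystem backend): callers read the chunk
      // directly from the local chunk store instead
      Ok(None)
  }

A corresponding write path can mark freshly uploaded chunks via
cache_insert(), which stores the chunk locally and makes its digest the
most recently used cache entry.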
 pbs-datastore/src/datastore.rs                |  53 +++++-
 pbs-datastore/src/lib.rs                      |   3 +
 .../src/local_datastore_lru_cache.rs          | 169 ++++++++++++++++++
 3 files changed, 224 insertions(+), 1 deletion(-)
 create mode 100644 pbs-datastore/src/local_datastore_lru_cache.rs

diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index e481335ee..7d3042b54 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -40,8 +40,9 @@ use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
 use crate::fixed_index::{FixedIndexReader, FixedIndexWriter};
 use crate::hierarchy::{ListGroups, ListGroupsType, ListNamespaces, ListNamespacesRecursive};
 use crate::index::IndexFile;
+use crate::local_datastore_lru_cache::S3Cacher;
 use crate::task_tracking::{self, update_active_operations};
-use crate::DataBlob;
+use crate::{DataBlob, LocalDatastoreLruCache};
 
 static DATASTORE_MAP: LazyLock<Mutex<HashMap<String, Arc<DataStoreImpl>>>> =
     LazyLock::new(|| Mutex::new(HashMap::new()));
@@ -135,6 +136,7 @@ pub struct DataStoreImpl {
     last_digest: Option<[u8; 32]>,
     sync_level: DatastoreFSyncLevel,
     backend_config: DatastoreBackendConfig,
+    lru_store_caching: Option<LocalDatastoreLruCache>,
 }
 
 impl DataStoreImpl {
@@ -150,6 +152,7 @@ impl DataStoreImpl {
             last_digest: None,
             sync_level: Default::default(),
             backend_config: Default::default(),
+            lru_store_caching: None,
         })
     }
 }
@@ -254,6 +257,37 @@ impl DataStore {
         Ok(backend_type)
     }
 
+    pub fn cache(&self) -> Option<&LocalDatastoreLruCache> {
+        self.inner.lru_store_caching.as_ref()
+    }
+
+    /// Check if the digest is present in the local datastore cache.
+    /// Always returns false if there is no cache configured for this datastore.
+    pub fn cache_contains(&self, digest: &[u8; 32]) -> bool {
+        if let Some(cache) = self.inner.lru_store_caching.as_ref() {
+            return cache.contains(digest);
+        }
+        false
+    }
+
+    /// Insert the digest as most recently used into the cache.
+    /// Returns successfully even if there is no cache configured for this datastore.
+    pub fn cache_insert(&self, digest: &[u8; 32], chunk: &DataBlob) -> Result<(), Error> {
+        if let Some(cache) = self.inner.lru_store_caching.as_ref() {
+            return cache.insert(digest, chunk);
+        }
+        Ok(())
+    }
+
+    pub fn cacher(&self) -> Result<Option<S3Cacher>, Error> {
+        self.backend().map(|backend| match backend {
+            DatastoreBackend::S3(s3_client) => {
+                Some(S3Cacher::new(s3_client, self.inner.chunk_store.clone()))
+            }
+            DatastoreBackend::Filesystem => None,
+        })
+    }
+
     pub fn lookup_datastore(
         name: &str,
         operation: Option<Operation>,
@@ -436,6 +470,16 @@ impl DataStore {
             .parse_property_string(config.backend.as_deref().unwrap_or(""))?,
         )?;
 
+        const LOCAL_DATASTORE_CACHE_SIZE: usize = 10_000_000;
+        let lru_store_caching = if DatastoreBackendType::S3 == backend_config.ty.unwrap_or_default()
+        {
+            let cache =
+                LocalDatastoreLruCache::new(LOCAL_DATASTORE_CACHE_SIZE, chunk_store.clone());
+            Some(cache)
+        } else {
+            None
+        };
+
         Ok(DataStoreImpl {
             chunk_store,
             gc_mutex: Mutex::new(()),
@@ -445,6 +489,7 @@
             last_digest,
             sync_level: tuning.sync_level.unwrap_or_default(),
             backend_config,
+            lru_store_caching,
         })
     }
 
@@ -1589,6 +1634,12 @@ impl DataStore {
             chunk_count += 1;
 
             if atime < min_atime {
+                if let Some(cache) = self.cache() {
+                    let mut digest_bytes = [0u8; 32];
+                    hex::decode_to_slice(digest.as_bytes(), &mut digest_bytes)?;
+                    // ignore errors, phase 3 will retry cleanup anyway
+                    let _ = cache.remove(&digest_bytes);
+                }
                 delete_list.push(content.key);
                 if bad {
                     gc_status.removed_bad += 1;
diff --git a/pbs-datastore/src/lib.rs b/pbs-datastore/src/lib.rs
index e6f65575b..f1ad3d4c2 100644
--- a/pbs-datastore/src/lib.rs
+++ b/pbs-datastore/src/lib.rs
@@ -216,3 +216,6 @@ pub use snapshot_reader::SnapshotReader;
 
 mod local_chunk_reader;
 pub use local_chunk_reader::LocalChunkReader;
+
+mod local_datastore_lru_cache;
+pub use local_datastore_lru_cache::LocalDatastoreLruCache;
diff --git a/pbs-datastore/src/local_datastore_lru_cache.rs b/pbs-datastore/src/local_datastore_lru_cache.rs
new file mode 100644
index 000000000..3b16daa9c
--- /dev/null
+++ b/pbs-datastore/src/local_datastore_lru_cache.rs
@@ -0,0 +1,169 @@
+//! Use a local datastore as cache for operations on a datastore attached via
+//! a network layer (e.g. via the S3 backend).
+
+use std::future::Future;
+use std::sync::Arc;
+
+use anyhow::{bail, Error};
+use http_body_util::BodyExt;
+
+use pbs_s3_client::S3Client;
+use pbs_tools::async_lru_cache::{AsyncCacher, AsyncLruCache};
+
+use crate::ChunkStore;
+use crate::DataBlob;
+
+#[derive(Clone)]
+pub struct S3Cacher {
+    client: Arc<S3Client>,
+    store: Arc<ChunkStore>,
+}
+
+impl AsyncCacher<[u8; 32], ()> for S3Cacher {
+    fn fetch(
+        &self,
+        key: [u8; 32],
+    ) -> Box<dyn Future<Output = Result<Option<()>, Error>> + Send + 'static> {
+        let client = self.client.clone();
+        let store = self.store.clone();
+        Box::new(async move {
+            match client.get_object(key.into()).await? {
+                None => bail!("could not fetch object with key {}", hex::encode(key)),
+                Some(response) => {
+                    let bytes = response.content.collect().await?.to_bytes();
+                    let chunk = DataBlob::from_raw(bytes.to_vec())?;
+                    store.insert_chunk(&chunk, &key)?;
+                    Ok(Some(()))
+                }
+            }
+        })
+    }
+}
+
+impl S3Cacher {
+    pub fn new(client: Arc<S3Client>, store: Arc<ChunkStore>) -> Self {
+        Self { client, store }
+    }
+}
+
+/// LRU cache using local datastore for caching chunks
+///
+/// Uses an LRU cache, but stores the values on the filesystem rather
+/// than in memory.
+pub struct LocalDatastoreLruCache {
+    cache: AsyncLruCache<[u8; 32], ()>,
+    store: Arc<ChunkStore>,
+}
+
+impl LocalDatastoreLruCache {
+    pub fn new(capacity: usize, store: Arc<ChunkStore>) -> Self {
+        Self {
+            cache: AsyncLruCache::new(capacity),
+            store,
+        }
+    }
+
+    /// Insert a new chunk into the local datastore cache.
+    ///
+    /// Fails if the chunk cannot be inserted successfully.
+    pub fn insert(&self, digest: &[u8; 32], chunk: &DataBlob) -> Result<(), Error> {
+        self.store.insert_chunk(chunk, digest)?;
+        self.cache.insert(*digest, (), |digest| {
+            let (path, _digest_str) = self.store.chunk_path(&digest);
+            // Truncate to free up space but keep the inode around, since that
+            // is used as marker for chunks in use by garbage collection.
+            if let Err(err) = nix::unistd::truncate(&path, 0) {
+                if err != nix::errno::Errno::ENOENT {
+                    return Err(Error::from(err));
+                }
+            }
+            Ok(())
+        })
+    }
+
+    /// Remove a chunk from the local datastore cache.
+    ///
+    /// Fails if the chunk cannot be deleted successfully.
+    pub fn remove(&self, digest: &[u8; 32]) -> Result<(), Error> {
+        self.cache.remove(*digest);
+        let (path, _digest_str) = self.store.chunk_path(digest);
+        std::fs::remove_file(path).map_err(Error::from)
+    }
+
+    pub async fn access(
+        &self,
+        digest: &[u8; 32],
+        cacher: &mut S3Cacher,
+    ) -> Result<Option<DataBlob>, Error> {
+        if self
+            .cache
+            .access(*digest, cacher, |digest| {
+                let (path, _digest_str) = self.store.chunk_path(&digest);
+                // Truncate to free up space but keep the inode around, since that
+                // is used as marker for chunks in use by garbage collection.
+                if let Err(err) = nix::unistd::truncate(&path, 0) {
+                    if err != nix::errno::Errno::ENOENT {
+                        return Err(Error::from(err));
+                    }
+                }
+                Ok(())
+            })
+            .await?
+            .is_some()
+        {
+            let (path, _digest_str) = self.store.chunk_path(digest);
+            let mut file = match std::fs::File::open(&path) {
+                Ok(file) => file,
+                Err(err) => {
+                    // The chunk was expected to be present since the LRU cache has
+                    // it, but it is missing locally; try to fetch it again
+                    if err.kind() == std::io::ErrorKind::NotFound {
+                        match cacher.client.get_object(digest.into()).await? {
+                            None => {
+                                bail!("could not fetch object with key {}", hex::encode(digest))
+                            }
+                            Some(response) => {
+                                let bytes = response.content.collect().await?.to_bytes();
+                                let chunk = DataBlob::from_raw(bytes.to_vec())?;
+                                self.store.insert_chunk(&chunk, digest)?;
+                                std::fs::File::open(&path)?
+                            }
+                        }
+                    } else {
+                        return Err(Error::from(err));
+                    }
+                }
+            };
+            let chunk = match DataBlob::load_from_reader(&mut file) {
+                Ok(chunk) => chunk,
+                Err(err) => {
+                    use std::io::Seek;
+                    // Check if the file is an empty marker file; if so, try fetching the content
+                    if file.seek(std::io::SeekFrom::End(0))? == 0 {
+                        match cacher.client.get_object(digest.into()).await? {
+                            None => {
+                                bail!("could not fetch object with key {}", hex::encode(digest))
+                            }
+                            Some(response) => {
+                                let bytes = response.content.collect().await?.to_bytes();
+                                let chunk = DataBlob::from_raw(bytes.to_vec())?;
+                                self.store.insert_chunk(&chunk, digest)?;
+                                let mut file = std::fs::File::open(&path)?;
+                                DataBlob::load_from_reader(&mut file)?
+                            }
+                        }
+                    } else {
+                        return Err(err);
+                    }
+                }
+            };
+            Ok(Some(chunk))
+        } else {
+            Ok(None)
+        }
+    }
+
+    pub fn contains(&self, digest: &[u8; 32]) -> bool {
+        self.cache.contains(*digest)
+    }
+}
-- 
2.47.2