From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id EB9B677ADB for ; Wed, 28 Apr 2021 18:07:36 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id A3CF8119DF for ; Wed, 28 Apr 2021 18:07:06 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 0BF4F1199F for ; Wed, 28 Apr 2021 18:07:05 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id D6B364294D for ; Wed, 28 Apr 2021 18:07:04 +0200 (CEST) From: Stefan Reiter To: pbs-devel@lists.proxmox.com Date: Wed, 28 Apr 2021 18:06:54 +0200 Message-Id: <20210428160655.29941-3-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20210428160655.29941-1-s.reiter@proxmox.com> References: <20210428160655.29941-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.016 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH proxmox-backup 2/3] RemoteChunkReader: add LRU cached variant X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 28 Apr 2021 16:07:37 -0000 Retain the old constructor for compatibility, most use cases don't need an LRU cache anyway. Uses the MmapBuffer backend. For now convert the 'map' API to use the new variant, as the same set of chunks might be accessed multiple times in a random pattern there. Signed-off-by: Stefan Reiter --- src/bin/proxmox_backup_client/mount.rs | 12 ++- src/client/remote_chunk_reader.rs | 110 +++++++++++++++++++++---- 2 files changed, 104 insertions(+), 18 deletions(-) diff --git a/src/bin/proxmox_backup_client/mount.rs b/src/bin/proxmox_backup_client/mount.rs index f3498e35..6494c900 100644 --- a/src/bin/proxmox_backup_client/mount.rs +++ b/src/bin/proxmox_backup_client/mount.rs @@ -280,7 +280,17 @@ async fn mount_do(param: Value, pipe: Option) -> Result { } else if server_archive_name.ends_with(".fidx") { let index = client.download_fixed_index(&manifest, &server_archive_name).await?; let size = index.index_bytes(); - let chunk_reader = RemoteChunkReader::new(client.clone(), crypt_config, file_info.chunk_crypt_mode(), HashMap::new()); + let chunk_reader = RemoteChunkReader::new_lru_cached( + client.clone(), + crypt_config, + file_info.chunk_crypt_mode(), + HashMap::new(), + 16, + index + .chunk_info(0) + .map(|info| info.size() as usize) + .unwrap_or(4 * 1024 * 1024), + )?; let reader = AsyncIndexReader::new(index, chunk_reader); let name = &format!("{}:{}/{}", repo.to_string(), path, archive_name); diff --git a/src/client/remote_chunk_reader.rs b/src/client/remote_chunk_reader.rs index 06f693a2..35e279bf 100644 --- a/src/client/remote_chunk_reader.rs +++ b/src/client/remote_chunk_reader.rs @@ -1,5 +1,6 @@ use std::future::Future; use std::collections::HashMap; +use std::convert::TryInto; use std::pin::Pin; use std::sync::{Arc, Mutex}; @@ -8,6 +9,14 @@ use anyhow::{bail, Error}; use super::BackupReader; use crate::backup::{AsyncReadChunk, CryptConfig, CryptMode, DataBlob, ReadChunk}; use crate::tools::runtime::block_on; +use crate::tools::lru_cache::LruCache; +use crate::tools::mmap_buffer::{MmapBuffer, MmapBufferEntry}; + +struct Cache { + cache_hint: HashMap<[u8; 32], usize>, + hinted: HashMap<[u8; 32], Vec>, + lru: Option<(LruCache<[u8; 32], MmapBufferEntry>, Arc)>, +} /// Read chunks from remote host using ``BackupReader`` #[derive(Clone)] @@ -15,8 +24,7 @@ pub struct RemoteChunkReader { client: Arc, crypt_config: Option>, crypt_mode: CryptMode, - cache_hint: Arc>, - cache: Arc>>>, + cache: Arc>, } impl RemoteChunkReader { @@ -33,11 +41,42 @@ impl RemoteChunkReader { client, crypt_config, crypt_mode, - cache_hint: Arc::new(cache_hint), - cache: Arc::new(Mutex::new(HashMap::new())), + cache: Arc::new(Mutex::new(Cache { + hinted: HashMap::with_capacity(cache_hint.len()), + cache_hint, + lru: None, + })), } } + /// Create a new instance. + /// + /// Chunks listed in ``cache_hint`` are cached and kept in RAM, as well as the last + /// 'cache_lru' accessed chunks (the latter via a mmap file). + pub fn new_lru_cached( + client: Arc, + crypt_config: Option>, + crypt_mode: CryptMode, + cache_hint: HashMap<[u8; 32], usize>, + cache_lru: usize, + max_chunk_size: usize, + ) -> Result { + let new = Self::new(client, crypt_config, crypt_mode, cache_hint); + { + let mut cache = new.cache.lock().unwrap(); + cache.lru = Some(( + LruCache::new(cache_lru), + MmapBuffer::new( + // account for chunk size prefix + max_chunk_size + std::mem::size_of::(), + // the LruCache may hold one more item than its capacity during insert + cache_lru + 1, + )?, + )); + } + Ok(new) + } + /// Downloads raw chunk. This only verifies the (untrusted) CRC32, use /// DataBlob::verify_unencrypted or DataBlob::decode before storing/processing further. pub async fn read_raw_chunk(&self, digest: &[u8; 32]) -> Result { @@ -64,6 +103,51 @@ impl RemoteChunkReader { }, } } + + fn cache_get(&self, digest: &[u8; 32]) -> Option> { + let cache = &mut *self.cache.lock().unwrap(); + if let Some(data) = cache.hinted.get(digest) { + return Some(data.to_vec()); + } + + cache + .lru + .as_mut() + .map(|lru| lru.0.get_mut(*digest).map(|alloc| { + let s = std::mem::size_of::(); + let len = usize::from_ne_bytes(alloc[..s].try_into().unwrap()); + alloc[s..(len + s)].to_vec() + })) + .flatten() + } + + fn cache_insert(&self, digest: &[u8; 32], raw_data: &[u8]) { + let cache = &mut *self.cache.lock().unwrap(); + + // if hinted, always cache given digest + if cache.cache_hint.contains_key(digest) { + cache.hinted.insert(*digest, raw_data.to_vec()); + return; + } + + // otherwise put in LRU + if let Some((ref mut lru, ref mut mmap)) = cache.lru { + let mut alloc = match mmap.allocate() { + Ok(alloc) => alloc, + Err(err) => { + // *really* shouldn't happen, log to stderr/journal if it does - we can + // continue reading data, it just won't be cached + eprintln!("RemoteChunkReader: error on LRU alloc: {}", err); + return; + } + }; + let s = std::mem::size_of::(); + // prefix with chunk size + alloc[0..s].copy_from_slice(&raw_data.len().to_ne_bytes()[..]); + alloc[s..(s + raw_data.len())].copy_from_slice(raw_data); + lru.insert(*digest, alloc); + } + } } impl ReadChunk for RemoteChunkReader { @@ -72,18 +156,14 @@ impl ReadChunk for RemoteChunkReader { } fn read_chunk(&self, digest: &[u8; 32]) -> Result, Error> { - if let Some(raw_data) = (*self.cache.lock().unwrap()).get(digest) { - return Ok(raw_data.to_vec()); + if let Some(raw_data) = self.cache_get(digest) { + return Ok(raw_data); } let chunk = ReadChunk::read_raw_chunk(self, digest)?; let raw_data = chunk.decode(self.crypt_config.as_ref().map(Arc::as_ref), Some(digest))?; - - let use_cache = self.cache_hint.contains_key(digest); - if use_cache { - (*self.cache.lock().unwrap()).insert(*digest, raw_data.to_vec()); - } + self.cache_insert(digest, &raw_data); Ok(raw_data) } @@ -102,18 +182,14 @@ impl AsyncReadChunk for RemoteChunkReader { digest: &'a [u8; 32], ) -> Pin, Error>> + Send + 'a>> { Box::pin(async move { - if let Some(raw_data) = (*self.cache.lock().unwrap()).get(digest) { + if let Some(raw_data) = self.cache_get(digest) { return Ok(raw_data.to_vec()); } let chunk = Self::read_raw_chunk(self, digest).await?; let raw_data = chunk.decode(self.crypt_config.as_ref().map(Arc::as_ref), Some(digest))?; - - let use_cache = self.cache_hint.contains_key(digest); - if use_cache { - (*self.cache.lock().unwrap()).insert(*digest, raw_data.to_vec()); - } + self.cache_insert(digest, &raw_data); Ok(raw_data) }) -- 2.20.1