From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id AC85D1FF18A for ; Mon, 26 May 2025 16:15:26 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id C2CA6342A2; Mon, 26 May 2025 16:15:39 +0200 (CEST) From: Hannes Laimer To: pbs-devel@lists.proxmox.com Date: Mon, 26 May 2025 16:14:37 +0200 Message-Id: <20250526141445.228717-5-h.laimer@proxmox.com> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250526141445.228717-1-h.laimer@proxmox.com> References: <20250526141445.228717-1-h.laimer@proxmox.com> MIME-Version: 1.0 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.026 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH proxmox-backup v2 04/12] datastore: separate functions into impl block X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" ... based on whether they are reading/writing. Signed-off-by: Hannes Laimer --- pbs-datastore/src/datastore.rs | 1282 ++++++++++++++++---------------- 1 file changed, 643 insertions(+), 639 deletions(-) diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index 6936875e..66a2e209 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -27,7 +27,6 @@ use pbs_api_types::{ DataStoreConfig, DatastoreFSyncLevel, DatastoreTuning, GarbageCollectionStatus, MaintenanceMode, MaintenanceType, Operation, UPID, }; -use pbs_config::BackupLockGuard; use crate::backup_info::{BackupDir, BackupGroup, BackupInfo, OLD_LOCKING}; use crate::chunk_store::{CanRead, CanWrite, ChunkStore, Lookup as L, Read as R, Write as W}; @@ -250,28 +249,87 @@ impl DataStore { } } -impl DataStore { - // This one just panics on everything - #[doc(hidden)] - pub(crate) unsafe fn new_test() -> Arc { - Arc::new(Self { - inner: unsafe { DataStoreImpl::new_test() }, - operation: None, - }) +impl DataStore { + /// Get a streaming iter over single-level backup namespaces of a datatstore, filtered by Ok + /// + /// The iterated item's result is already unwrapped, if it contained an error it will be + /// logged. Can be useful in iterator chain commands + pub fn iter_backup_ns_ok( + self: &Arc>, + ns: BackupNamespace, + ) -> Result + 'static, Error> { + let this = Arc::clone(self); + Ok( + ListNamespaces::new(Arc::clone(self), ns)?.filter_map(move |ns| match ns { + Ok(ns) => Some(ns), + Err(err) => { + log::error!("list groups error on datastore {} - {}", this.name(), err); + None + } + }), + ) + } + + /// Get a streaming iter over single-level backup namespaces of a datatstore, filtered by Ok + /// + /// The iterated item's result is already unwrapped, if it contained an error it will be + /// logged. Can be useful in iterator chain commands + pub fn recursive_iter_backup_ns_ok( + self: &Arc>, + ns: BackupNamespace, + max_depth: Option, + ) -> Result + 'static, Error> { + let this = Arc::clone(self); + Ok(if let Some(depth) = max_depth { + ListNamespacesRecursive::new_max_depth(Arc::clone(self), ns, depth)? + } else { + ListNamespacesRecursive::new(Arc::clone(self), ns)? + } + .filter_map(move |ns| match ns { + Ok(ns) => Some(ns), + Err(err) => { + log::error!("list groups error on datastore {} - {}", this.name(), err); + None + } + })) + } + + /// Get a streaming iter over top-level backup groups of a datastore of a particular type, + /// filtered by `Ok` results + /// + /// The iterated item's result is already unwrapped, if it contained an error it will be + /// logged. Can be useful in iterator chain commands + pub fn iter_backup_type_ok( + self: &Arc>, + ns: BackupNamespace, + ty: BackupType, + ) -> Result + 'static, Error> { + Ok(self.iter_backup_type(ns, ty)?.ok()) + } + + /// Get a streaming iter over top-level backup groups of a datatstore, filtered by Ok results + /// + /// The iterated item's result is already unwrapped, if it contained an error it will be + /// logged. Can be useful in iterator chain commands + pub fn iter_backup_groups_ok( + self: &Arc>, + ns: BackupNamespace, + ) -> Result + 'static, Error> { + Ok(self.iter_backup_groups(ns)?.ok()) } +} - pub fn lookup_datastore( +impl DataStore { + pub fn open_datastore( name: &str, operation: Option, - ) -> Result, Error> { + cache_entry: Option>>, + ) -> Result>, Error> { // Avoid TOCTOU between checking maintenance mode and updating active operation counter, as // we use it to decide whether it is okay to delete the datastore. - let _config_lock = pbs_config::datastore::lock_config()?; - // we could use the ConfigVersionCache's generation for staleness detection, but we load // the config anyway -> just use digest, additional benefit: manual changes get detected - let (config, digest) = pbs_config::datastore::config()?; - let config: DataStoreConfig = config.lookup("datastore", name)?; + let (config, digest, _lock) = Self::read_config(name)?; if let Some(maintenance_mode) = config.get_maintenance_mode() { if let Err(error) = maintenance_mode.check(operation) { @@ -280,16 +338,11 @@ impl DataStore { } if get_datastore_mount_status(&config) == Some(false) { - let mut datastore_cache = DATASTORE_MAP.lock().unwrap(); - datastore_cache.remove(&config.name); bail!("datastore '{}' is not mounted", config.name); } - let mut datastore_cache = DATASTORE_MAP.lock().unwrap(); - let entry = datastore_cache.get(name); - // reuse chunk store so that we keep using the same process locker instance! - let chunk_store = if let Some(datastore) = &entry { + let chunk_store = if let Some(datastore) = &cache_entry { let last_digest = datastore.last_digest.as_ref(); if let Some(true) = last_digest.map(|last_digest| last_digest == &digest) { if let Some(operation) = operation { @@ -306,73 +359,25 @@ impl DataStore { DatastoreTuning::API_SCHEMA .parse_property_string(config.tuning.as_deref().unwrap_or(""))?, )?; - Arc::new(ChunkStore::open( + Arc::new(ChunkStore::::open( name, config.absolute_path(), tuning.sync_level.unwrap_or_default(), )?) }; - let datastore = DataStore::with_store_and_config(chunk_store, config, Some(digest))?; - - let datastore = Arc::new(datastore); - datastore_cache.insert(name.to_string(), datastore.clone()); + let datastore = Self::with_store_and_config(chunk_store, config, Some(digest))?; if let Some(operation) = operation { update_active_operations(name, operation, 1)?; } Ok(Arc::new(Self { - inner: datastore, + inner: datastore.into(), operation, })) } - /// removes all datastores that are not configured anymore - pub fn remove_unused_datastores() -> Result<(), Error> { - let (config, _digest) = pbs_config::datastore::config()?; - - let mut map = DATASTORE_MAP.lock().unwrap(); - // removes all elements that are not in the config - map.retain(|key, _| config.sections.contains_key(key)); - Ok(()) - } - - /// trigger clearing cache entry based on maintenance mode. Entry will only - /// be cleared iff there is no other task running, if there is, the end of the - /// last running task will trigger the clearing of the cache entry. - pub fn update_datastore_cache(name: &str) -> Result<(), Error> { - let (config, _digest) = pbs_config::datastore::config()?; - let datastore: DataStoreConfig = config.lookup("datastore", name)?; - if datastore - .get_maintenance_mode() - .is_some_and(|m| m.clear_from_cache()) - { - // the datastore drop handler does the checking if tasks are running and clears the - // cache entry, so we just have to trigger it here - let _ = DataStore::lookup_datastore(name, Some(Operation::Lookup)); - } - - Ok(()) - } - - /// Open a raw database given a name and a path. - /// - /// # Safety - /// See the safety section in `open_from_config` - pub unsafe fn open_path( - name: &str, - path: impl AsRef, - operation: Option, - ) -> Result, Error> { - let path = path - .as_ref() - .to_str() - .ok_or_else(|| format_err!("non-utf8 paths not supported"))? - .to_owned(); - unsafe { Self::open_from_config(DataStoreConfig::new(name.to_owned(), path), operation) } - } - /// Open a datastore given a raw configuration. /// /// # Safety @@ -394,7 +399,7 @@ impl DataStore { DatastoreTuning::API_SCHEMA .parse_property_string(config.tuning.as_deref().unwrap_or(""))?, )?; - let chunk_store = ChunkStore::open( + let chunk_store = ChunkStore::::open( &name, config.absolute_path(), tuning.sync_level.unwrap_or_default(), @@ -413,10 +418,10 @@ impl DataStore { } fn with_store_and_config( - chunk_store: Arc, + chunk_store: Arc>, config: DataStoreConfig, last_digest: Option<[u8; 32]>, - ) -> Result { + ) -> Result, Error> { let mut gc_status_path = chunk_store.base_path(); gc_status_path.push(".gc-status"); @@ -448,6 +453,23 @@ impl DataStore { }) } + /// Open a raw database given a name and a path. + /// + /// # Safety + /// See the safety section in `open_from_config` + pub unsafe fn open_path( + name: &str, + path: impl AsRef, + operation: Option, + ) -> Result, Error> { + let path = path + .as_ref() + .to_str() + .ok_or_else(|| format_err!("non-utf8 paths not supported"))? + .to_owned(); + unsafe { Self::open_from_config(DataStoreConfig::new(name.to_owned(), path), operation) } + } + pub fn get_chunk_iterator( &self, ) -> Result< @@ -457,53 +479,6 @@ impl DataStore { self.inner.chunk_store.get_chunk_iterator() } - pub fn create_fixed_writer>( - &self, - filename: P, - size: usize, - chunk_size: usize, - ) -> Result { - let index = FixedIndexWriter::create( - self.inner.chunk_store.clone(), - filename.as_ref(), - size, - chunk_size, - )?; - - Ok(index) - } - - pub fn open_fixed_reader>( - &self, - filename: P, - ) -> Result { - let full_path = self.inner.chunk_store.relative_path(filename.as_ref()); - - let index = FixedIndexReader::open(&full_path)?; - - Ok(index) - } - - pub fn create_dynamic_writer>( - &self, - filename: P, - ) -> Result { - let index = DynamicIndexWriter::create(self.inner.chunk_store.clone(), filename.as_ref())?; - - Ok(index) - } - - pub fn open_dynamic_reader>( - &self, - filename: P, - ) -> Result { - let full_path = self.inner.chunk_store.relative_path(filename.as_ref()); - - let index = DynamicIndexReader::open(&full_path)?; - - Ok(index) - } - pub fn open_index

(&self, filename: P) -> Result, Error> where P: AsRef, @@ -543,73 +518,26 @@ impl DataStore { Ok(()) } - pub fn name(&self) -> &str { - self.inner.chunk_store.name() - } - - pub fn base_path(&self) -> PathBuf { - self.inner.chunk_store.base_path() - } - - /// Returns the absolute path for a backup namespace on this datastore - pub fn namespace_path(&self, ns: &BackupNamespace) -> PathBuf { - let mut path = self.base_path(); - path.reserve(ns.path_len()); - for part in ns.components() { - path.push("ns"); - path.push(part); - } - path - } + pub fn open_fixed_reader>( + &self, + filename: P, + ) -> Result { + let full_path = self.inner.chunk_store.relative_path(filename.as_ref()); - /// Returns the absolute path for a backup_type - pub fn type_path(&self, ns: &BackupNamespace, backup_type: BackupType) -> PathBuf { - let mut full_path = self.namespace_path(ns); - full_path.push(backup_type.to_string()); - full_path - } + let index = FixedIndexReader::open(&full_path)?; - /// Returns the absolute path for a backup_group - pub fn group_path( - &self, - ns: &BackupNamespace, - backup_group: &pbs_api_types::BackupGroup, - ) -> PathBuf { - let mut full_path = self.namespace_path(ns); - full_path.push(backup_group.to_string()); - full_path + Ok(index) } - /// Returns the absolute path for backup_dir - pub fn snapshot_path( + pub fn open_dynamic_reader>( &self, - ns: &BackupNamespace, - backup_dir: &pbs_api_types::BackupDir, - ) -> PathBuf { - let mut full_path = self.namespace_path(ns); - full_path.push(backup_dir.to_string()); - full_path - } - - /// Create a backup namespace. - pub fn create_namespace( - self: &Arc, - parent: &BackupNamespace, - name: String, - ) -> Result { - if !self.namespace_exists(parent) { - bail!("cannot create new namespace, parent {parent} doesn't already exists"); - } - - // construct ns before mkdir to enforce max-depth and name validity - let ns = BackupNamespace::from_parent_ns(parent, name)?; - - let mut ns_full_path = self.base_path(); - ns_full_path.push(ns.path()); + filename: P, + ) -> Result { + let full_path = self.inner.chunk_store.relative_path(filename.as_ref()); - std::fs::create_dir_all(ns_full_path)?; + let index = DynamicIndexReader::open(&full_path)?; - Ok(ns) + Ok(index) } /// Returns if the given namespace exists on the datastore @@ -619,7 +547,401 @@ impl DataStore { path.exists() } - /// Remove all backup groups of a single namespace level but not the namespace itself. + /// Returns the time of the last successful backup + /// + /// Or None if there is no backup in the group (or the group dir does not exist). + pub fn last_successful_backup( + self: &Arc, + ns: &BackupNamespace, + backup_group: &pbs_api_types::BackupGroup, + ) -> Result, Error> { + let backup_group = self.backup_group(ns.clone(), backup_group.clone()); + + let group_path = backup_group.full_group_path(); + + if group_path.exists() { + backup_group.last_successful_backup() + } else { + Ok(None) + } + } + + /// Returns the backup owner. + /// + /// The backup owner is the entity who first created the backup group. + pub fn get_owner( + &self, + ns: &BackupNamespace, + backup_group: &pbs_api_types::BackupGroup, + ) -> Result { + let full_path = self.owner_path(ns, backup_group); + let owner = proxmox_sys::fs::file_read_firstline(full_path)?; + owner + .trim_end() // remove trailing newline + .parse() + .map_err(|err| format_err!("parsing owner for {backup_group} failed: {err}")) + } + + pub fn owns_backup( + &self, + ns: &BackupNamespace, + backup_group: &pbs_api_types::BackupGroup, + auth_id: &Authid, + ) -> Result { + let owner = self.get_owner(ns, backup_group)?; + + Ok(check_backup_owner(&owner, auth_id).is_ok()) + } + + /// Get a streaming iter over single-level backup namespaces of a datatstore + /// + /// The iterated item is still a Result that can contain errors from rather unexptected FS or + /// parsing errors. + pub fn iter_backup_ns( + self: &Arc>, + ns: BackupNamespace, + ) -> Result { + ListNamespaces::new(Arc::clone(self), ns) + } + + /// Get a streaming iter over single-level backup namespaces of a datatstore + /// + /// The iterated item is still a Result that can contain errors from rather unexptected FS or + /// parsing errors. + pub fn recursive_iter_backup_ns( + self: &Arc>, + ns: BackupNamespace, + ) -> Result { + ListNamespacesRecursive::new(Arc::clone(self), ns) + } + + /// Get a streaming iter over top-level backup groups of a datatstore of a particular type. + /// + /// The iterated item is still a Result that can contain errors from rather unexptected FS or + /// parsing errors. + pub fn iter_backup_type( + self: &Arc>, + ns: BackupNamespace, + ty: BackupType, + ) -> Result { + ListGroupsType::new(Arc::clone(self), ns, ty) + } + + /// Get a streaming iter over top-level backup groups of a datatstore + /// + /// The iterated item is still a Result that can contain errors from rather unexptected FS or + /// parsing errors. + pub fn iter_backup_groups( + self: &Arc>, + ns: BackupNamespace, + ) -> Result { + ListGroups::new(Arc::clone(self), ns) + } + + /// Get a in-memory vector for all top-level backup groups of a datatstore + /// + /// NOTE: using the iterator directly is most often more efficient w.r.t. memory usage + pub fn list_backup_groups( + self: &Arc>, + ns: BackupNamespace, + ) -> Result, Error> { + ListGroups::new(Arc::clone(self), ns)?.collect() + } + + /// Lookup all index files to be found in the datastore without taking any logical iteration + /// into account. + /// The filesystem is walked recursevly to detect index files based on their archive type based + /// on the filename. This however excludes the chunks folder, hidden files and does not follow + /// symlinks. + fn list_index_files(&self) -> Result, Error> { + let base = self.base_path(); + + let mut list = HashSet::new(); + + use walkdir::WalkDir; + + let walker = WalkDir::new(base).into_iter(); + + // make sure we skip .chunks (and other hidden files to keep it simple) + fn is_hidden(entry: &walkdir::DirEntry) -> bool { + entry + .file_name() + .to_str() + .map(|s| s.starts_with('.')) + .unwrap_or(false) + } + let handle_entry_err = |err: walkdir::Error| { + // first, extract the actual IO error and the affected path + let (inner, path) = match (err.io_error(), err.path()) { + (None, _) => return Ok(()), // not an IO-error + (Some(inner), Some(path)) => (inner, path), + (Some(inner), None) => bail!("unexpected error on datastore traversal: {inner}"), + }; + if inner.kind() == io::ErrorKind::PermissionDenied { + if err.depth() <= 1 && path.ends_with("lost+found") { + // allow skipping of (root-only) ext4 fsck-directory on EPERM .. + return Ok(()); + } + // .. but do not ignore EPERM in general, otherwise we might prune too many chunks. + // E.g., if users messed up with owner/perms on a rsync + bail!("cannot continue garbage-collection safely, permission denied on: {path:?}"); + } else if inner.kind() == io::ErrorKind::NotFound { + log::info!("ignoring vanished file: {path:?}"); + return Ok(()); + } else { + bail!("unexpected error on datastore traversal: {inner} - {path:?}"); + } + }; + for entry in walker.filter_entry(|e| !is_hidden(e)) { + let path = match entry { + Ok(entry) => entry.into_path(), + Err(err) => { + handle_entry_err(err)?; + continue; + } + }; + if let Ok(archive_type) = ArchiveType::from_path(&path) { + if archive_type == ArchiveType::FixedIndex + || archive_type == ArchiveType::DynamicIndex + { + list.insert(path); + } + } + } + + Ok(list) + } + + // Similar to open index, but return with Ok(None) if index file vanished. + fn open_index_reader(&self, absolute_path: &Path) -> Result>, Error> { + let archive_type = match ArchiveType::from_path(absolute_path) { + // ignore archives with unknown archive type + Ok(ArchiveType::Blob) | Err(_) => bail!("unexpected archive type"), + Ok(archive_type) => archive_type, + }; + + if absolute_path.is_relative() { + bail!("expected absolute path, got '{absolute_path:?}'"); + } + + let file = match std::fs::File::open(absolute_path) { + Ok(file) => file, + // ignore vanished files + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None), + Err(err) => { + return Err(Error::from(err).context(format!("can't open file '{absolute_path:?}'"))) + } + }; + + match archive_type { + ArchiveType::FixedIndex => { + let reader = FixedIndexReader::new(file) + .with_context(|| format!("can't open fixed index '{absolute_path:?}'"))?; + Ok(Some(Box::new(reader))) + } + ArchiveType::DynamicIndex => { + let reader = DynamicIndexReader::new(file) + .with_context(|| format!("can't open dynamic index '{absolute_path:?}'"))?; + Ok(Some(Box::new(reader))) + } + ArchiveType::Blob => bail!("unexpected archive type blob"), + } + } + + pub fn last_gc_status(&self) -> GarbageCollectionStatus { + self.inner.last_gc_status.lock().unwrap().clone() + } + + pub fn garbage_collection_running(&self) -> bool { + self.inner.gc_mutex.try_lock().is_err() + } + + pub fn try_shared_chunk_store_lock(&self) -> Result { + self.inner.chunk_store.try_shared_lock() + } + + pub fn stat_chunk(&self, digest: &[u8; 32]) -> Result { + let (chunk_path, _digest_str) = self.inner.chunk_store.chunk_path(digest); + std::fs::metadata(chunk_path).map_err(Error::from) + } + + pub fn load_chunk(&self, digest: &[u8; 32]) -> Result { + let (chunk_path, digest_str) = self.inner.chunk_store.chunk_path(digest); + + proxmox_lang::try_block!({ + let mut file = std::fs::File::open(&chunk_path)?; + DataBlob::load_from_reader(&mut file) + }) + .map_err(|err| { + format_err!( + "store '{}', unable to load chunk '{}' - {}", + self.name(), + digest_str, + err, + ) + }) + } + + /// returns a list of chunks sorted by their inode number on disk chunks that couldn't get + /// stat'ed are placed at the end of the list + pub fn get_chunks_in_order( + &self, + index: &(dyn IndexFile + Send), + skip_chunk: F, + check_abort: A, + ) -> Result, Error> + where + F: Fn(&[u8; 32]) -> bool, + A: Fn(usize) -> Result<(), Error>, + { + let index_count = index.index_count(); + let mut chunk_list = Vec::with_capacity(index_count); + use std::os::unix::fs::MetadataExt; + for pos in 0..index_count { + check_abort(pos)?; + + let info = index.chunk_info(pos).unwrap(); + + if skip_chunk(&info.digest) { + continue; + } + + let ino = match self.inner.chunk_order { + ChunkOrder::Inode => { + match self.stat_chunk(&info.digest) { + Err(_) => u64::MAX, // could not stat, move to end of list + Ok(metadata) => metadata.ino(), + } + } + ChunkOrder::None => 0, + }; + + chunk_list.push((pos, ino)); + } + + match self.inner.chunk_order { + // sorting by inode improves data locality, which makes it lots faster on spinners + ChunkOrder::Inode => { + chunk_list.sort_unstable_by(|(_, ino_a), (_, ino_b)| ino_a.cmp(ino_b)) + } + ChunkOrder::None => {} + } + + Ok(chunk_list) + } + + /// Open a snapshot (backup directory) from this datastore. + pub fn backup_dir_from_parts( + self: &Arc, + ns: BackupNamespace, + ty: BackupType, + id: D, + time: i64, + ) -> Result + where + D: Into, + { + self.backup_dir(ns, (ty, id.into(), time).into()) + } + + /// Open a snapshot (backup directory) from this datastore with a cached rfc3339 time string. + pub fn backup_dir_with_rfc3339>( + self: &Arc, + group: BackupGroup, + time_string: D, + ) -> Result { + BackupDir::with_rfc3339(group, time_string.into()) + } + + /// Open a backup group from this datastore. + pub fn backup_group_from_parts( + self: &Arc, + ns: BackupNamespace, + ty: BackupType, + id: D, + ) -> BackupGroup + where + D: Into, + { + self.backup_group(ns, (ty, id.into()).into()) + } + + /* + /// Open a backup group from this datastore by backup group path such as `vm/100`. + /// + /// Convenience method for `store.backup_group(path.parse()?)` + pub fn backup_group_from_path(self: &Arc, path: &str) -> Result { + todo!("split out the namespace"); + } + */ + + /// Open a backup group from this datastore. + pub fn backup_group( + self: &Arc, + ns: BackupNamespace, + group: pbs_api_types::BackupGroup, + ) -> BackupGroup { + BackupGroup::new(Arc::clone(self), ns, group) + } + + /// Open a snapshot (backup directory) from this datastore. + pub fn backup_dir( + self: &Arc, + ns: BackupNamespace, + dir: pbs_api_types::BackupDir, + ) -> Result { + BackupDir::with_group(self.backup_group(ns, dir.group), dir.time) + } +} + +impl DataStore { + pub fn create_fixed_writer>( + &self, + filename: P, + size: usize, + chunk_size: usize, + ) -> Result { + let index = FixedIndexWriter::create( + self.inner.chunk_store.clone(), + filename.as_ref(), + size, + chunk_size, + )?; + + Ok(index) + } + + pub fn create_dynamic_writer>( + &self, + filename: P, + ) -> Result { + let index = DynamicIndexWriter::create(self.inner.chunk_store.clone(), filename.as_ref())?; + + Ok(index) + } + + /// Create a backup namespace. + pub fn create_namespace( + self: &Arc, + parent: &BackupNamespace, + name: String, + ) -> Result { + if !self.namespace_exists(parent) { + bail!("cannot create new namespace, parent {parent} doesn't already exists"); + } + + // construct ns before mkdir to enforce max-depth and name validity + let ns = BackupNamespace::from_parent_ns(parent, name)?; + + let mut ns_full_path = self.base_path(); + ns_full_path.push(ns.path()); + + std::fs::create_dir_all(ns_full_path)?; + + Ok(ns) + } + + /// Remove all backup groups of a single namespace level but not the namespace itself. /// /// Does *not* descends into child-namespaces and doesn't remoes the namespace itself either. /// @@ -719,85 +1041,30 @@ impl DataStore { Ok((removed_all_requested, stats)) } - /// Remove a complete backup group including all snapshots. - /// - /// Returns `BackupGroupDeleteStats`, containing the number of deleted snapshots - /// and number of protected snaphsots, which therefore were not removed. - pub fn remove_backup_group( - self: &Arc, - ns: &BackupNamespace, - backup_group: &pbs_api_types::BackupGroup, - ) -> Result { - let backup_group = self.backup_group(ns.clone(), backup_group.clone()); - - backup_group.destroy() - } - - /// Remove a backup directory including all content - pub fn remove_backup_dir( - self: &Arc, - ns: &BackupNamespace, - backup_dir: &pbs_api_types::BackupDir, - force: bool, - ) -> Result<(), Error> { - let backup_dir = self.backup_dir(ns.clone(), backup_dir.clone())?; - - backup_dir.destroy(force) - } - - /// Returns the time of the last successful backup - /// - /// Or None if there is no backup in the group (or the group dir does not exist). - pub fn last_successful_backup( - self: &Arc, - ns: &BackupNamespace, - backup_group: &pbs_api_types::BackupGroup, - ) -> Result, Error> { - let backup_group = self.backup_group(ns.clone(), backup_group.clone()); - - let group_path = backup_group.full_group_path(); - - if group_path.exists() { - backup_group.last_successful_backup() - } else { - Ok(None) - } - } - - /// Return the path of the 'owner' file. - pub(super) fn owner_path( - &self, - ns: &BackupNamespace, - group: &pbs_api_types::BackupGroup, - ) -> PathBuf { - self.group_path(ns, group).join("owner") - } - - /// Returns the backup owner. + /// Remove a complete backup group including all snapshots. /// - /// The backup owner is the entity who first created the backup group. - pub fn get_owner( - &self, + /// Returns `BackupGroupDeleteStats`, containing the number of deleted snapshots + /// and number of protected snaphsots, which therefore were not removed. + pub fn remove_backup_group( + self: &Arc, ns: &BackupNamespace, backup_group: &pbs_api_types::BackupGroup, - ) -> Result { - let full_path = self.owner_path(ns, backup_group); - let owner = proxmox_sys::fs::file_read_firstline(full_path)?; - owner - .trim_end() // remove trailing newline - .parse() - .map_err(|err| format_err!("parsing owner for {backup_group} failed: {err}")) + ) -> Result { + let backup_group = self.backup_group(ns.clone(), backup_group.clone()); + + backup_group.destroy() } - pub fn owns_backup( - &self, + /// Remove a backup directory including all content + pub fn remove_backup_dir( + self: &Arc, ns: &BackupNamespace, - backup_group: &pbs_api_types::BackupGroup, - auth_id: &Authid, - ) -> Result { - let owner = self.get_owner(ns, backup_group)?; + backup_dir: &pbs_api_types::BackupDir, + force: bool, + ) -> Result<(), Error> { + let backup_dir = self.backup_dir(ns.clone(), backup_dir.clone())?; - Ok(check_backup_owner(&owner, auth_id).is_ok()) + backup_dir.destroy(force) } /// Set the backup owner. @@ -900,229 +1167,6 @@ impl DataStore { } } - /// Get a streaming iter over single-level backup namespaces of a datatstore - /// - /// The iterated item is still a Result that can contain errors from rather unexptected FS or - /// parsing errors. - pub fn iter_backup_ns( - self: &Arc, - ns: BackupNamespace, - ) -> Result { - ListNamespaces::new(Arc::clone(self), ns) - } - - /// Get a streaming iter over single-level backup namespaces of a datatstore, filtered by Ok - /// - /// The iterated item's result is already unwrapped, if it contained an error it will be - /// logged. Can be useful in iterator chain commands - pub fn iter_backup_ns_ok( - self: &Arc, - ns: BackupNamespace, - ) -> Result + 'static, Error> { - let this = Arc::clone(self); - Ok( - ListNamespaces::new(Arc::clone(self), ns)?.filter_map(move |ns| match ns { - Ok(ns) => Some(ns), - Err(err) => { - log::error!("list groups error on datastore {} - {}", this.name(), err); - None - } - }), - ) - } - - /// Get a streaming iter over single-level backup namespaces of a datatstore - /// - /// The iterated item is still a Result that can contain errors from rather unexptected FS or - /// parsing errors. - pub fn recursive_iter_backup_ns( - self: &Arc, - ns: BackupNamespace, - ) -> Result { - ListNamespacesRecursive::new(Arc::clone(self), ns) - } - - /// Get a streaming iter over single-level backup namespaces of a datatstore, filtered by Ok - /// - /// The iterated item's result is already unwrapped, if it contained an error it will be - /// logged. Can be useful in iterator chain commands - pub fn recursive_iter_backup_ns_ok( - self: &Arc, - ns: BackupNamespace, - max_depth: Option, - ) -> Result + 'static, Error> { - let this = Arc::clone(self); - Ok(if let Some(depth) = max_depth { - ListNamespacesRecursive::new_max_depth(Arc::clone(self), ns, depth)? - } else { - ListNamespacesRecursive::new(Arc::clone(self), ns)? - } - .filter_map(move |ns| match ns { - Ok(ns) => Some(ns), - Err(err) => { - log::error!("list groups error on datastore {} - {}", this.name(), err); - None - } - })) - } - - /// Get a streaming iter over top-level backup groups of a datatstore of a particular type. - /// - /// The iterated item is still a Result that can contain errors from rather unexptected FS or - /// parsing errors. - pub fn iter_backup_type( - self: &Arc, - ns: BackupNamespace, - ty: BackupType, - ) -> Result { - ListGroupsType::new(Arc::clone(self), ns, ty) - } - - /// Get a streaming iter over top-level backup groups of a datastore of a particular type, - /// filtered by `Ok` results - /// - /// The iterated item's result is already unwrapped, if it contained an error it will be - /// logged. Can be useful in iterator chain commands - pub fn iter_backup_type_ok( - self: &Arc, - ns: BackupNamespace, - ty: BackupType, - ) -> Result + 'static, Error> { - Ok(self.iter_backup_type(ns, ty)?.ok()) - } - - /// Get a streaming iter over top-level backup groups of a datatstore - /// - /// The iterated item is still a Result that can contain errors from rather unexptected FS or - /// parsing errors. - pub fn iter_backup_groups( - self: &Arc, - ns: BackupNamespace, - ) -> Result { - ListGroups::new(Arc::clone(self), ns) - } - - /// Get a streaming iter over top-level backup groups of a datatstore, filtered by Ok results - /// - /// The iterated item's result is already unwrapped, if it contained an error it will be - /// logged. Can be useful in iterator chain commands - pub fn iter_backup_groups_ok( - self: &Arc, - ns: BackupNamespace, - ) -> Result + 'static, Error> { - Ok(self.iter_backup_groups(ns)?.ok()) - } - - /// Get a in-memory vector for all top-level backup groups of a datatstore - /// - /// NOTE: using the iterator directly is most often more efficient w.r.t. memory usage - pub fn list_backup_groups( - self: &Arc, - ns: BackupNamespace, - ) -> Result, Error> { - ListGroups::new(Arc::clone(self), ns)?.collect() - } - - /// Lookup all index files to be found in the datastore without taking any logical iteration - /// into account. - /// The filesystem is walked recursevly to detect index files based on their archive type based - /// on the filename. This however excludes the chunks folder, hidden files and does not follow - /// symlinks. - fn list_index_files(&self) -> Result, Error> { - let base = self.base_path(); - - let mut list = HashSet::new(); - - use walkdir::WalkDir; - - let walker = WalkDir::new(base).into_iter(); - - // make sure we skip .chunks (and other hidden files to keep it simple) - fn is_hidden(entry: &walkdir::DirEntry) -> bool { - entry - .file_name() - .to_str() - .map(|s| s.starts_with('.')) - .unwrap_or(false) - } - let handle_entry_err = |err: walkdir::Error| { - // first, extract the actual IO error and the affected path - let (inner, path) = match (err.io_error(), err.path()) { - (None, _) => return Ok(()), // not an IO-error - (Some(inner), Some(path)) => (inner, path), - (Some(inner), None) => bail!("unexpected error on datastore traversal: {inner}"), - }; - if inner.kind() == io::ErrorKind::PermissionDenied { - if err.depth() <= 1 && path.ends_with("lost+found") { - // allow skipping of (root-only) ext4 fsck-directory on EPERM .. - return Ok(()); - } - // .. but do not ignore EPERM in general, otherwise we might prune too many chunks. - // E.g., if users messed up with owner/perms on a rsync - bail!("cannot continue garbage-collection safely, permission denied on: {path:?}"); - } else if inner.kind() == io::ErrorKind::NotFound { - log::info!("ignoring vanished file: {path:?}"); - return Ok(()); - } else { - bail!("unexpected error on datastore traversal: {inner} - {path:?}"); - } - }; - for entry in walker.filter_entry(|e| !is_hidden(e)) { - let path = match entry { - Ok(entry) => entry.into_path(), - Err(err) => { - handle_entry_err(err)?; - continue; - } - }; - if let Ok(archive_type) = ArchiveType::from_path(&path) { - if archive_type == ArchiveType::FixedIndex - || archive_type == ArchiveType::DynamicIndex - { - list.insert(path); - } - } - } - - Ok(list) - } - - // Similar to open index, but return with Ok(None) if index file vanished. - fn open_index_reader(&self, absolute_path: &Path) -> Result>, Error> { - let archive_type = match ArchiveType::from_path(absolute_path) { - // ignore archives with unknown archive type - Ok(ArchiveType::Blob) | Err(_) => bail!("unexpected archive type"), - Ok(archive_type) => archive_type, - }; - - if absolute_path.is_relative() { - bail!("expected absolute path, got '{absolute_path:?}'"); - } - - let file = match std::fs::File::open(absolute_path) { - Ok(file) => file, - // ignore vanished files - Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None), - Err(err) => { - return Err(Error::from(err).context(format!("can't open file '{absolute_path:?}'"))) - } - }; - - match archive_type { - ArchiveType::FixedIndex => { - let reader = FixedIndexReader::new(file) - .with_context(|| format!("can't open fixed index '{absolute_path:?}'"))?; - Ok(Some(Box::new(reader))) - } - ArchiveType::DynamicIndex => { - let reader = DynamicIndexReader::new(file) - .with_context(|| format!("can't open dynamic index '{absolute_path:?}'"))?; - Ok(Some(Box::new(reader))) - } - ArchiveType::Blob => bail!("unexpected archive type blob"), - } - } - // mark chunks used by ``index`` as used fn index_mark_used_chunks( &self, @@ -1301,15 +1345,7 @@ impl DataStore { warn!("Found {strange_paths_count} index files outside of expected directory scheme"); } - Ok(()) - } - - pub fn last_gc_status(&self) -> GarbageCollectionStatus { - self.inner.last_gc_status.lock().unwrap().clone() - } - - pub fn garbage_collection_running(&self) -> bool { - self.inner.gc_mutex.try_lock().is_err() + Ok(()) } pub fn garbage_collection( @@ -1479,14 +1515,6 @@ impl DataStore { Ok(()) } - pub fn try_shared_chunk_store_lock(&self) -> Result { - self.inner.chunk_store.try_shared_lock() - } - - pub fn chunk_path(&self, digest: &[u8; 32]) -> (PathBuf, String) { - self.inner.chunk_store.chunk_path(digest) - } - pub fn cond_touch_chunk(&self, digest: &[u8; 32], assert_exists: bool) -> Result { self.inner .chunk_store @@ -1497,28 +1525,6 @@ impl DataStore { self.inner.chunk_store.insert_chunk(chunk, digest) } - pub fn stat_chunk(&self, digest: &[u8; 32]) -> Result { - let (chunk_path, _digest_str) = self.inner.chunk_store.chunk_path(digest); - std::fs::metadata(chunk_path).map_err(Error::from) - } - - pub fn load_chunk(&self, digest: &[u8; 32]) -> Result { - let (chunk_path, digest_str) = self.inner.chunk_store.chunk_path(digest); - - proxmox_lang::try_block!({ - let mut file = std::fs::File::open(&chunk_path)?; - DataBlob::load_from_reader(&mut file) - }) - .map_err(|err| { - format_err!( - "store '{}', unable to load chunk '{}' - {}", - self.name(), - digest_str, - err, - ) - }) - } - /// Updates the protection status of the specified snapshot. pub fn update_protection(&self, backup_dir: &BackupDir, protection: bool) -> Result<(), Error> { let full_path = backup_dir.full_path(); @@ -1545,128 +1551,6 @@ impl DataStore { Ok(()) } - pub fn verify_new(&self) -> bool { - self.inner.verify_new - } - - /// returns a list of chunks sorted by their inode number on disk chunks that couldn't get - /// stat'ed are placed at the end of the list - pub fn get_chunks_in_order( - &self, - index: &(dyn IndexFile + Send), - skip_chunk: F, - check_abort: A, - ) -> Result, Error> - where - F: Fn(&[u8; 32]) -> bool, - A: Fn(usize) -> Result<(), Error>, - { - let index_count = index.index_count(); - let mut chunk_list = Vec::with_capacity(index_count); - use std::os::unix::fs::MetadataExt; - for pos in 0..index_count { - check_abort(pos)?; - - let info = index.chunk_info(pos).unwrap(); - - if skip_chunk(&info.digest) { - continue; - } - - let ino = match self.inner.chunk_order { - ChunkOrder::Inode => { - match self.stat_chunk(&info.digest) { - Err(_) => u64::MAX, // could not stat, move to end of list - Ok(metadata) => metadata.ino(), - } - } - ChunkOrder::None => 0, - }; - - chunk_list.push((pos, ino)); - } - - match self.inner.chunk_order { - // sorting by inode improves data locality, which makes it lots faster on spinners - ChunkOrder::Inode => { - chunk_list.sort_unstable_by(|(_, ino_a), (_, ino_b)| ino_a.cmp(ino_b)) - } - ChunkOrder::None => {} - } - - Ok(chunk_list) - } - - /// Open a backup group from this datastore. - pub fn backup_group( - self: &Arc, - ns: BackupNamespace, - group: pbs_api_types::BackupGroup, - ) -> BackupGroup { - BackupGroup::new(Arc::clone(self), ns, group) - } - - /// Open a backup group from this datastore. - pub fn backup_group_from_parts( - self: &Arc, - ns: BackupNamespace, - ty: BackupType, - id: T, - ) -> BackupGroup - where - T: Into, - { - self.backup_group(ns, (ty, id.into()).into()) - } - - /* - /// Open a backup group from this datastore by backup group path such as `vm/100`. - /// - /// Convenience method for `store.backup_group(path.parse()?)` - pub fn backup_group_from_path(self: &Arc, path: &str) -> Result { - todo!("split out the namespace"); - } - */ - - /// Open a snapshot (backup directory) from this datastore. - pub fn backup_dir( - self: &Arc, - ns: BackupNamespace, - dir: pbs_api_types::BackupDir, - ) -> Result { - BackupDir::with_group(self.backup_group(ns, dir.group), dir.time) - } - - /// Open a snapshot (backup directory) from this datastore. - pub fn backup_dir_from_parts( - self: &Arc, - ns: BackupNamespace, - ty: BackupType, - id: T, - time: i64, - ) -> Result - where - T: Into, - { - self.backup_dir(ns, (ty, id.into(), time).into()) - } - - /// Open a snapshot (backup directory) from this datastore with a cached rfc3339 time string. - pub fn backup_dir_with_rfc3339>( - self: &Arc, - group: BackupGroup, - time_string: T, - ) -> Result { - BackupDir::with_rfc3339(group, time_string.into()) - } - - /* - /// Open a snapshot (backup directory) from this datastore by a snapshot path. - pub fn backup_dir_from_path(self: &Arc, path: &str) -> Result { - todo!("split out the namespace"); - } - */ - /// Syncs the filesystem of the datastore if 'sync_level' is set to /// [`DatastoreFSyncLevel::Filesystem`]. Uses syncfs(2). pub fn try_ensure_sync_level(&self) -> Result<(), Error> { @@ -1786,6 +1670,126 @@ impl DataStore { Ok(()) } +} + +impl DataStore { + #[doc(hidden)] + pub(crate) fn new_test() -> Arc { + Arc::new(Self { + inner: DataStoreImpl::new_test(), + operation: None, + }) + } + + pub fn read_config(name: &str) -> Result<(DataStoreConfig, [u8; 32], BackupLockGuard), Error> { + let lock = pbs_config::datastore::lock_config()?; + + let (config, digest) = pbs_config::datastore::config()?; + let config: DataStoreConfig = config.lookup("datastore", name)?; + Ok((config, digest, lock)) + } + + /// removes all datastores that are not configured anymore + pub fn remove_unused_datastores() -> Result<(), Error> { + let (config, _digest) = pbs_config::datastore::config()?; + + let mut map_read = DATASTORE_MAP_READ.lock().unwrap(); + let mut map_write = DATASTORE_MAP_READ.lock().unwrap(); + // removes all elements that are not in the config + map_read.retain(|key, _| config.sections.contains_key(key)); + map_write.retain(|key, _| config.sections.contains_key(key)); + Ok(()) + } + + /// trigger clearing cache entry based on maintenance mode. Entry will only + /// be cleared iff there is no other task running, if there is, the end of the + /// last running task will trigger the clearing of the cache entry. + pub fn update_datastore_cache(name: &str) -> Result<(), Error> { + let (config, _digest) = pbs_config::datastore::config()?; + let datastore: DataStoreConfig = config.lookup("datastore", name)?; + if datastore + .get_maintenance_mode() + .is_some_and(|m| m.clear_from_cache()) + { + // the datastore drop handler does the checking if tasks are running and clears the + // cache entry, so we just have to trigger it here + let _ = DataStore::::lookup_datastore(name); + } + + Ok(()) + } + + pub fn name(&self) -> &str { + self.inner.chunk_store.name() + } + + pub fn base_path(&self) -> PathBuf { + self.inner.chunk_store.base_path() + } + + /// Returns the absolute path for a backup namespace on this datastore + pub fn namespace_path(&self, ns: &BackupNamespace) -> PathBuf { + let mut path = self.base_path(); + path.reserve(ns.path_len()); + for part in ns.components() { + path.push("ns"); + path.push(part); + } + path + } + + /// Returns the absolute path for a backup_type + pub fn type_path(&self, ns: &BackupNamespace, backup_type: BackupType) -> PathBuf { + let mut full_path = self.namespace_path(ns); + full_path.push(backup_type.to_string()); + full_path + } + + /// Returns the absolute path for a backup_group + pub fn group_path( + &self, + ns: &BackupNamespace, + backup_group: &pbs_api_types::BackupGroup, + ) -> PathBuf { + let mut full_path = self.namespace_path(ns); + full_path.push(backup_group.to_string()); + full_path + } + + /// Returns the absolute path for backup_dir + pub fn snapshot_path( + &self, + ns: &BackupNamespace, + backup_dir: &pbs_api_types::BackupDir, + ) -> PathBuf { + let mut full_path = self.namespace_path(ns); + full_path.push(backup_dir.to_string()); + full_path + } + + /// Return the path of the 'owner' file. + pub(super) fn owner_path( + &self, + ns: &BackupNamespace, + group: &pbs_api_types::BackupGroup, + ) -> PathBuf { + self.group_path(ns, group).join("owner") + } + + pub fn chunk_path(&self, digest: &[u8; 32]) -> (PathBuf, String) { + self.inner.chunk_store.chunk_path(digest) + } + + pub fn verify_new(&self) -> bool { + self.inner.verify_new + } + + /* + /// Open a snapshot (backup directory) from this datastore by a snapshot path. + pub fn backup_dir_from_path(self: &Arc, path: &str) -> Result { + todo!("split out the namespace"); + } + */ pub fn old_locking(&self) -> bool { *OLD_LOCKING -- 2.39.5 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel