From: Fabian Grünbichler
To: Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com>
Date: Mon, 25 Nov 2024 14:24:13 +0100
Message-Id: <1732540770.nh1cgxu1lj.astroid@yuna.none>
In-Reply-To: <20241122144713.299130-6-h.laimer@proxmox.com>
References: <20241122144713.299130-1-h.laimer@proxmox.com>
 <20241122144713.299130-6-h.laimer@proxmox.com>
Subject: Re: [pbs-devel] [PATCH proxmox-backup v14 05/25] api: admin: add
 (un)mount endpoint for removable datastores

On November 22, 2024 3:46 pm, Hannes Laimer wrote:
> Removable datastores can be mounted unless
> - they are already mounted
> - their device is not present
>
> For unmounting, the maintenance mode is set to `unmount`, which
> prohibits the starting of any new tasks involving any IO. This mode is
> unset either
> - on completion of the unmount
> - on abort of the unmount task
>
> If the unmounting itself should fail, the maintenance mode stays in
> place and requires manual intervention by unsetting it in the config
> file directly. This is intentional, as unmounting should not fail, and
> if it does, the situation should be looked at.
>
> Signed-off-by: Hannes Laimer
> ---
> changes since v13:
>  * improve logging
>  * fix racy unmount
>  * (manually) changing maintenance during unmount will prevent unmounting
>    and result in failed unmount task
>
>  src/api2/admin/datastore.rs | 294 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 283 insertions(+), 11 deletions(-)
>
> diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
> index 3b863c06b..85522345e 100644
> --- a/src/api2/admin/datastore.rs
> +++ b/src/api2/admin/datastore.rs
> @@ -4,7 +4,7 @@ use std::collections::HashSet;
>  use std::ffi::OsStr;
>  use std::ops::Deref;
>  use std::os::unix::ffi::OsStrExt;
> -use std::path::PathBuf;
> +use std::path::{Path, PathBuf};
>  use std::sync::Arc;
>
>  use anyhow::{bail, format_err, Error};
> @@ -14,7 +14,7 @@ use hyper::{header, Body, Response, StatusCode};
>  use serde::Deserialize;
>  use serde_json::{json, Value};
>  use tokio_stream::wrappers::ReceiverStream;
> -use tracing::{info, warn};
> +use tracing::{debug, info, warn};
>
>  use proxmox_async::blocking::WrappedReaderStream;
>  use proxmox_async::{io::AsyncChannelWriter, stream::AsyncReaderStream};
> @@ -30,6 +30,7 @@ use proxmox_sys::fs::{
>      file_read_firstline, file_read_optional_string, replace_file, CreateOptions,
>  };
>  use proxmox_time::CalendarEvent;
> +use proxmox_worker_task::WorkerTaskContext;
>
>  use pxar::accessor::aio::Accessor;
>  use pxar::EntryKind;
> @@ -38,13 +39,13 @@ use pbs_api_types::{
>      print_ns_and_snapshot, print_store_and_ns, ArchiveType, Authid, BackupArchiveName,
>      BackupContent, BackupGroupDeleteStats, BackupNamespace, BackupType, Counts, CryptMode,
>      DataStoreConfig, DataStoreListItem, DataStoreStatus, GarbageCollectionJobStatus, GroupListItem,
> -    JobScheduleStatus, KeepOptions, Operation, PruneJobOptions, SnapshotListItem,
> -    SnapshotVerifyState, BACKUP_ARCHIVE_NAME_SCHEMA, BACKUP_ID_SCHEMA, BACKUP_NAMESPACE_SCHEMA,
> -    BACKUP_TIME_SCHEMA, BACKUP_TYPE_SCHEMA, CATALOG_NAME, CLIENT_LOG_BLOB_NAME, DATASTORE_SCHEMA,
> -    IGNORE_VERIFIED_BACKUPS_SCHEMA, MANIFEST_BLOB_NAME, MAX_NAMESPACE_DEPTH, NS_MAX_DEPTH_SCHEMA,
> -    PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_MODIFY, PRIV_DATASTORE_PRUNE,
> -    PRIV_DATASTORE_READ, PRIV_DATASTORE_VERIFY, UPID, UPID_SCHEMA,
> -    VERIFICATION_OUTDATED_AFTER_SCHEMA,
> +    JobScheduleStatus, KeepOptions, MaintenanceMode, MaintenanceType, Operation, PruneJobOptions,
> +    SnapshotListItem, SnapshotVerifyState, BACKUP_ARCHIVE_NAME_SCHEMA, BACKUP_ID_SCHEMA,
> +    BACKUP_NAMESPACE_SCHEMA, BACKUP_TIME_SCHEMA, BACKUP_TYPE_SCHEMA, CATALOG_NAME,
> +    CLIENT_LOG_BLOB_NAME, DATASTORE_SCHEMA, IGNORE_VERIFIED_BACKUPS_SCHEMA, MANIFEST_BLOB_NAME,
> +    MAX_NAMESPACE_DEPTH, NS_MAX_DEPTH_SCHEMA, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP,
> +    PRIV_DATASTORE_MODIFY, PRIV_DATASTORE_PRUNE, PRIV_DATASTORE_READ, PRIV_DATASTORE_VERIFY, UPID,
> +    UPID_SCHEMA, VERIFICATION_OUTDATED_AFTER_SCHEMA,
>  };
>  use pbs_client::pxar::{create_tar, create_zip};
>  use pbs_config::CachedUserInfo;
> @@ -59,8 +60,8 @@ use pbs_datastore::index::IndexFile;
>  use pbs_datastore::manifest::BackupManifest;
>  use pbs_datastore::prune::compute_prune_info;
>  use pbs_datastore::{
> -    check_backup_owner, task_tracking, BackupDir, BackupGroup, DataStore, LocalChunkReader,
> -    StoreProgress,
> +    check_backup_owner, ensure_datastore_is_mounted, task_tracking, BackupDir, BackupGroup,
> +    DataStore, LocalChunkReader, StoreProgress,
>  };
>  use pbs_tools::json::required_string_param;
>  use proxmox_rest_server::{formatter, WorkerTask};
> @@ -2394,6 +2395,275 @@ pub async fn set_backup_owner(
>      .await?
>  }
>
> +/// Here we
> +///
> +/// 1. mount the removable device to `/mount/<uuid>`
> +/// 2. bind mount `/mount/<uuid>/<datastore.path>` to `/mnt/datastore/<datastore.name>`
> +/// 3. unmount `/mount/<uuid>`
> +///
> +/// leaving us with the datastore being mounted directly with its name under /mnt/datastore/...
> +///
> +/// The reason for the randomized device mounting paths is to avoid two tasks trying to mount to
> +/// the same path. This is *very* unlikely since the device is only mounted really shortly, but
> +/// technically possible.
> +pub fn do_mount_device(datastore: DataStoreConfig) -> Result<(), Error> {
> +    if let Some(uuid) = datastore.backing_device.as_ref() {
> +        let mount_point = datastore.absolute_path();
> +        if pbs_datastore::get_datastore_mount_status(&datastore) == Some(true) {
> +            bail!("device is already mounted at '{}'", mount_point);
> +        }
> +        let tmp_mount_path = format!(
> +            "{}/{:x}",
> +            pbs_buildcfg::rundir!("/mount"),
> +            proxmox_uuid::Uuid::generate()
> +        );
> +
> +        let default_options = proxmox_sys::fs::CreateOptions::new();
> +        proxmox_sys::fs::create_path(
> +            &tmp_mount_path,
> +            Some(default_options.clone()),
> +            Some(default_options.clone()),
> +        )?;
> +
> +        info!("temporarily mounting '{uuid}' to '{}'", tmp_mount_path);
> +        crate::tools::disks::mount_by_uuid(uuid, Path::new(&tmp_mount_path))
> +            .map_err(|e| format_err!("mounting to tmp path failed: {e}"))?;

after this point, any error should trigger an unmount before being
bubbled up..
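to make that concrete, a minimal sketch of one possible shape — `with_tmp_mount`
and the injected `unmount_tmp` closure are illustrative stand-ins, not existing
helpers, and only the `anyhow` and `tracing` crates already used here are
assumed. The fallible middle section runs inside `body`, cleanup always happens,
and the original error only bubbles up afterwards:

    use std::path::Path;

    use anyhow::Error;
    use tracing::warn;

    /// Illustrative sketch: run `body` while the temporary mount at
    /// `tmp_path` is active, then always unmount and remove the tmp
    /// directory, whether `body` succeeded or not. `unmount_tmp` stands
    /// in for the unmount helper used by the patch.
    fn with_tmp_mount<T>(
        tmp_path: &str,
        unmount_tmp: impl FnOnce(&Path) -> Result<(), Error>,
        body: impl FnOnce() -> Result<T, Error>,
    ) -> Result<T, Error> {
        let result = body();

        // cleanup is best-effort: log failures instead of building an
        // error value nobody looks at, and don't mask `body`'s error
        if let Err(e) = unmount_tmp(Path::new(tmp_path)) {
            warn!("unmounting from tmp path '{tmp_path}' failed: {e}");
        }
        if let Err(e) = std::fs::remove_dir(tmp_path) {
            warn!("removing tmp path '{tmp_path}' failed: {e}");
        }

        result
    }

that way no early return between mounting the device and the final unmount can
leave the temporary mount behind.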
> +
> +        let full_store_path = format!(
> +            "{tmp_mount_path}/{}",
> +            datastore.path.trim_start_matches('/')
> +        );
> +        let backup_user = pbs_config::backup_user()?;
> +        let options = CreateOptions::new()
> +            .owner(backup_user.uid)
> +            .group(backup_user.gid);
> +
> +        proxmox_sys::fs::create_path(
> +            &mount_point,
> +            Some(default_options.clone()),
> +            Some(options.clone()),
> +        )
> +        .map_err(|e| format_err!("creating mountpoint '{mount_point}' failed: {e}"))?;
> +
> +        // can't be created before it is mounted, so we have to do it here
> +        proxmox_sys::fs::create_path(
> +            &full_store_path,
> +            Some(default_options.clone()),
> +            Some(options.clone()),
> +        )
> +        .map_err(|e| format_err!("creating datastore path '{full_store_path}' failed: {e}"))?;
> +
> +        info!(
> +            "bind mount '{}'({}) to '{}'",
> +            datastore.name, datastore.path, mount_point
> +        );
> +        if let Err(err) =
> +            crate::tools::disks::bind_mount(Path::new(&full_store_path), Path::new(&mount_point))
> +        {
> +            debug!("unmounting '{}'", tmp_mount_path);
> +            let _ = crate::tools::disks::unmount_by_mountpoint(Path::new(&tmp_mount_path))
> +                .inspect_err(|e| warn!("unmounting from tmp path '{tmp_mount_path} failed: {e}'"));
> +            let _ = std::fs::remove_dir(std::path::Path::new(&tmp_mount_path))
> +                .inspect_err(|e| warn!("removing tmp path '{tmp_mount_path} failed: {e}'"));

this doesn't log the error, so adding context doesn't help at all..

> +            return Err(format_err!(
> +                "Datastore '{}' could not be mounted: {}.",
> +                datastore.name,
> +                err
> +            ));
> +        }
> +
> +        debug!("unmounting '{}'", tmp_mount_path);
> +        let _ = crate::tools::disks::unmount_by_mountpoint(Path::new(&tmp_mount_path))
> +            .map_err(|e| format_err!("unmounting from tmp path '{tmp_mount_path} failed: {e}'"));
> +        let _ = std::fs::remove_dir(std::path::Path::new(&tmp_mount_path))
> +            .map_err(|e| format_err!("removing tmp path '{tmp_mount_path} failed: {e}'"));

same here
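for clarity, the difference the two comments above point at, as a small
self-contained sketch (illustrative function name, plain `anyhow` + `tracing`):
`map_err` into a discarded binding builds an error value that is never printed,
while `inspect_err` with `warn!` at least gets the failure into the task log:

    use anyhow::format_err;
    use tracing::warn;

    // illustrative only: contrast the two cleanup styles quoted above
    fn cleanup_styles(tmp_mount_path: &str) {
        // 1) builds an anyhow::Error with context, then silently drops it:
        let _ = std::fs::remove_dir(tmp_mount_path)
            .map_err(|e| format_err!("removing tmp path '{tmp_mount_path}' failed: {e}"));

        // 2) actually emits the failure to the log, then drops the Result:
        // (second call here only to contrast the styles; it will fail with
        // NotFound if the first call already removed the directory)
        let _ = std::fs::remove_dir(tmp_mount_path)
            .inspect_err(|e| warn!("removing tmp path '{tmp_mount_path}' failed: {e}"));
    }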
> +
> +        Ok(())
> +    } else {
> +        Err(format_err!(
> +            "Datastore '{}' cannot be mounted because it is not removable.",
> +            datastore.name
> +        ))
> +    }
> +}
> +
> +#[api(
> +    protected: true,
> +    input: {
> +        properties: {
> +            store: {
> +                schema: DATASTORE_SCHEMA,
> +            },
> +        }
> +    },
> +    returns: {
> +        schema: UPID_SCHEMA,
> +    },
> +    access: {
> +        permission: &Permission::Privilege(&["datastore", "{store}"], PRIV_DATASTORE_AUDIT, false),
> +    },
> +)]
> +/// Mount removable datastore.
> +pub fn mount(store: String, rpcenv: &mut dyn RpcEnvironment) -> Result<Value, Error> {
> +    let (section_config, _digest) = pbs_config::datastore::config()?;
> +    let datastore: DataStoreConfig = section_config.lookup("datastore", &store)?;
> +
> +    if datastore.backing_device.is_none() {
> +        bail!("datastore '{store}' is not removable");
> +    }
> +
> +    let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
> +    let to_stdout = rpcenv.env_type() == RpcEnvironmentType::CLI;
> +
> +    let upid = WorkerTask::new_thread(
> +        "mount-device",
> +        Some(store),
> +        auth_id.to_string(),
> +        to_stdout,
> +        move |_worker| do_mount_device(datastore),
> +    )?;
> +
> +    Ok(json!(upid))
> +}
> +
> +fn expect_maintanance_unmounting(
> +    store: &str,
> +) -> Result<(pbs_config::BackupLockGuard, DataStoreConfig), Error> {
> +    let lock = pbs_config::datastore::lock_config()?;
> +    let (section_config, _digest) = pbs_config::datastore::config()?;
> +    let store_config: DataStoreConfig = section_config.lookup("datastore", store)?;
> +
> +    if store_config
> +        .get_maintenance_mode()
> +        .map_or(true, |m| m.ty != MaintenanceType::Unmount)
> +    {
> +        bail!("maintenance mode is not 'Unmount'");
> +    }
> +
> +    Ok((lock, store_config))
> +}
> +
> +fn unset_maintenance(
> +    _lock: pbs_config::BackupLockGuard,
> +    mut config: DataStoreConfig,
> +) -> Result<(), Error> {
> +    let (mut section_config, _digest) = pbs_config::datastore::config()?;
> +    config.maintenance_mode = None;
> +    section_config.set_data(&config.name, "datastore", &config)?;
> +    pbs_config::datastore::save_config(&section_config)?;
> +    Ok(())
> +}
> +
> +fn do_unmount_device(
> +    datastore: DataStoreConfig,
> +    worker: Option<&dyn WorkerTaskContext>,
> +) -> Result<(), Error> {
> +    if datastore.backing_device.is_none() {
> +        bail!("can't unmount non-removable datastore");
> +    }
> +    let mount_point = datastore.absolute_path();
> +
> +    let mut active_operations = task_tracking::get_active_operations(&datastore.name)?;
> +    let mut old_status = String::new();
> +    let mut aborted = false;
> +    while active_operations.read + active_operations.write > 0 {
> +        if let Some(worker) = worker {
> +            if worker.abort_requested() {
> +                aborted = true;
> +                break;
> +            }
> +            let status = format!(
> +                "cannot unmount yet, still {} read and {} write operations active",
> +                active_operations.read, active_operations.write
> +            );
> +            if status != old_status {
> +                info!("{status}");
> +                old_status = status;
> +            }
> +        }
> +        std::thread::sleep(std::time::Duration::from_secs(1));
> +        active_operations = task_tracking::get_active_operations(&datastore.name)?;
> +    }
> +
> +    if aborted {

this still doesn't re-check whether the request was aborted.. the loop
above sleeps for a second, it's possible the worker got aborted in that
time frame..

> +        let _ = expect_maintanance_unmounting(&datastore.name)
> +            .inspect_err(|e| warn!("maintenance mode was not as expected: {e}"))
> +            .and_then(|(lock, config)| {
> +                unset_maintenance(lock, config)
> +                    .inspect_err(|e| warn!("could not reset maintenance mode: {e}"))
> +            });
> +        bail!("aborted, due to user request");
> +    } else {
> +        let (lock, config) = expect_maintanance_unmounting(&datastore.name)?;
> +        crate::tools::disks::unmount_by_mountpoint(Path::new(&mount_point))?;
> +        let _ = unset_maintenance(lock, config)
> +            .inspect_err(|e| warn!("could not reset maintenance mode: {e}"));

this should return the error..
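a minimal sketch of both fixes, with illustrative names — the injected closures
stand in for the worker's abort flag and the helpers used in the patch, this is
not the actual code:

    use anyhow::{bail, Error};

    /// Illustrative sketch of the tail of `do_unmount_device` with both
    /// fixes: re-check the abort flag after the wait loop, and propagate
    /// the maintenance-reset error instead of only logging it.
    fn finish_unmount(
        mut aborted: bool,
        abort_requested: impl Fn() -> bool,
        unmount: impl FnOnce() -> Result<(), Error>,
        unset_maintenance: impl FnOnce() -> Result<(), Error>,
    ) -> Result<(), Error> {
        // the wait loop polls once per second, so an abort can arrive
        // during the final sleep -- check the flag once more before
        // actually unmounting
        aborted = aborted || abort_requested();

        if aborted {
            // best-effort reset; the task fails as aborted either way
            let _ = unset_maintenance();
            bail!("aborted, due to user request");
        }

        unmount()?;
        // propagate instead of `let _ =`: a config left in maintenance
        // mode should surface as a failed unmount task
        unset_maintenance()
    }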
> +    }
> +    Ok(())
> +}
> +
> +#[api(
> +    protected: true,
> +    input: {
> +        properties: {
> +            store: { schema: DATASTORE_SCHEMA },
> +        },
> +    },
> +    returns: {
> +        schema: UPID_SCHEMA,
> +    },
> +    access: {
> +        permission: &Permission::Privilege(&["datastore", "{store}"], PRIV_DATASTORE_MODIFY, true),
> +    }
> +)]
> +/// Unmount a removable device that is associated with the datastore
> +pub async fn unmount(store: String, rpcenv: &mut dyn RpcEnvironment) -> Result<Value, Error> {
> +    let _lock = pbs_config::datastore::lock_config()?;
> +    let (mut section_config, _digest) = pbs_config::datastore::config()?;
> +    let mut datastore: DataStoreConfig = section_config.lookup("datastore", &store)?;
> +
> +    if datastore.backing_device.is_none() {
> +        bail!("datastore '{store}' is not removable");
> +    }
> +
> +    ensure_datastore_is_mounted(&datastore)?;
> +
> +    datastore.set_maintenance_mode(Some(MaintenanceMode {
> +        ty: MaintenanceType::Unmount,
> +        message: None,
> +    }))?;
> +    section_config.set_data(&store, "datastore", &datastore)?;
> +    pbs_config::datastore::save_config(&section_config)?;
> +
> +    drop(_lock);
> +
> +    let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
> +    let to_stdout = rpcenv.env_type() == RpcEnvironmentType::CLI;
> +
> +    if let Ok(proxy_pid) = proxmox_rest_server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)
> +    {
> +        let sock = proxmox_daemon::command_socket::path_from_pid(proxy_pid);
> +        let _ = proxmox_daemon::command_socket::send_raw(
> +            sock,
> +            &format!(
> +                "{{\"command\":\"update-datastore-cache\",\"args\":\"{}\"}}\n",
> +                &store
> +            ),
> +        )
> +        .await;
> +    }
> +
> +    let upid = WorkerTask::new_thread(
> +        "unmount-device",
> +        Some(store),
> +        auth_id.to_string(),
> +        to_stdout,
> +        move |worker| do_unmount_device(datastore, Some(&worker)),
> +    )?;
> +
> +    Ok(json!(upid))
> +}
> +
>  #[sortable]
>  const DATASTORE_INFO_SUBDIRS: SubdirMap = &[
>      (
> @@ -2432,6 +2702,7 @@ const DATASTORE_INFO_SUBDIRS: SubdirMap = &[
>              .get(&API_METHOD_LIST_GROUPS)
>              .delete(&API_METHOD_DELETE_GROUP),
>      ),
> +    ("mount", &Router::new().post(&API_METHOD_MOUNT)),
>      (
>          "namespace",
>          // FIXME: move into datastore:: sub-module?!
> @@ -2466,6 +2737,7 @@ const DATASTORE_INFO_SUBDIRS: SubdirMap = &[
>              .delete(&API_METHOD_DELETE_SNAPSHOT),
>      ),
>      ("status", &Router::new().get(&API_METHOD_STATUS)),
> +    ("unmount", &Router::new().post(&API_METHOD_UNMOUNT)),
>      (
>          "upload-backup-log",
>          &Router::new().upload(&API_METHOD_UPLOAD_BACKUP_LOG),
> --
> 2.39.5

_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel