public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
* [pbs-devel] [PATCH proxmox-backup 1/4] tape restore: split restore_chunk_archive
@ 2021-05-03 11:23 Dietmar Maurer
  2021-05-03 11:23 ` [pbs-devel] [PATCH proxmox-backup 2/4] tape restore: write datastore in separate thread Dietmar Maurer
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Dietmar Maurer @ 2021-05-03 11:23 UTC (permalink / raw)
  To: pbs-devel

Split out a separate function scan_chunk_archive() for catalog restores.

Note: This is required because we need to optimize restore_chunk_archive() to
write to the datastore in separate threads (otherwise the tape drive will stop during restore).
---
 src/api2/tape/restore.rs | 90 ++++++++++++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 21 deletions(-)

diff --git a/src/api2/tape/restore.rs b/src/api2/tape/restore.rs
index b61e99a4..39aa5187 100644
--- a/src/api2/tape/restore.rs
+++ b/src/api2/tape/restore.rs
@@ -150,12 +150,12 @@ impl DataStoreMap {
         set
     }
 
-    fn get_datastore(&self, source: &str) -> Option<&DataStore> {
+    fn get_datastore(&self, source: &str) -> Option<Arc<DataStore>> {
         if let Some(store) = self.map.get(source) {
-            return Some(&store);
+            return Some(Arc::clone(store));
         }
         if let Some(ref store) = self.default {
-            return Some(&store);
+            return Some(Arc::clone(store));
         }
 
         return None;
@@ -575,10 +575,16 @@ fn restore_archive<'a>(
 
             if datastore.is_some() || target.is_none() {
                 let checked_chunks = checked_chunks_map
-                    .entry(datastore.map(|d| d.name()).unwrap_or("_unused_").to_string())
+                    .entry(datastore.as_ref().map(|d| d.name()).unwrap_or("_unused_").to_string())
                     .or_insert(HashSet::new());
 
-                if let Some(chunks) = restore_chunk_archive(worker, reader, datastore, checked_chunks, verbose)? {
+                let chunks = if let Some(datastore) = datastore {
+                    restore_chunk_archive(worker, reader, datastore, checked_chunks, verbose)?
+                } else {
+                    scan_chunk_archive(worker, reader, verbose)?
+                };
+
+                if let Some(chunks) = chunks {
                     catalog.start_chunk_archive(
                         Uuid::from(header.uuid),
                         current_file_number,
@@ -616,10 +622,56 @@ fn restore_archive<'a>(
     Ok(())
 }
 
+// Read chunk archive without restoring data - just record contained chunks
+fn scan_chunk_archive<'a>(
+    worker: &WorkerTask,
+    reader: Box<dyn 'a + TapeRead>,
+    verbose: bool,
+) -> Result<Option<Vec<[u8;32]>>, Error> {
+
+    let mut chunks = Vec::new();
+
+    let mut decoder = ChunkArchiveDecoder::new(reader);
+
+    loop {
+        let digest = match decoder.next_chunk() {
+            Ok(Some((digest, _blob))) => digest,
+            Ok(None) => break,
+            Err(err) => {
+                let reader = decoder.reader();
+
+                // check if this stream is marked incomplete
+                if let Ok(true) = reader.is_incomplete() {
+                    return Ok(Some(chunks));
+                }
+
+                // check if this is an aborted stream without end marker
+                if let Ok(false) = reader.has_end_marker() {
+                    worker.log("missing stream end marker".to_string());
+                    return Ok(None);
+                }
+
+                // else the archive is corrupt
+                return Err(err);
+            }
+        };
+
+        worker.check_abort()?;
+
+        if verbose {
+            task_log!(worker, "Found chunk: {}", proxmox::tools::digest_to_hex(&digest));
+        }
+
+        chunks.push(digest);
+    }
+
+    Ok(Some(chunks))
+}
+
 fn restore_chunk_archive<'a>(
     worker: &WorkerTask,
     reader: Box<dyn 'a + TapeRead>,
-    datastore: Option<&DataStore>,
+    datastore: Arc<DataStore>,
     checked_chunks: &mut HashSet<[u8;32]>,
     verbose: bool,
 ) -> Result<Option<Vec<[u8;32]>>, Error> {
@@ -653,25 +705,21 @@ fn restore_chunk_archive<'a>(
 
         worker.check_abort()?;
 
-        if let Some(datastore) = datastore {
-            let chunk_exists = datastore.cond_touch_chunk(&digest, false)?;
-            if !chunk_exists {
-                blob.verify_crc()?;
+        let chunk_exists = datastore.cond_touch_chunk(&digest, false)?;
+        if !chunk_exists {
+            blob.verify_crc()?;
 
-                if blob.crypt_mode()? == CryptMode::None {
-                    blob.decode(None, Some(&digest))?; // verify digest
-                }
-                if verbose {
-                    task_log!(worker, "Insert chunk: {}", proxmox::tools::digest_to_hex(&digest));
-                }
-                datastore.insert_chunk(&blob, &digest)?;
-            } else if verbose {
-                task_log!(worker, "Found existing chunk: {}", proxmox::tools::digest_to_hex(&digest));
+            if blob.crypt_mode()? == CryptMode::None {
+                blob.decode(None, Some(&digest))?; // verify digest
             }
-            checked_chunks.insert(digest.clone());
+            if verbose {
+                task_log!(worker, "Insert chunk: {}", proxmox::tools::digest_to_hex(&digest));
+            }
+            datastore.insert_chunk(&blob, &digest)?;
         } else if verbose {
-            task_log!(worker, "Found chunk: {}", proxmox::tools::digest_to_hex(&digest));
+            task_log!(worker, "Found existing chunk: {}", proxmox::tools::digest_to_hex(&digest));
         }
+        checked_chunks.insert(digest.clone());
         chunks.push(digest);
     }
 
-- 
2.20.1




^ permalink raw reply	[flat|nested] 4+ messages in thread

* [pbs-devel] [PATCH proxmox-backup 2/4] tape restore: write datastore in separate thread
  2021-05-03 11:23 [pbs-devel] [PATCH proxmox-backup 1/4] tape restore: split restore_chunk_archive Dietmar Maurer
@ 2021-05-03 11:23 ` Dietmar Maurer
  2021-05-03 11:23 ` [pbs-devel] [PATCH proxmox-backup 3/4] tape restore: add restore speed to logs Dietmar Maurer
  2021-05-03 11:23 ` [pbs-devel] [PATCH proxmox-backup 4/4] tape restore: do not verify restored files Dietmar Maurer
  2 siblings, 0 replies; 4+ messages in thread
From: Dietmar Maurer @ 2021-05-03 11:23 UTC (permalink / raw)
  To: pbs-devel

---
 src/api2/tape/restore.rs | 35 ++++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/src/api2/tape/restore.rs b/src/api2/tape/restore.rs
index 39aa5187..97fd822f 100644
--- a/src/api2/tape/restore.rs
+++ b/src/api2/tape/restore.rs
@@ -32,7 +32,7 @@ use crate::{
     task_log,
     task_warn,
     task::TaskState,
-    tools::compute_file_csum,
+    tools::{compute_file_csum, ParallelHandler},
     api2::types::{
         DATASTORE_MAP_ARRAY_SCHEMA,
         DATASTORE_MAP_LIST_SCHEMA,
@@ -680,6 +680,24 @@ fn restore_chunk_archive<'a>(
 
     let mut decoder = ChunkArchiveDecoder::new(reader);
 
+    let datastore2 = datastore.clone();
+    let writer_pool = ParallelHandler::new(
+        "tape restore chunk writer",
+        4,
+        move |(chunk, digest): (DataBlob, [u8; 32])| {
+            // println!("verify and write {}", proxmox::tools::digest_to_hex(&digest));
+            chunk.verify_crc()?;
+            if chunk.crypt_mode()? == CryptMode::None {
+                chunk.decode(None, Some(&digest))?; // verify digest
+            }
+
+            datastore2.insert_chunk(&chunk, &digest)?;
+            Ok(())
+        },
+    );
+
+    let verify_and_write_channel = writer_pool.channel();
+
     loop {
         let (digest, blob) = match decoder.next_chunk() {
             Ok(Some((digest, blob))) => (digest, blob),
@@ -707,22 +725,21 @@ fn restore_chunk_archive<'a>(
 
         let chunk_exists = datastore.cond_touch_chunk(&digest, false)?;
         if !chunk_exists {
-            blob.verify_crc()?;
-
-            if blob.crypt_mode()? == CryptMode::None {
-                blob.decode(None, Some(&digest))?; // verify digest
-            }
-            if verbose {
+             if verbose {
                 task_log!(worker, "Insert chunk: {}", proxmox::tools::digest_to_hex(&digest));
             }
-            datastore.insert_chunk(&blob, &digest)?;
-        } else if verbose {
+            verify_and_write_channel.send((blob, digest.clone()))?;
+         } else if verbose {
             task_log!(worker, "Found existing chunk: {}", proxmox::tools::digest_to_hex(&digest));
         }
         checked_chunks.insert(digest.clone());
         chunks.push(digest);
     }
 
+    drop(verify_and_write_channel);
+
+    writer_pool.complete()?;
+
     Ok(Some(chunks))
 }
 
-- 
2.20.1




^ permalink raw reply	[flat|nested] 4+ messages in thread

* [pbs-devel] [PATCH proxmox-backup 3/4] tape restore: add restore speed to logs
  2021-05-03 11:23 [pbs-devel] [PATCH proxmox-backup 1/4] tape restore: split restore_chunk_archive Dietmar Maurer
  2021-05-03 11:23 ` [pbs-devel] [PATCH proxmox-backup 2/4] tape restore: write datastore in separate thread Dietmar Maurer
@ 2021-05-03 11:23 ` Dietmar Maurer
  2021-05-03 11:23 ` [pbs-devel] [PATCH proxmox-backup 4/4] tape restore: do not verify restored files Dietmar Maurer
  2 siblings, 0 replies; 4+ messages in thread
From: Dietmar Maurer @ 2021-05-03 11:23 UTC (permalink / raw)
  To: pbs-devel

---
 src/api2/tape/restore.rs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/api2/tape/restore.rs b/src/api2/tape/restore.rs
index 97fd822f..2e422cbc 100644
--- a/src/api2/tape/restore.rs
+++ b/src/api2/tape/restore.rs
@@ -698,6 +698,9 @@ fn restore_chunk_archive<'a>(
 
     let verify_and_write_channel = writer_pool.channel();
 
+    let start_time = std::time::SystemTime::now();
+    let mut bytes = 0;
+
     loop {
         let (digest, blob) = match decoder.next_chunk() {
             Ok(Some((digest, blob))) => (digest, blob),
@@ -728,6 +731,7 @@ fn restore_chunk_archive<'a>(
              if verbose {
                 task_log!(worker, "Insert chunk: {}", proxmox::tools::digest_to_hex(&digest));
             }
+            bytes += blob.raw_size();
             verify_and_write_channel.send((blob, digest.clone()))?;
          } else if verbose {
             task_log!(worker, "Found existing chunk: {}", proxmox::tools::digest_to_hex(&digest));
@@ -740,6 +744,15 @@ fn restore_chunk_archive<'a>(
 
     writer_pool.complete()?;
 
+    let elapsed = start_time.elapsed()?.as_secs_f64();
+
+    task_log!(
+        worker,
+        "restored {} bytes ({:.2} MB/s)",
+        bytes,
+        (bytes as f64) / (1_000_000.0 * elapsed)
+    );
+
     Ok(Some(chunks))
 }
 
-- 
2.20.1




^ permalink raw reply	[flat|nested] 4+ messages in thread

* [pbs-devel] [PATCH proxmox-backup 4/4] tape restore: do not verify restored files
  2021-05-03 11:23 [pbs-devel] [PATCH proxmox-backup 1/4] tape restore: split restore_chunk_archive Dietmar Maurer
  2021-05-03 11:23 ` [pbs-devel] [PATCH proxmox-backup 2/4] tape restore: write datastore in separate thread Dietmar Maurer
  2021-05-03 11:23 ` [pbs-devel] [PATCH proxmox-backup 3/4] tape restore: add restore speed to logs Dietmar Maurer
@ 2021-05-03 11:23 ` Dietmar Maurer
  2 siblings, 0 replies; 4+ messages in thread
From: Dietmar Maurer @ 2021-05-03 11:23 UTC (permalink / raw)
  To: pbs-devel

Do not verify restored files, because this is too slow and causes the tape
motor to stop. Instead, remove the verify_state from the manifest to indicate
that the restored snapshot is not verified.
---
 src/api2/tape/restore.rs | 53 +++++++++++++---------------------------
 1 file changed, 17 insertions(+), 36 deletions(-)

diff --git a/src/api2/tape/restore.rs b/src/api2/tape/restore.rs
index 2e422cbc..2614c68a 100644
--- a/src/api2/tape/restore.rs
+++ b/src/api2/tape/restore.rs
@@ -32,7 +32,7 @@ use crate::{
     task_log,
     task_warn,
     task::TaskState,
-    tools::{compute_file_csum, ParallelHandler},
+    tools::ParallelHandler,
     api2::types::{
         DATASTORE_MAP_ARRAY_SCHEMA,
         DATASTORE_MAP_LIST_SCHEMA,
@@ -51,17 +51,12 @@ use crate::{
         },
     },
     backup::{
-        archive_type,
         MANIFEST_BLOB_NAME,
         CryptMode,
         DataStore,
         BackupDir,
         DataBlob,
         BackupManifest,
-        ArchiveType,
-        IndexFile,
-        DynamicIndexReader,
-        FixedIndexReader,
     },
     server::{
         lookup_user_email,
@@ -790,8 +785,8 @@ fn try_restore_snapshot_archive<R: pxar::decoder::SeqRead>(
     worker: &WorkerTask,
     decoder: &mut pxar::decoder::sync::Decoder<R>,
     snapshot_path: &Path,
-    datastore: &DataStore,
-    checked_chunks: &mut HashSet<[u8;32]>,
+    _datastore: &DataStore,
+    _checked_chunks: &mut HashSet<[u8;32]>,
 ) -> Result<(), Error> {
 
     let _root = match decoder.next() {
@@ -848,6 +843,16 @@ fn try_restore_snapshot_archive<R: pxar::decoder::SeqRead>(
         if filename == manifest_file_name {
 
             let blob = DataBlob::load_from_reader(&mut contents)?;
+            let mut old_manifest = BackupManifest::try_from(blob)?;
+
+            // Remove verify_state to indicate that this snapshot is not verified
+            old_manifest.unprotected
+                .as_object_mut()
+                .map(|m| m.remove("verify_state"));
+
+            let old_manifest = serde_json::to_string_pretty(&old_manifest)?;
+            let blob = DataBlob::encode(old_manifest.as_bytes(), None, true)?;
+
             let options = CreateOptions::new();
             replace_file(&tmp_path, blob.raw_data(), options)?;
 
@@ -868,36 +873,12 @@ fn try_restore_snapshot_archive<R: pxar::decoder::SeqRead>(
         }
     }
 
-    let manifest = match manifest {
-        None => bail!("missing manifest"),
-        Some(manifest) => manifest,
-    };
-
-    for item in manifest.files() {
-        let mut archive_path = snapshot_path.to_owned();
-        archive_path.push(&item.filename);
-
-        match archive_type(&item.filename)? {
-            ArchiveType::DynamicIndex => {
-                let index = DynamicIndexReader::open(&archive_path)?;
-                let (csum, size) = index.compute_csum();
-                manifest.verify_file(&item.filename, &csum, size)?;
-                datastore.fast_index_verification(&index, checked_chunks)?;
-            }
-            ArchiveType::FixedIndex => {
-                let index = FixedIndexReader::open(&archive_path)?;
-                let (csum, size) = index.compute_csum();
-                manifest.verify_file(&item.filename, &csum, size)?;
-                datastore.fast_index_verification(&index, checked_chunks)?;
-            }
-            ArchiveType::Blob => {
-                let mut tmpfile = std::fs::File::open(&archive_path)?;
-                let (csum, size) = compute_file_csum(&mut tmpfile)?;
-                manifest.verify_file(&item.filename, &csum, size)?;
-            }
-        }
+    if manifest.is_none() {
+        bail!("missing manifest");
     }
 
+    // Do not verify anything here, because this would be too slow (causes tape stops).
+
     // commit manifest
     let mut manifest_path = snapshot_path.to_owned();
     manifest_path.push(MANIFEST_BLOB_NAME);
-- 
2.20.1




^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-05-03 11:24 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-03 11:23 [pbs-devel] [PATCH proxmox-backup 1/4] tape restore: split restore_chunk_archive Dietmar Maurer
2021-05-03 11:23 ` [pbs-devel] [PATCH proxmox-backup 2/4] tape restore: write datastore in separate thread Dietmar Maurer
2021-05-03 11:23 ` [pbs-devel] [PATCH proxmox-backup 3/4] tape restore: add restore speed to logs Dietmar Maurer
2021-05-03 11:23 ` [pbs-devel] [PATCH proxmox-backup 4/4] tape restore: do not verify restored files Dietmar Maurer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal