From mboxrd@z Thu Jan  1 00:00:00 1970
From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Mon, 31 Oct 2022 12:39:53 +0100
Message-Id: <20221031113953.3111599-4-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.30.2
In-Reply-To: <20221031113953.3111599-1-d.csapak@proxmox.com>
References: <20221031113953.3111599-1-d.csapak@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: [pbs-devel] [RFC PATCH proxmox-backup 2/2] file-restore:
 dynamically increase memory of vm for zpools

when a backup contains a drive with ZFS on it, the default memory
size (up to 384 MiB) is often not enough to hold the ZFS metadata

to improve that situation, dynamically add memory (1 GiB) whenever a
path on a ZFS pool is requested. Note that the restore image must be
started with a kernel capable of memory hotplug.

to achieve that, we also add a QMP socket to the VM, so that we can
connect to it later and hotplug the memory backend and DIMM

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
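Note for reviewers: this is roughly the QMP exchange the new
set_dynamic_memory() helper performs over the added control socket,
shown for the default 1 GiB (the server greeting and the return
payloads are abbreviated):

    <- {"QMP": {"version": ..., "capabilities": [...]}}
    -> {"execute": "qmp_capabilities"}
    <- {"return": {}}
    -> {"execute": "object-add", "arguments":
         {"qom-type": "memory-backend-ram", "id": "mem0", "size": 1073741824}}
    <- {"return": {}}
    -> {"execute": "device_add", "arguments":
         {"driver": "pc-dimm", "id": "dimm0", "memdev": "mem0"}}
    <- {"return": {}}
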
 proxmox-file-restore/src/block_driver_qemu.rs | 45 ++++++++++--
 proxmox-file-restore/src/qemu_helper.rs       | 72 ++++++++++++++++++-
 2 files changed, 107 insertions(+), 10 deletions(-)
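
For illustration, assuming a base allocation of 256 MiB and CID 42
(both values picked arbitrarily here), the memory- and monitor-related
qemu-system-x86_64 arguments become roughly:

    -m 256M,slots=1,maxmem=1280M
    -chardev socket,id=qmp,path=/run/proxmox-backup/file-restore-qmp-42.sock,server=on,wait=off
    -mon chardev=qmp,mode=control

plus memhp_default_state=online_kernel on the kernel command line
(-append), so the hotplugged DIMM is onlined automatically by the
guest kernel.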

diff --git a/proxmox-file-restore/src/block_driver_qemu.rs b/proxmox-file-restore/src/block_driver_qemu.rs
index 736ae2fd..c2cd8d49 100644
--- a/proxmox-file-restore/src/block_driver_qemu.rs
+++ b/proxmox-file-restore/src/block_driver_qemu.rs
@@ -1,7 +1,10 @@
 //! Block file access via a small QEMU restore VM using the PBS block driver in QEMU
 use std::collections::HashMap;
+use std::ffi::OsStr;
 use std::fs::{File, OpenOptions};
 use std::io::{prelude::*, BufReader, BufWriter, SeekFrom};
+use std::os::unix::prelude::OsStrExt;
+use std::path::Path;
 
 use anyhow::{bail, Error};
 use futures::FutureExt;
@@ -16,6 +19,7 @@ use pbs_datastore::catalog::ArchiveEntry;
 
 use super::block_driver::*;
 use crate::get_user_run_dir;
+use crate::qemu_helper::set_dynamic_memory;
 
 const RESTORE_VM_MAP: &str = "restore-vm-map.json";
 
@@ -119,7 +123,7 @@ fn new_ticket() -> String {
     proxmox_uuid::Uuid::generate().to_string()
 }
 
-async fn ensure_running(details: &SnapRestoreDetails) -> Result<VsockClient, Error> {
+async fn ensure_running(details: &SnapRestoreDetails) -> Result<(i32, VsockClient), Error> {
     let name = make_name(&details.repo, &details.namespace, &details.snapshot);
     let mut state = VMStateMap::load()?;
 
@@ -133,7 +137,7 @@ async fn ensure_running(details: &SnapRestoreDetails) -> Result<VsockClient, Err
             match res {
                 Ok(_) => {
                     // VM is running and we just reset its timeout, nothing to do
-                    return Ok(client);
+                    return Ok((vm.cid, client));
                 }
                 Err(err) => {
                     log::warn!("stale VM detected, restarting ({})", err);
@@ -170,13 +174,30 @@ async fn ensure_running(details: &SnapRestoreDetails) -> Result<VsockClient, Err
     };
 
     state.write()?;
-    Ok(VsockClient::new(
+    Ok((
         new_cid,
-        DEFAULT_VSOCK_PORT,
-        Some(vms.ticket),
+        VsockClient::new(new_cid, DEFAULT_VSOCK_PORT, Some(vms.ticket)),
     ))
 }
 
+fn path_is_zfs(path: &[u8]) -> bool {
+    if path.is_empty() {
+        return false;
+    }
+    let path = Path::new(OsStr::from_bytes(path));
+    let mut components = path.components();
+    let part = match components.next() {
+        Some(std::path::Component::RootDir) => match components.next() {
+            Some(std::path::Component::Normal(comp)) => comp,
+            _ => return false,
+        },
+        Some(std::path::Component::Normal(comp)) => comp,
+        _ => return false,
+    };
+
+    part == OsStr::new("zpool") && components.next().is_some()
+}
+
 async fn start_vm(cid_request: i32, details: &SnapRestoreDetails) -> Result<VMState, Error> {
     let ticket = new_ticket();
     let files = details
@@ -199,10 +220,15 @@ impl BlockRestoreDriver for QemuBlockDriver {
         mut path: Vec<u8>,
     ) -> Async<Result<Vec<ArchiveEntry>, Error>> {
         async move {
-            let client = ensure_running(&details).await?;
+            let (cid, client) = ensure_running(&details).await?;
             if !path.is_empty() && path[0] != b'/' {
                 path.insert(0, b'/');
             }
+            if path_is_zfs(&path) {
+                if let Err(err) = set_dynamic_memory(cid, None).await {
+                    log::error!("could not increase memory: {err}");
+                }
+            }
             let path = base64::encode(img_file.bytes().chain(path).collect::<Vec<u8>>());
             let mut result = client
                 .get("api2/json/list", Some(json!({ "path": path })))
@@ -221,10 +247,15 @@ impl BlockRestoreDriver for QemuBlockDriver {
         zstd: bool,
     ) -> Async<Result<Box<dyn tokio::io::AsyncRead + Unpin + Send>, Error>> {
         async move {
-            let client = ensure_running(&details).await?;
+            let (cid, client) = ensure_running(&details).await?;
             if !path.is_empty() && path[0] != b'/' {
                 path.insert(0, b'/');
             }
+            if path_is_zfs(&path) {
+                if let Err(err) = set_dynamic_memory(cid, None).await {
+                    log::error!("could not increase memory: {err}");
+                }
+            }
             let path = base64::encode(img_file.bytes().chain(path).collect::<Vec<u8>>());
             let (mut tx, rx) = tokio::io::duplex(1024 * 4096);
             let mut data = json!({ "path": path, "zstd": zstd });
diff --git a/proxmox-file-restore/src/qemu_helper.rs b/proxmox-file-restore/src/qemu_helper.rs
index d6f4c5a9..7216d351 100644
--- a/proxmox-file-restore/src/qemu_helper.rs
+++ b/proxmox-file-restore/src/qemu_helper.rs
@@ -6,7 +6,12 @@ use std::path::PathBuf;
 use std::time::{Duration, Instant};
 
 use anyhow::{bail, format_err, Error};
-use tokio::time;
+use serde_json::json;
+use tokio::io::AsyncBufRead;
+use tokio::{
+    io::{AsyncBufReadExt, AsyncWrite, AsyncWriteExt},
+    time,
+};
 
 use nix::sys::signal::{kill, Signal};
 use nix::unistd::Pid;
@@ -22,6 +27,8 @@ use crate::{backup_user, cpio};
 
 const PBS_VM_NAME: &str = "pbs-restore-vm";
 const MAX_CID_TRIES: u64 = 32;
+const DYNAMIC_MEMORY_MB: usize = 1024;
+const QMP_SOCKET_PREFIX: &str = "/run/proxmox-backup/file-restore-qmp-";
 
 fn create_restore_log_dir() -> Result<String, Error> {
     let logpath = format!("{}/file-restore", pbs_buildcfg::PROXMOX_BACKUP_LOG_DIR);
@@ -121,6 +128,54 @@ async fn create_temp_initramfs(ticket: &str, debug: bool) -> Result<(File, Strin
     Ok((tmp_file, path))
 }
 
+async fn send_qmp_request<T: AsyncBufRead + AsyncWrite + Unpin>(
+    stream: &mut T,
+    request: &str,
+) -> Result<String, Error> {
+    stream.write_all(request.as_bytes()).await?;
+    stream.flush().await?;
+    let mut buf = String::new();
+    let _ = stream.read_line(&mut buf).await?;
+    Ok(buf)
+}
+
+pub async fn set_dynamic_memory(cid: i32, target_memory: Option<usize>) -> Result<(), Error> {
+    let target_memory = match target_memory {
+        Some(size) if size > DYNAMIC_MEMORY_MB => {
+            bail!("cannot set to {}M, maximum is {}M", size, DYNAMIC_MEMORY_MB)
+        }
+        Some(size) => size,
+        None => DYNAMIC_MEMORY_MB,
+    };
+
+    let path = format!("{}{}.sock", QMP_SOCKET_PREFIX, cid);
+    let mut stream = tokio::io::BufStream::new(tokio::net::UnixStream::connect(path).await?);
+
+    let _ = stream.read_line(&mut String::new()).await?; // initial qmp message
+    let _ = send_qmp_request(&mut stream, "{\"execute\":\"qmp_capabilities\"}\n").await?;
+
+    let request = json!({
+        "execute": "object-add",
+        "arguments": {
+            "qom-type": "memory-backend-ram",
+            "id": "mem0",
+            "size": target_memory * 1024 * 1024,
+        }
+    });
+    let _ = send_qmp_request(&mut stream, &serde_json::to_string(&request)?).await?;
+    let request = json!({
+        "execute": "device_add",
+        "arguments": {
+            "driver": "pc-dimm",
+            "id": "dimm0",
+            "memdev": "mem0",
+        }
+    });
+    let _ = send_qmp_request(&mut stream, &serde_json::to_string(&request)?).await?;
+
+    Ok(())
+}
+
 pub async fn start_vm(
     // u16 so we can do wrapping_add without going too high
     mut cid: u16,
@@ -185,7 +240,8 @@ pub async fn start_vm(
         &ramfs_path,
         "-append",
         &format!(
-            "{} panic=1 zfs_arc_min=0 zfs_arc_max=0",
+            "{} panic=1 zfs_arc_min=0 zfs_arc_max=0 memhp_default_state=online_kernel
+",
             if debug { "debug" } else { "quiet" }
         ),
         "-daemonize",
@@ -252,13 +308,23 @@ pub async fn start_vm(
         let mut qemu_cmd = std::process::Command::new("qemu-system-x86_64");
         qemu_cmd.args(base_args.iter());
         qemu_cmd.arg("-m");
-        qemu_cmd.arg(ram.to_string());
+        qemu_cmd.arg(format!(
+            "{ram}M,slots=1,maxmem={}M",
+            DYNAMIC_MEMORY_MB + ram
+        ));
         qemu_cmd.args(&drives);
         qemu_cmd.arg("-device");
         qemu_cmd.arg(format!(
             "vhost-vsock-pci,guest-cid={},disable-legacy=on",
             cid
         ));
+        qemu_cmd.arg("-chardev");
+        qemu_cmd.arg(format!(
+            "socket,id=qmp,path={}{}.sock,server=on,wait=off",
+            QMP_SOCKET_PREFIX, cid
+        ));
+        qemu_cmd.arg("-mon");
+        qemu_cmd.arg("chardev=qmp,mode=control");
 
         if debug {
             let debug_args = [
-- 
2.30.2