From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 0703D1FF14C for ; Fri, 15 May 2026 09:47:39 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 2C99DED4C; Fri, 15 May 2026 09:47:38 +0200 (CEST) From: Thomas Lamprecht To: pdm-devel@lists.proxmox.com Subject: [PATCH datacenter-manager v3 08/12] subscription: add Clear Key action and per-node revert Date: Fri, 15 May 2026 09:43:18 +0200 Message-ID: <20260515074623.766766-9-t.lamprecht@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260515074623.766766-1-t.lamprecht@proxmox.com> References: <20260515074623.766766-1-t.lamprecht@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1778831192744 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.004 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: ZU3DQIJDMKR2KOMO6BX4SPXRCXSFJGPO X-Message-ID-Hash: ZU3DQIJDMKR2KOMO6BX4SPXRCXSFJGPO X-MailFrom: t.lamprecht@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Datacenter Manager development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Wire a Clear Key action on the Node Subscription Status panel that queues the live subscription on a remote 
node for removal at the next Apply Pending. Clear Key refuses with BAD_REQUEST when no pool entry is bound to (remote, node): the resource is fine, it just is not in a state where Clear Key applies. The per-node Revert action handles queued Clear Keys via a new revert-pending-clear endpoint that drops just the flag and keeps the binding, so backing out a single queued clear no longer requires the global Discard Pending. The orphan-prevention error messages on the delete and unassign paths now point at Clear Key as the remediation. Signed-off-by: Thomas Lamprecht --- Changes v2 -> v3: * Renamed from Reissue Key to Clear Key (the v2 action did not actually round-trip through the shop). Pool flag renamed from pending_reissue to pending_clear; "Reissue" stays reserved for the future shop-side action. * v2's "queue reissue + cancel" foreign-key-adoption shortcut is dropped in favour of the explicit Adopt Key action introduced in v3-0009. * Per-node Revert handles queued clears via a new revert-pending-clear endpoint, so backing out a single queued clear no longer needs the global Discard Pending. 
cli/client/src/subscriptions.rs | 66 +++ docs/subscription-registry.rst | 25 +- lib/pdm-api-types/src/subscription.rs | 6 +- lib/pdm-client/src/lib.rs | 61 +++ server/src/api/subscriptions/mod.rs | 377 ++++++++++++++++-- ui/src/configuration/subscription_keys.rs | 9 +- ui/src/configuration/subscription_registry.rs | 169 +++++++- 7 files changed, 647 insertions(+), 66 deletions(-) diff --git a/cli/client/src/subscriptions.rs b/cli/client/src/subscriptions.rs index 00c06ada..b9172a2e 100644 --- a/cli/client/src/subscriptions.rs +++ b/cli/client/src/subscriptions.rs @@ -40,6 +40,14 @@ pub fn cli() -> CommandLineInterface { .insert("auto-assign", CliCommand::new(&API_METHOD_AUTO_ASSIGN)) .insert("apply-pending", CliCommand::new(&API_METHOD_APPLY_PENDING)) .insert("clear-pending", CliCommand::new(&API_METHOD_CLEAR_PENDING)) + .insert( + "clear-key", + CliCommand::new(&API_METHOD_CLEAR_KEY).arg_param(&["remote", "node"]), + ) + .insert( + "revert-clear", + CliCommand::new(&API_METHOD_REVERT_CLEAR).arg_param(&["remote", "node"]), + ) .into() } @@ -149,6 +157,9 @@ async fn list_keys() -> Result<(), Error> { let key_width = keys.iter().map(|k| k.key.len()).max().unwrap_or(20); for key in &keys { let assignment = match (&key.remote, &key.node) { + (Some(r), Some(n)) if key.pending_clear => { + format!("{r}/{n} [clear queued]") + } (Some(r), Some(n)) => format!("{r}/{n}"), _ => "(unassigned)".to_string(), }; @@ -297,6 +308,61 @@ async fn auto_assign(apply: bool) -> Result<(), Error> { Ok(()) } +#[api( + input: { + properties: { + remote: { schema: REMOTE_ID_SCHEMA }, + node: { schema: NODE_SCHEMA }, + digest: { + schema: PROXMOX_CONFIG_DIGEST_SCHEMA, + optional: true, + }, + }, + }, +)] +/// Drop a queued Clear Key on a remote node while keeping the pool binding. +async fn revert_clear( + remote: String, + node: String, + digest: Option, +) -> Result<(), Error> { + let digest = digest.map(ConfigDigest::from); + client()? 
+ .subscription_revert_pending_clear(&remote, &node, digest) + .await?; + println!("Reverted pending clear on {remote}/{node}."); + Ok(()) +} + +#[api( + input: { + properties: { + remote: { schema: REMOTE_ID_SCHEMA }, + node: { schema: NODE_SCHEMA }, + digest: { + schema: PROXMOX_CONFIG_DIGEST_SCHEMA, + optional: true, + }, + }, + }, +)] +/// Queue a Clear Key on a remote node so its subscription can be removed at next Apply Pending. +/// +/// Refuses if no pool entry is bound to (remote, node): foreign live subscriptions must first +/// be imported via the explicit Adopt Key action, never as a side effect of queueing a clear. +async fn clear_key( + remote: String, + node: String, + digest: Option, +) -> Result<(), Error> { + let digest = digest.map(ConfigDigest::from); + client()? + .subscription_queue_clear(&remote, &node, digest) + .await?; + println!("Queued Clear Key on {remote}/{node}; run apply-pending to commit."); + Ok(()) +} + #[api( input: { properties: { diff --git a/docs/subscription-registry.rst b/docs/subscription-registry.rst index f1e6fd5b..68b879be 100644 --- a/docs/subscription-registry.rst +++ b/docs/subscription-registry.rst @@ -24,21 +24,28 @@ configured remote alongside any pending plan from the pool. Nodes that already h registry assigned appear with the live level; nodes with a pending pool assignment show a clock icon until the change is pushed to the remote. -From this view an operator can clear a pending assignment or remove the key from the pool entirely, -which is convenient when a node is known to be wrong without first having to find the matching entry -on the key list. +From this view an operator can revert a pending change on the selected node (an unpushed +assignment or a queued Clear Key) or queue a new Clear Key. Clear Key frees the live +subscription key from a node so it can be reassigned elsewhere. The action is queued until it +is committed via Apply Pending or reverted on a per-node basis. 
-Assignment ----------- +Assignment and Clearing +----------------------- A key can be pinned to a single node manually. The Auto-Assign action proposes a plan that fills unsubscribed nodes from free pool keys. For -Proxmox VE, the smallest covering key by socket count is chosen, so a 4-socket key is not used on a -2-socket host while a larger host stays unsubscribed. +Proxmox VE, the smallest covering key by socket count is chosen, so a 4-socket key is not used +on a 2-socket host while a larger host stays unsubscribed. -The proposed plan can be inspected before it is applied. Apply Pending pushes the queued keys to -their target nodes; if a push fails the remaining queue is kept intact for retry. Discard Pending +The Clear Key action queues the live subscription on the selected node for removal. The +action requires the (remote, node) to already be tracked by the pool. Apply Pending later +issues the removal on the remote and releases the pool binding so the key becomes available +for reassignment. Discard Pending drops the queued clear without touching the remote; the +binding stays intact and the operator can retry. + +The proposed plan can be inspected before it is applied. Apply Pending walks the queue in +order; if any push or clear fails the remaining queue is kept intact for retry. Discard Pending drops the plan without touching any remote. Permissions diff --git a/lib/pdm-api-types/src/subscription.rs b/lib/pdm-api-types/src/subscription.rs index 559f725d..7d3c8436 100644 --- a/lib/pdm-api-types/src/subscription.rs +++ b/lib/pdm-api-types/src/subscription.rs @@ -352,7 +352,7 @@ pub struct SubscriptionKeyEntry { /// to free the key from `remote`/`node` so it can be reassigned to a different node. /// /// Apply Pending issues a DELETE on the remote and then clears `remote`/`node` on success. 
- /// Clear Pending only resets this flag and leaves the binding untouched so the operator can + /// Discard Pending only resets this flag and leaves the binding untouched so the operator can /// retry. A bare flag is enough since the (remote, node) binding lives next to it. /// /// Omitted from the serialised representation when false so the on-disk section and the @@ -552,7 +552,7 @@ pub struct RemoteNodeStatus { /// Current key on the node (from remote query). #[serde(skip_serializing_if = "Option::is_none")] pub current_key: Option, - /// True when the pool entry bound to this node has a pending clear queued. + /// True when the pool has a clear queued for this node. Omitted on the wire when false. #[serde(default, skip_serializing_if = "std::ops::Not::not")] pub pending_clear: bool, } @@ -562,7 +562,7 @@ pub struct RemoteNodeStatus { #[serde(rename_all = "kebab-case")] /// Result of the bulk clear-pending API endpoint. pub struct ClearPendingResult { - /// Number of pool entries whose pending push or reissue was cleared. + /// Number of pool entries whose pending push or clear was cleared. pub cleared: u32, } diff --git a/lib/pdm-client/src/lib.rs b/lib/pdm-client/src/lib.rs index 1fed0e85..530f2b5b 100644 --- a/lib/pdm-client/src/lib.rs +++ b/lib/pdm-client/src/lib.rs @@ -901,6 +901,9 @@ impl PdmClient { /// server-side wait surface lands this method becomes a single GET with no behaviour change /// for callers. /// + /// No built-in time bound; wrap in `tokio::time::timeout` if needed. Dropping the future + /// stops the client-side polling only - the server-side worker keeps running. + /// /// Native-only: the polling loop relies on `tokio::time::sleep`, which is not available on /// the wasm32 target the UI builds for. #[cfg(not(target_arch = "wasm32"))] @@ -1270,6 +1273,64 @@ impl PdmClient { .data) } + /// Queue a clear for the subscription on `remote`/`node`. 
Apply Pending later removes the + /// subscription from the node so the key can be reassigned elsewhere; Discard Pending undoes + /// the queueing without touching the remote. + pub async fn subscription_queue_clear( + &self, + remote: &str, + node: &str, + digest: Option, + ) -> Result<(), Error> { + #[derive(Serialize)] + struct ClearArgs<'a> { + remote: &'a str, + node: &'a str, + #[serde(skip_serializing_if = "Option::is_none")] + digest: Option, + } + self.0 + .post( + "/api2/extjs/subscriptions/queue-clear", + &ClearArgs { + remote, + node, + digest, + }, + ) + .await? + .nodata() + } + + /// Drop a queued Clear Key on `remote`/`node` while keeping the pool binding. Used by the + /// per-node Revert action; the global Discard Pending path scrubs every pending change at + /// once. + pub async fn subscription_revert_pending_clear( + &self, + remote: &str, + node: &str, + digest: Option, + ) -> Result<(), Error> { + #[derive(Serialize)] + struct Args<'a> { + remote: &'a str, + node: &'a str, + #[serde(skip_serializing_if = "Option::is_none")] + digest: Option, + } + self.0 + .post( + "/api2/extjs/subscriptions/revert-pending-clear", + &Args { + remote, + node, + digest, + }, + ) + .await? + .nodata() + } + /// Clear every pending assignment in one bulk transaction; returns the count of cleared /// entries. 
pub async fn subscription_clear_pending( diff --git a/server/src/api/subscriptions/mod.rs b/server/src/api/subscriptions/mod.rs index aa3146ec..9c313e8c 100644 --- a/server/src/api/subscriptions/mod.rs +++ b/server/src/api/subscriptions/mod.rs @@ -51,6 +51,14 @@ const SUBDIRS: SubdirMap = &sorted!([ ), ("keys", &KEYS_ROUTER), ("node-status", &Router::new().get(&API_METHOD_NODE_STATUS)), + ( + "queue-clear", + &Router::new().post(&API_METHOD_QUEUE_CLEAR) + ), + ( + "revert-pending-clear", + &Router::new().post(&API_METHOD_REVERT_PENDING_CLEAR), + ), ]); const KEYS_ROUTER: Router = Router::new() @@ -288,7 +296,7 @@ fn get_key(key: String, rpcenv: &mut dyn RpcEnvironment) -> Result, @@ -371,7 +379,7 @@ async fn delete_key( http_bail!( BAD_REQUEST, "key '{key}' is currently bound to a remote node with a live active \ - subscription; release it on the remote first" + subscription; run Clear Key on the Node Subscription Status panel first" ); } @@ -514,7 +522,8 @@ async fn set_assignment( http_bail!( BAD_REQUEST, "key '{key}' is currently bound to a remote node with a live active \ - subscription; release it on the remote before rebinding" + subscription; run Clear Key on the Node Subscription Status panel before \ + rebinding" ); } @@ -551,6 +560,13 @@ async fn set_assignment( .expect("entry verified to exist under lock above"); entry.remote = Some(remote); entry.node = Some(node); + if post_moves { + // Drop any clear queued against the previous owner so it does not silently fire on + // the new node at the next Apply Pending. Only on an actual move - re-asserting the + // same binding must not silently cancel a queued Clear Key (use Revert / Clear + // Pending for that). + entry.pending_clear = false; + } pdm_config::subscriptions::save_config(&config) }) @@ -577,8 +593,8 @@ async fn set_assignment( /// Drop the remote-node binding for a pool key. 
/// /// Refuses when the binding is currently synced (the assigned key is the live active key on -/// its remote): unassigning then would orphan that subscription, so the operator must release -/// the live subscription on the remote first. +/// its remote): unassigning then would orphan that subscription, so the operator must run +/// Clear Key on the Node Subscription Status panel first. async fn clear_assignment( key: String, digest: Option, @@ -658,7 +674,7 @@ async fn clear_assignment( http_bail!( BAD_REQUEST, "key '{key}' is currently bound to a remote node with a live active \ - subscription; release it on the remote first" + subscription; run Clear Key on the Node Subscription Status panel first" ); } // Safe: the earlier `config.get(&key).cloned()` above proved the key exists, and the @@ -668,6 +684,9 @@ async fn clear_assignment( .expect("entry verified to exist under lock above"); entry.remote = None; entry.node = None; + // pending_clear without a binding is meaningless - reset so a later reassignment does + // not re-trigger a stale teardown. + entry.pending_clear = false; pdm_config::subscriptions::save_config(&config) }) @@ -679,9 +698,9 @@ async fn clear_assignment( /// Pre-lock check for the unassign / delete-key paths ([`clear_assignment`] and [`delete_key`]): /// returns the (remote, node) the entry is currently active on, if any, so the lock-protected -/// branch can refuse the operation and prompt the operator to release the live subscription -/// on the remote first. Returns `None` for entries with no binding, no live subscription, or -/// a live subscription whose key does not match the entry. +/// branch can refuse the operation and prompt the operator to run Clear Key first. Returns +/// `None` for entries with no binding, no live subscription, or a live subscription whose key +/// does not match the entry. 
/// /// Takes the binding from the caller's pre-read entry rather than re-reading config so the /// remote we hit on the network is the one the caller's pre-priv check already covered: a @@ -749,6 +768,192 @@ async fn push_key_to_remote(remote: &Remote, key: &str, node_name: &str) -> Resu Ok(()) } +/// Tear down a node's subscription via the remote's `/nodes/{node}/subscription` endpoint. +async fn delete_subscription_on_remote( + remote: &Remote, + product_type: ProductType, + node_name: &str, +) -> Result<(), Error> { + match product_type { + ProductType::Pve => { + let client = crate::connection::make_pve_client(remote)?; + client.delete_subscription(node_name).await?; + } + ProductType::Pbs => { + let client = crate::connection::make_pbs_client(remote)?; + client.delete_subscription().await?; + } + ProductType::Pmg | ProductType::Pom => { + bail!("PDM cannot clear '{product_type}' keys: no remote support yet"); + } + } + + info!("removed subscription from {}/{node_name}", remote.id); + Ok(()) +} + +#[api( + input: { + properties: { + remote: { schema: REMOTE_ID_SCHEMA }, + // NODE_SCHEMA rejects path-traversal input before it ends up interpolated into + // the remote URL `/api2/extjs/nodes/{node}/subscription`. + node: { schema: NODE_SCHEMA }, + digest: { + type: ConfigDigest, + optional: true, + }, + }, + }, + access: { + permission: &Permission::Privilege(&["system"], PRIV_SYS_MODIFY, false), + }, +)] +/// Queue a clear on a remote node, that is, mark its subscription for removal so the key can +/// be reassigned elsewhere. +/// +/// Sets `pending_clear` on the pool entry currently bound to (remote, node). Apply Pending +/// later issues the DELETE on the remote and clears the binding on success; Discard Pending only +/// resets the flag and leaves the binding intact so the operator can retry. 
+/// +/// Refuses if no pool entry is bound to (remote, node): importing a foreign live subscription +/// into the pool is the job of the explicit Adopt Key action, not a side effect of queueing a +/// clear. The split keeps the audit trail honest; queueing a clear should only ever schedule +/// a removal, never silently materialise new pool state the operator did not ask for. +/// +/// Per-remote `PRIV_RESOURCE_MODIFY` is enforced inside the handler so an operator with global +/// system access alone cannot tear down subscriptions on remotes they have no other authority on. +async fn queue_clear( + remote: String, + node: String, + digest: Option, + rpcenv: &mut dyn RpcEnvironment, +) -> Result<(), Error> { + let auth_id: Authid = rpcenv + .get_auth_id() + .context("no authid available")? + .parse()?; + let user_info = CachedUserInfo::new()?; + user_info.check_privs( + &auth_id, + &["resource", &remote], + PRIV_RESOURCE_MODIFY, + false, + )?; + + // No live fetch: the pool entry's binding is authoritative for queueing the operation; the + // worker re-validates against the live remote at apply time and aborts if the remote runs a + // different key. + // + // The lock + sync IO runs on a blocking thread so the async runtime stays free for other + // work even when /etc/proxmox-datacenter-manager/subscriptions is on slow storage. 
+ let new_digest = tokio::task::spawn_blocking(move || -> Result { + let _lock = pdm_config::subscriptions::lock_config()?; + let (mut config, config_digest) = pdm_config::subscriptions::config()?; + config_digest.detect_modification(digest.as_ref())?; + + let bound_id = config + .iter() + .find(|(_, e)| { + e.remote.as_deref() == Some(remote.as_str()) + && e.node.as_deref() == Some(node.as_str()) + }) + .map(|(id, _)| id.to_string()); + + let Some(id) = bound_id else { + http_bail!( + BAD_REQUEST, + "no pool-managed assignment on {remote}/{node}; \ + run Adopt Key first to import a foreign subscription into the pool" + ); + }; + let entry = config + .get_mut(&id) + .expect("entry verified to exist under lock above"); + if entry.pending_clear { + http_bail!(BAD_REQUEST, "clear already queued for {remote}/{node}"); + } + entry.pending_clear = true; + + pdm_config::subscriptions::save_config(&config) + }) + .await??; + rpcenv["digest"] = new_digest.to_hex().into(); + Ok(()) +} + +#[api( + input: { + properties: { + remote: { schema: REMOTE_ID_SCHEMA }, + node: { schema: NODE_SCHEMA }, + digest: { + type: ConfigDigest, + optional: true, + }, + }, + }, + access: { + permission: &Permission::Privilege(&["system"], PRIV_SYS_MODIFY, false), + }, +)] +/// Drop a queued Clear Key on `remote`/`node` while keeping the binding intact. +/// +/// Backs out a Queue Clear on a single node without going through the global Discard Pending +/// path (which scrubs every pending change in the pool). The binding stays so the operator can +/// retry the queueing or leave the live subscription untouched. +async fn revert_pending_clear( + remote: String, + node: String, + digest: Option, + rpcenv: &mut dyn RpcEnvironment, +) -> Result<(), Error> { + let auth_id: Authid = rpcenv + .get_auth_id() + .context("no authid available")? 
+ .parse()?; + let user_info = CachedUserInfo::new()?; + user_info.check_privs( + &auth_id, + &["resource", &remote], + PRIV_RESOURCE_MODIFY, + false, + )?; + + let new_digest = tokio::task::spawn_blocking(move || -> Result { + let _lock = pdm_config::subscriptions::lock_config()?; + let (mut config, config_digest) = pdm_config::subscriptions::config()?; + config_digest.detect_modification(digest.as_ref())?; + + let bound_id = config + .iter() + .find(|(_, e)| { + e.remote.as_deref() == Some(remote.as_str()) + && e.node.as_deref() == Some(node.as_str()) + }) + .map(|(id, _)| id.to_string()); + + let Some(id) = bound_id else { + http_bail!( + BAD_REQUEST, + "no pool-managed assignment on {remote}/{node}" + ); + }; + let entry = config + .get_mut(&id) + .expect("entry verified to exist under lock above"); + if !entry.pending_clear { + http_bail!(BAD_REQUEST, "no pending clear queued on {remote}/{node}"); + } + entry.pending_clear = false; + + pdm_config::subscriptions::save_config(&config) + }) + .await??; + rpcenv["digest"] = new_digest.to_hex().into(); + Ok(()) +} + #[api( input: { properties: { @@ -985,6 +1190,9 @@ async fn bulk_assign( if entry.remote.is_none() { entry.remote = Some(p.remote.clone()); entry.node = Some(p.node.clone()); + // Mirror set_assignment: stale flags from a prior binding cycle must + // not silently fire against the new target at the next Apply Pending. 
+ entry.pending_clear = false; applied.push(p.clone()); } } @@ -1160,7 +1368,6 @@ async fn apply_pending( async fn run_apply_pending(auth_id: Authid) -> Result<(), Error> { let user_info = CachedUserInfo::new()?; let (remotes_config, _) = pdm_config::remotes::config()?; - let (config, _) = pdm_config::subscriptions::config()?; let node_statuses = collect_status_uncached(&remotes_config).await; let pending = compute_pending(&user_info, &auth_id, &node_statuses)?; @@ -1176,34 +1383,103 @@ async fn run_apply_pending(auth_id: Authid) -> Result<(), Error> { for entry in pending { let Some(remote) = remotes_config.get(&entry.remote) else { bail!( - "remote '{}' vanished, aborting after {ok}/{total} successful pushes", + "remote '{}' vanished, aborting after {ok}/{total} successful operations", entry.remote, ); }; - // Honour the case where the operator unassigned the key while the worker was queued. + // Re-read the pool on every iteration: the previous iteration's `save_config` (Clear + // branch) makes the at-start snapshot stale, and a parallel admin's Discard Pending + // between worker start and this iteration must cancel a planned op rather than have us + // execute it against a flag the operator just retracted. + let (config, _) = pdm_config::subscriptions::config()?; if !pool_assignment_still_valid(&config, &entry) { info!( - "skipping {}/{}: pool assignment changed before worker ran", + "skipping {}/{}: pool entry changed before worker ran", + entry.remote, entry.node + ); + continue; + } + // For each branch, the entry's current `pending_clear` state must still match the planned + // op or the operator's intent has changed under us (a Discard Pending fired for Clear, or + // a parallel queue_clear fired for Push). 
+ let current_pending_clear = config + .get(&entry.key) + .map(|e| e.pending_clear) + .unwrap_or(false); + let op_consistent = match entry.op { + PendingOp::Push => !current_pending_clear, + PendingOp::Clear => current_pending_clear, + }; + if !op_consistent { + info!( + "skipping {}/{}: pending state changed before this step ran", entry.remote, entry.node ); continue; } let redacted = redact_key(&entry.key); - info!("pushing {redacted} to {}/{}...", entry.remote, entry.node); - if let Err(err) = push_key_to_remote(remote, &entry.key, &entry.node).await { - bail!( - "push of {redacted} to {}/{} failed after {ok}/{total} successful pushes: {err}", - entry.remote, - entry.node, - ); + match entry.op { + PendingOp::Push => { + info!("pushing {redacted} to {}/{}...", entry.remote, entry.node); + if let Err(err) = push_key_to_remote(remote, &entry.key, &entry.node).await { + bail!( + "push of {redacted} to {}/{} failed after {ok}/{total} successful operations: {err}", + entry.remote, + entry.node, + ); + } + } + PendingOp::Clear => { + let product_type = match ProductType::from_key(&entry.key) { + Some(ty) => ty, + None => bail!("unrecognised key format: {redacted}"), + }; + info!( + "clearing {redacted} from {}/{}...", + entry.remote, entry.node, + ); + if let Err(err) = + delete_subscription_on_remote(remote, product_type, &entry.node).await + { + bail!( + "clear of {redacted} on {}/{} failed after {ok}/{total} successful operations: {err}", + entry.remote, + entry.node, + ); + } + // Clear the binding under the config lock. A subsequent compute_pending call must + // not propose another push or clear for the same entry. The lock + sync IO run + // on a blocking thread so the worker does not park one of the async runtime's + // worker threads on file IO. 
+ let entry_key = entry.key.clone(); + let entry_remote = entry.remote.clone(); + let entry_node = entry.node.clone(); + tokio::task::spawn_blocking(move || -> Result<(), Error> { + let _lock = pdm_config::subscriptions::lock_config()?; + let (mut updated, _) = pdm_config::subscriptions::config()?; + if let Some(stored) = updated.get_mut(&entry_key) { + if stored.remote.as_deref() == Some(entry_remote.as_str()) + && stored.node.as_deref() == Some(entry_node.as_str()) + { + stored.remote = None; + stored.node = None; + stored.pending_clear = false; + } + } + // Worker context: no `rpcenv` to set, post-save digest is unused here. + let _ = pdm_config::subscriptions::save_config(&updated)?; + Ok(()) + }) + .await??; + } } info!(" success"); invalidate_subscription_info_for_remote(&entry.remote); ok += 1; } - info!("finished: {ok}/{total} pushes succeeded"); + info!("finished: {ok}/{total} operations succeeded"); Ok(()) } @@ -1221,14 +1497,19 @@ async fn run_apply_pending(auth_id: Authid) -> Result<(), Error> { permission: &Permission::Privilege(&["system"], PRIV_SYS_MODIFY, false), }, )] -/// Clear every pending assignment in one bulk transaction. +/// Drop every queued change in one bulk transaction. /// -/// Pending = pool key bound to a remote node whose live `current_key` does not match the -/// assigned pool key (a different live key, no key, or no row returned at all because the remote -/// is unreachable / the node is gone). Clears only those entries the caller has -/// `PRIV_RESOURCE_MODIFY` on; remotes the caller may only audit are skipped. Mirrors -/// `apply-pending` but drops the assignments instead of pushing them, so an operator can disown -/// stuck assignments without first having to bring the target back online. 
+/// Two shapes of pending change are discarded: +/// * pool key bound to a remote node whose live `current_key` does not match the assigned +/// pool key (a different live key, no key, or no row returned at all because the remote is +/// unreachable / the node is gone): the binding is dropped, the key returns to the free +/// pool, and the remote is not touched. +/// * queued Clear Keys (`pending_clear = true`): the flag is cleared; the binding and the +/// live key on the remote stay intact. +/// +/// Only entries the caller has `PRIV_RESOURCE_MODIFY` on are touched; remotes the caller may +/// only audit are skipped. Mirrors `apply-pending` but drops the queue instead of pushing it, +/// so an operator can disown stuck changes without first having to bring the target back online. /// /// The optional `digest` is checked twice: once before the live-state fetch so a stale browser /// tab is rejected up-front, and again under the config lock so a parallel admin edit between @@ -1269,8 +1550,23 @@ async fn clear_pending( if stored.remote.as_deref() == Some(entry.remote.as_str()) && stored.node.as_deref() == Some(entry.node.as_str()) { - stored.remote = None; - stored.node = None; + match entry.op { + PendingOp::Clear => { + // Only reset the flag - leave the binding so the operator can + // retry the clear without having to re-import a foreign key + // from scratch. + stored.pending_clear = false; + } + PendingOp::Push => { + stored.remote = None; + stored.node = None; + // Defensive: an entry that flipped to pending_clear between + // the pre-lock snapshot and now would otherwise leave a + // meaningless flag on a now-unbound entry. Reset alongside the + // binding clear. + stored.pending_clear = false; + } + } cleared += 1; } } @@ -1293,12 +1589,21 @@ async fn clear_pending( Ok(ClearPendingResult { cleared }) } -/// Plan entry for one pending push. +/// Plan entry for one pending push or clear. 
#[derive(Clone, Debug)] struct PendingEntry { key: String, remote: String, node: String, + op: PendingOp, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum PendingOp { + /// PUT the assigned key to the remote because the live state does not match. + Push, + /// DELETE the subscription on the remote and clear the binding on success. + Clear, } fn compute_pending( @@ -1318,6 +1623,15 @@ fn compute_pending( return None; } + if entry.pending_clear { + return Some(PendingEntry { + key: entry.key.clone(), + remote: remote.to_string(), + node: node.to_string(), + op: PendingOp::Clear, + }); + } + // Pending push = the live current key on the node does not match the assigned pool // key. Subscription health (Invalid, Expired, ...) is a separate axis surfaced via // the Status column; re-pushing the same key would not change the shop's verdict. @@ -1335,6 +1649,7 @@ fn compute_pending( key: entry.key.clone(), remote: remote.to_string(), node: node.to_string(), + op: PendingOp::Push, }) }) .collect()) diff --git a/ui/src/configuration/subscription_keys.rs b/ui/src/configuration/subscription_keys.rs index e43543ae..5807504d 100644 --- a/ui/src/configuration/subscription_keys.rs +++ b/ui/src/configuration/subscription_keys.rs @@ -5,7 +5,9 @@ use std::rc::Rc; use anyhow::Error; use pdm_api_types::remotes::RemoteType; -use pdm_api_types::subscription::{ProductType, RemoteNodeStatus, SubscriptionKeyEntry}; +use pdm_api_types::subscription::{ + ProductType, RemoteNodeStatus, SubscriptionKeyEntry, +}; use yew::virtual_dom::{Key, VComp, VNode}; use proxmox_yew_comp::percent_encoding::percent_encode_component; @@ -345,7 +347,7 @@ impl LoadableComponent for SubscriptionKeyGridComp { }); let body = match assignment { Some((remote, node)) => tr!( - "Remove {key} from the key pool? It is still assigned to {remote}/{node}; the assignment is released without removing the subscription on the remote.", + "Remove {key} from the key pool? 
It is still assigned to {remote}/{node}; the assignment is released without removing any subscription on the remote. Use Clear Key on the Node Subscription Status panel first to release a live subscription on that node too.", key = key.to_string(), remote = remote, node = node, @@ -373,7 +375,8 @@ impl LoadableComponent for SubscriptionKeyGridComp { /// Returns true when the pool entry's binding currently runs the same key on the remote and is /// Active - meaning a clear-assignment would orphan the live subscription. Mirrors the -/// server-side gate; the operator must release the live subscription on the remote first. +/// server-side gate; the operator must run Clear Key on the Node Subscription Status panel +/// first. fn is_synced_assignment(entry: &SubscriptionKeyEntry, statuses: &[RemoteNodeStatus]) -> bool { let (Some(remote), Some(node)) = (entry.remote.as_deref(), entry.node.as_deref()) else { return false; diff --git a/ui/src/configuration/subscription_registry.rs b/ui/src/configuration/subscription_registry.rs index 513fa3a0..7471fae4 100644 --- a/ui/src/configuration/subscription_registry.rs +++ b/ui/src/configuration/subscription_registry.rs @@ -209,6 +209,14 @@ impl From for VNode { } } +/// What the per-node Revert button should do on the selected entry. +enum RevertTarget { + /// Drop the pool's binding (the key was assigned but never pushed). Carries the pool key. + Unassign(String), + /// Cancel a queued Clear Key while keeping the binding intact. + CancelClear { remote: String, node: String }, +} + pub enum Msg { LoadFinished { nodes: Vec, @@ -220,9 +228,11 @@ pub enum Msg { BulkAssignApply(AutoAssignProposal), ApplyPending, ClearPending, - /// Revert the pending change on the currently-selected node: drop the unpushed pool - /// assignment without touching the remote. + /// Revert the pending change on the currently-selected node: drop an unpushed binding or + /// cancel a queued Clear Key (dispatched on the [`RevertTarget`] variant). 
RevertSelectedNode, + /// Open the confirmation dialog for queueing a clear on the selected node. + QueueClearForSelectedNode, /// Open the Assign Key dialog for the currently-selected node. AssignKeyToSelectedNode, } @@ -232,6 +242,13 @@ pub enum ViewState { ConfirmAutoAssign(AutoAssignProposal), ConfirmApplyPending, ConfirmClearPending, + /// Pending confirmation to queue a clear for `(remote, node)`. The current key on the + /// node is shown in the dialog body when available. + ConfirmQueueClear { + remote: String, + node: String, + current_key: Option, + }, /// Assign a pool key to the given node. Opens from the right panel's Assign Key button. AssignKeyToNode { remote: String, @@ -590,23 +607,46 @@ impl LoadableComponent for SubscriptionRegistryComp { }); } Msg::RevertSelectedNode => { - let Some(key) = self.clear_assignment_target_key() else { + let Some(target) = self.revert_target() else { return false; }; let link = ctx.link().clone(); let digest = self.pool_digest.clone(); ctx.link().spawn(async move { - let url = format!( - "/subscriptions/keys/{}/assignment", - percent_encode_component(&key), - ); - let query = digest.map(|d| serde_json::json!({ "digest": d })); - if let Err(err) = http_delete(&url, query).await { - link.show_error(tr!("Revert"), err.to_string(), true); + let err_msg: Option = match target { + RevertTarget::Unassign(key) => { + let url = format!( + "/subscriptions/keys/{}/assignment", + percent_encode_component(&key), + ); + let query = digest.map(|d| serde_json::json!({ "digest": d })); + http_delete(&url, query).await.err().map(|e| e.to_string()) + } + RevertTarget::CancelClear { remote, node } => { + let digest = digest.map(pdm_client::ConfigDigest::from); + crate::pdm_client() + .subscription_revert_pending_clear(&remote, &node, digest) + .await + .err() + .map(|e| e.to_string()) + } + }; + if let Some(msg) = err_msg { + link.show_error(tr!("Revert"), msg, true); } link.send_reload(); }); } + Msg::QueueClearForSelectedNode => { + 
let Some((remote, node, current_key)) = self.selected_node_for_clear() else { + return false; + }; + ctx.link().change_view(Some(ViewState::ConfirmQueueClear { + remote, + node, + current_key, + })); + } Msg::AssignKeyToSelectedNode => { let Some((remote, node, ty, node_sockets)) = self.assign_target_for_selected_node() @@ -770,7 +810,8 @@ impl LoadableComponent for SubscriptionRegistryComp { Some( ConfirmDialog::new( tr!("Discard Pending Changes"), - tr!("Discard all assignments that have not yet been applied to the remote nodes?"), + tr!("Discard all queued assignments and cancel all queued Clear Key actions? \ + The remote nodes are not touched."), ) .icon_class("fa fa-question-circle") .on_confirm({ @@ -787,6 +828,61 @@ impl LoadableComponent for SubscriptionRegistryComp { ViewState::ConfirmAutoAssign(proposal) => { Some(self.render_auto_assign_dialog(ctx, proposal)) } + ViewState::ConfirmQueueClear { + remote, + node, + current_key, + } => { + use pwt::widget::ConfirmDialog; + let question = match current_key { + Some(k) => tr!( + "Queue a clear of {key} on {remote}/{node}?", + key = k.clone(), + remote = remote.clone(), + node = node.clone(), + ), + None => tr!( + "Queue a clear on {remote}/{node}?", + remote = remote.clone(), + node = node.clone(), + ), + }; + let body = Column::new() + .gap(2) + .with_child(Container::from_tag("p").with_child(question)) + .with_child(Container::from_tag("p").with_child(tr!( + "'Apply Pending' will remove the subscription from the node so the key can be reassigned elsewhere; 'Discard Pending' undoes the queueing without touching the remote." 
+ ))); + let remote_for_cb = remote.clone(); + let node_for_cb = node.clone(); + let link = ctx.link().clone(); + let close_link = ctx.link().clone(); + let digest_for_cb = self.pool_digest.clone(); + Some( + ConfirmDialog::default() + .title(tr!("Clear Key")) + .confirm_message(body) + .on_confirm(move |_| { + let link = link.clone(); + let remote = remote_for_cb.clone(); + let node = node_for_cb.clone(); + let digest = digest_for_cb.clone(); + link.clone().spawn(async move { + let digest = digest.map(pdm_client::ConfigDigest::from); + if let Err(err) = crate::pdm_client() + .subscription_queue_clear(&remote, &node, digest) + .await + { + link.show_error(tr!("Clear Key"), err.to_string(), true); + } + link.change_view(None); + link.send_reload(); + }); + }) + .on_close(move |_| close_link.change_view(None)) + .into(), + ) + } ViewState::AssignKeyToNode { remote, node, @@ -847,7 +943,8 @@ impl SubscriptionRegistryComp { .class(FlexFit); let can_assign_key = self.assign_target_for_selected_node().is_some(); - let can_revert = self.clear_assignment_target_key().is_some(); + let can_revert = self.revert_target().is_some(); + let can_clear_key = self.selected_node_for_clear().is_some(); let assign_button = Tooltip::new( Button::new(tr!("Assign Key")) .icon_class("fa fa-link") @@ -865,8 +962,18 @@ impl SubscriptionRegistryComp { .on_activate(ctx.link().callback(|_| Msg::RevertSelectedNode)), ) .tip(tr!( - "Revert the pending change on the selected node: drop an unpushed pool \ - assignment without touching the remote." + "Drop the pending pool change on the selected node." + )); + let clear_key_button = Tooltip::new( + Button::new(tr!("Clear Key")) + .icon_class("fa fa-recycle") + .disabled(!can_clear_key) + .on_activate(ctx.link().callback(|_| Msg::QueueClearForSelectedNode)), + ) + .tip(tr!( + "Queue the live subscription on the selected node for removal at next Apply \ + Pending, freeing the key for reassignment. Requires the node to be \ + pool-managed." 
)); Panel::new() @@ -877,6 +984,7 @@ impl SubscriptionRegistryComp { .title(tr!("Node Subscription Status")) .with_tool(assign_button) .with_tool(revert_button) + .with_tool(clear_key_button) .with_child(table) } @@ -926,19 +1034,40 @@ impl SubscriptionRegistryComp { .find(|n| n.remote == remote && n.node == node) } - /// Returns the assigned key when Revert is appropriate: there is a binding AND it has not - /// yet been pushed (different from current_key, or the node is not Active). For an - /// already-synced assignment, clearing would orphan the live subscription on the remote, - /// so the operator must take a different path (introduced later in the series). - fn clear_assignment_target_key(&self) -> Option { + /// Resolve the selected node into a Revert action target. + /// + /// Two kinds of pending state are revertible per-node: an unpushed pool assignment (drop + /// the binding entirely, same as the old Clear Assignment), and a queued Clear Key (drop + /// the flag, keep the binding). A synced binding without a queued clear is not pending, + /// so the button is disabled; freeing such a binding requires Clear Key. + fn revert_target(&self) -> Option { let n = self.selected_node_status()?; + if n.pending_clear { + return Some(RevertTarget::CancelClear { + remote: n.remote.clone(), + node: n.node.clone(), + }); + } let assigned = n.assigned_key.as_ref()?; let synced = n.status == proxmox_subscription::SubscriptionStatus::Active && n.current_key.as_deref() == Some(assigned.as_str()); if synced { return None; } - Some(assigned.clone()) + Some(RevertTarget::Unassign(assigned.clone())) + } + + /// Returns `(remote, node, current_key)` when the selected node has a pool-managed + /// subscription that can be queued for clear: there is a live key, no clear is already + /// queued for it, and a pool entry is bound to (remote, node). 
The pool-binding gate + /// mirrors the server-side refusal so foreign live subscriptions do not offer Clear Key + /// (they need Adopt Key first). + fn selected_node_for_clear(&self) -> Option<(String, String, Option)> { + let n = self.selected_node_status()?; + if n.pending_clear || n.current_key.is_none() || n.assigned_key.is_none() { + return None; + } + Some((n.remote.clone(), n.node.clone(), n.current_key.clone())) } /// Returns `(remote, node, type, node_sockets)` for the right-panel Assign button: -- 2.47.3