From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id AC53760771; Wed, 16 Feb 2022 14:39:28 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id A2D67515A; Wed, 16 Feb 2022 14:39:28 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 83B835149; Wed, 16 Feb 2022 14:39:26 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 5492546D61; Wed, 16 Feb 2022 14:39:26 +0100 (CET) From: Wolfgang Bumiller To: pbs-devel@lists.proxmox.com Cc: pve-devel@lists.proxmox.com, Thomas Lamprecht , Dominik Csapak , =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= , Hannes Laimer Date: Wed, 16 Feb 2022 14:39:17 +0100 Message-Id: <20220216133917.101133-1-w.bumiller@proxmox.com> X-Mailer: git-send-email 2.30.2 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.388 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pve-devel] [RFC proxmox] support quoted strings in property strings X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 16 Feb 2022 13:39:28 -0000 This allows free form text to exist within property strings, quoted, like: key="A value with \"quotes, also commas",key2=value2 or also: "the value for a default_key",key2=value2 And drop ';' as a key=value separator since those are meant for arrays inside property strings... Signed-off-by: Wolfgang Bumiller --- This is mostly a reaction to Hannes' maintenance mode series. I think it would make more sense to improve our "property string specification" (as much as there is one :P) to support quoted strings. This way we can avoid the url encoding mess. We could also do this in PVE (which would be particularly useful if we want to allow adding notes to net/disk devices). AFAICT the only property strings containing string values which would in *theory* allow quotes in the beginning wouldn't work with them in *practice*, (eg. the 'path' in a container's mount point, or an 'mdev' in a VM's hostpci entry?) proxmox-schema/src/lib.rs | 2 + proxmox-schema/src/property_string.rs | 163 ++++++++++++++++++++++++++ proxmox-schema/src/schema.rs | 25 ++-- 3 files changed, 177 insertions(+), 13 deletions(-) create mode 100644 proxmox-schema/src/property_string.rs diff --git a/proxmox-schema/src/lib.rs b/proxmox-schema/src/lib.rs index 4e98443..13b0739 100644 --- a/proxmox-schema/src/lib.rs +++ b/proxmox-schema/src/lib.rs @@ -19,6 +19,8 @@ pub use const_regex::ConstRegexPattern; pub mod de; pub mod format; +pub(crate) mod property_string; + mod schema; pub use schema::*; diff --git a/proxmox-schema/src/property_string.rs b/proxmox-schema/src/property_string.rs new file mode 100644 index 0000000..a40c1ca --- /dev/null +++ b/proxmox-schema/src/property_string.rs @@ -0,0 +1,163 @@ +//! Property string parsing +//! This may at some point also get a proper direkt `Serializer`/`Deserializer` for property +//! strings. + +use std::borrow::Cow; +use std::mem; + +use anyhow::{bail, format_err, Error}; + +/// Iterate over the `key=value` pairs of a property string. +/// +/// Note, that the `key` may be optional when the schema defines a "default" key. +/// If the value does not require stripping backslash escapes it will be borrowed, otherwise an +/// owned `String` will be returned. +pub struct PropertyIterator<'a> { + data: &'a str, +} + +impl<'a> PropertyIterator<'a> { + pub fn new(data: &'a str) -> Self { + Self { data } + } +} + +impl<'a> Iterator for PropertyIterator<'a> { + type Item = Result<(Option<&'a str>, Cow<'a, str>), Error>; + + fn next(&mut self) -> Option { + if self.data.is_empty() { + return None; + } + + let key = if self.data.starts_with('"') { + // value without key and quoted + None + } else { + let key = match self.data.find(&[',', '=']) { + Some(pos) if self.data.as_bytes()[pos] == b',' => None, + Some(pos) => Some(ascii_split_off(&mut self.data, pos)), + None => None, + }; + + if !self.data.starts_with('"') { + let value = match self.data.find(',') { + Some(pos) => ascii_split_off(&mut self.data, pos), + None => mem::take(&mut self.data), + }; + return Some(Ok((key, Cow::Borrowed(value)))); + } + + key + }; + + let value = match parse_quoted_string(&mut self.data) { + Ok(value) => value, + Err(err) => return Some(Err(err)), + }; + + if !self.data.is_empty() { + if self.data.starts_with(',') { + self.data = &self.data[1..]; + } else { + return Some(Err(format_err!("garbage after quoted string"))); + } + } + + Some(Ok((key, value))) + } +} + +impl<'a> std::iter::FusedIterator for PropertyIterator<'a> {} + +/// Parse a quoted string and move `data` to after the closing quote. +/// +/// The string must start with a double quote. +/// +/// This allows `\"`, `\\` and `\n` as escape sequences. +/// +/// If no escape sequence is found, `Cow::Borrowed` is returned, otherwise an owned `String` is +/// returned. +fn parse_quoted_string<'s>(data: &'_ mut &'s str) -> Result, Error> { + let data_out = data; + let data = data_out.as_bytes(); + + if data[0] != b'"' { + bail!("not a quoted string"); + } + + let mut i = 1; + while i != data.len() { + if data[i] == b'"' { + // unsafe: we're at an ascii boundary and index [0] is an ascii quote + let value = unsafe { std::str::from_utf8_unchecked(&data[1..i]) }; + *data_out = unsafe { std::str::from_utf8_unchecked(&data[(i + 1)..]) }; + return Ok(Cow::Borrowed(value)); + } else if data[i] == b'\\' { + // we cannot borrow + break; + } + i += 1; + } + if i == data.len() { + // reached the end before reaching a quote + bail!("unexpected end of string"); + } + + // we're now at the first backslash, don't include it in the output: + let mut out = Vec::with_capacity(data.len()); + out.extend_from_slice(&data[1..i]); + i += 1; + let mut was_backslash = true; + while i != data.len() { + match (data[i], mem::replace(&mut was_backslash, false)) { + (b'"', false) => { + i += 1; + break; + } + (b'"', true) => out.push(b'"'), + (b'\\', true) => out.push(b'\\'), + (b'n', true) => out.push(b'\n'), + (_, true) => bail!("unsupported escape sequence"), + (b'\\', false) => was_backslash = true, + (ch, false) => out.push(ch), + } + i += 1; + } + + // unsafe: we know we're at an ascii boundary + *data_out = unsafe { std::str::from_utf8_unchecked(&data[i..]) }; + Ok(Cow::Owned(unsafe { String::from_utf8_unchecked(out) })) +} + +/// Like `str::split_at` but with assumes `mid` points to an ASCII character and the 2nd slice +/// *excludes* `mid`. +fn ascii_split_around(s: &str, mid: usize) -> (&str, &str) { + (&s[..mid], &s[(mid + 1)..]) +} + +/// Split "off" the first `mid` bytes of `s`, advancing it to `mid + 1` (assuming `mid` points to +/// an ASCII character!). +fn ascii_split_off<'a, 's>(s: &'a mut &'s str, mid: usize) -> &'s str { + let (a, b) = ascii_split_around(s, mid); + *s = b; + a +} + +#[test] +fn iterate_over_property_string() { + let data = r#""value w/o key",fst=v1,sec="with = in it",third=3,"and \" and '\\'""#; + let mut iter = PropertyIterator::new(data).map(|entry| entry.unwrap()); + assert_eq!(iter.next().unwrap(), (None, Cow::Borrowed("value w/o key"))); + assert_eq!(iter.next().unwrap(), (Some("fst"), Cow::Borrowed("v1"))); + assert_eq!( + iter.next().unwrap(), + (Some("sec"), Cow::Borrowed("with = in it")) + ); + assert_eq!(iter.next().unwrap(), (Some("third"), Cow::Borrowed("3"))); + assert_eq!( + iter.next().unwrap(), + (None, Cow::Borrowed(r#"and " and '\'"#)) + ); + assert!(iter.next().is_none()); +} diff --git a/proxmox-schema/src/schema.rs b/proxmox-schema/src/schema.rs index 44c56e7..58c6a82 100644 --- a/proxmox-schema/src/schema.rs +++ b/proxmox-schema/src/schema.rs @@ -904,19 +904,18 @@ impl Schema { schema: T, default_key: Option<&'static str>, ) -> Result { - let mut param_list: Vec<(String, String)> = vec![]; - let key_val_list: Vec<&str> = value_str - .split(|c: char| c == ',' || c == ';') - .filter(|s| !s.is_empty()) - .collect(); - for key_val in key_val_list { - let kv: Vec<&str> = key_val.splitn(2, '=').collect(); - if kv.len() == 2 { - param_list.push((kv[0].trim().into(), kv[1].trim().into())); - } else if let Some(key) = default_key { - param_list.push((key.into(), kv[0].trim().into())); - } else { - bail!("Value without key, but schema does not define a default key."); + let mut param_list = Vec::new(); + for entry in crate::property_string::PropertyIterator::new(value_str) { + let (key, value) = entry?; + match key { + Some(key) => param_list.push((key.to_string(), value.into_owned())), + None => { + if let Some(key) = default_key { + param_list.push((key.to_string(), value.into_owned())); + } else { + bail!("Value without key, but schema does not define a default key."); + } + } } } -- 2.30.2