From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id A810273B9F for ; Fri, 16 Apr 2021 15:36:24 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 82B6524FEF for ; Fri, 16 Apr 2021 15:35:39 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 8FFB724EE8 for ; Fri, 16 Apr 2021 15:35:27 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 5C09445B11 for ; Fri, 16 Apr 2021 15:35:27 +0200 (CEST) From: Wolfgang Bumiller To: pbs-devel@lists.proxmox.com Date: Fri, 16 Apr 2021 15:35:06 +0200 Message-Id: <20210416133517.23349-14-w.bumiller@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20210416133517.23349-1-w.bumiller@proxmox.com> References: <20210416133517.23349-1-w.bumiller@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.470 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SCC_5_SHORT_WORD_LINES 1 5 lines with many short words SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. 
See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [mod.rs, de.rs, ser.rs, tools.rs] Subject: [pbs-devel] [RFC backup 13/23] add 'config file format' to tools::config X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 16 Apr 2021 13:36:24 -0000 This is a serde-based parser for the file format that our Perl code reads via `PVE::JSONSchema::parse_config`. This will be used for the node config. Some support for indexed arrays at the top level is available, but it is currently commented out and unused because it is not really compatible with how we write our schemas: we store property strings as actual strings, using an object schema as the *format* property in the `StringSchema`. Ideally this could be changed in the future so that we can integrate the serde parsing model more easily, without having to convert between strings manually in the code.
Signed-off-by: Wolfgang Bumiller --- src/tools.rs | 1 + src/tools/config/de.rs | 656 ++++++++++++++++++++++++++++++++++++++++ src/tools/config/mod.rs | 89 ++++++ src/tools/config/ser.rs | 642 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 1388 insertions(+) create mode 100644 src/tools/config/de.rs create mode 100644 src/tools/config/mod.rs create mode 100644 src/tools/config/ser.rs diff --git a/src/tools.rs b/src/tools.rs index 890db826..25323881 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -23,6 +23,7 @@ pub mod async_io; pub mod borrow; pub mod cert; pub mod compression; +pub mod config; pub mod cpio; pub mod daemon; pub mod disks; diff --git a/src/tools/config/de.rs b/src/tools/config/de.rs new file mode 100644 index 00000000..f7d9b79b --- /dev/null +++ b/src/tools/config/de.rs @@ -0,0 +1,656 @@ +use std::collections::hash_map::{self, HashMap}; + +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{alpha1, alphanumeric1, char, multispace0}, + combinator::recognize, + multi::many0, + sequence::{delimited, pair}, +}; +use serde::de::{self, DeserializeSeed, IntoDeserializer, MapAccess, SeqAccess, Visitor}; +use serde::{forward_to_deserialize_any, Deserialize}; + +use proxmox::api::schema::{parse_simple_value, ArraySchema, ObjectSchemaType, Schema}; + +use super::Error; + +type IResult> = Result<(I, O), nom::Err>; + +type ObjSchemaType = &'static (dyn ObjectSchemaType + Send + Sync + 'static); + +impl de::Error for Error { + fn custom(msg: T) -> Self + where + T: std::fmt::Display, + { + Error::Custom(msg.to_string()) + } +} + +/// Top level (line-by-line) parser for our old pve-style config files. +/// +/// The top level parser is responsible for fetching lines and splitting the `key: value` parts. +/// It has 2 jobs: +/// 1) Act as a `MapAccess` implementation in serde and forward the `value` to the 2nd level parser. 
+/// 2) Collect values belonging to arrays separately and insert them into the object schema at +/// the very end. +/// +/// This of course means that the top level parser only ever handles object schemas. +struct TopLevelDeserializer<'de> { + input: TopLevelInput<'de>, + schema: ObjSchemaType, + + // 'current' and 'current_array' could be turned into one 3-state enum + current: Option>, + arrays: ArrayState<'de>, + current_array: Option>, +} + +/// Filled by `MapAccess::next_key_seed` with the current line's info. +struct CurrentProperty<'de> { + key: &'de str, + value: &'de str, + schema: Option<&'static Schema>, +} + +/// The top level parser's input state is split out for borrowing purposes. +struct TopLevelInput<'de> { + input: std::str::Lines<'de>, + line: usize, + comments: Vec<&'de str>, +} + +impl<'de> TopLevelInput<'de> { + fn next_line(&mut self) -> Option<&'de str> { + loop { + let line = self.input.next()?.trim_start(); + self.line += 1; + if !line.is_empty() && !line.starts_with('#') { + return Some(line.trim()); + } + self.comments.push(line); + } + } +} + +/// This is used for top-level arrays for which the elements have been collected by the +/// `TopLevelDeserializer`. +/// +/// When going through the accumulated arrays in the `TopLevelDeserializer`, it produces an +/// instance of this struct and hands off deserialization to it. +struct TopLevelArrayDeserializer<'de> { + /// We keep this for error messages. + key: &'de str, + + schema: &'static ArraySchema, + + values: std::vec::IntoIter<(usize, &'de str)>, +} + +enum ArrayState<'de> { + /// For each array key we accumulate the values to process at the end in order to fit serde's + /// parsing model. We store the index, value and, for convenience, the schema. 
+ Accumulating(HashMap<&'de str, (Vec<(usize, &'de str)>, &'static ArraySchema)>), + + /// At the end of the file we iterate through the hashmap: + Handling(hash_map::IntoIter<&'de str, (Vec<(usize, &'de str)>, &'static ArraySchema)>), + + Done, +} + +impl<'de> TopLevelDeserializer<'de> { + pub fn from_str(input: &'de str, schema: &'static Schema) -> Result { + match schema { + Schema::Object(schema) => Ok(Self { + input: TopLevelInput { + input: input.lines(), + line: 0, + comments: Vec::new(), + }, + schema, + current: None, + arrays: ArrayState::Accumulating(HashMap::default()), + current_array: None, + }), + _ => Err(Error::BadSchema("toplevel schema must be an ObjectSchema")), + } + } + + /* + * Should we generally parse into a wrapper struct which keeps comments around? + pub fn take_comments(&mut self) -> Vec<&'de str> { + std::mem::take(&mut self.input.comments) + } + */ +} + +impl<'de, 'a> de::Deserializer<'de> for &'a mut TopLevelDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + // At the top level this is always an object schema, so we forward everything to `map`: + self.deserialize_map(visitor) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_map(self) + } + + // forward the rest as well: + forward_to_deserialize_any! 
{ + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct seq tuple + tuple_struct struct enum identifier ignored_any + } +} + +impl<'de, 'a> MapAccess<'de> for &'a mut TopLevelDeserializer<'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result, Error> + where + K: DeserializeSeed<'de>, + { + loop { + let array_map = match &mut self.arrays { + ArrayState::Accumulating(map) => map, + ArrayState::Handling(iter) => match iter.next() { + Some((key, (mut values, schema))) => { + values.sort_by(|a, b| a.0.cmp(&b.0)); + self.current_array = Some(TopLevelArrayDeserializer { + key, + schema, + values: values.into_iter(), + }); + return seed.deserialize(key.into_deserializer()).map(Some); + } + None => { + self.arrays = ArrayState::Done; + return Ok(None); + } + }, + ArrayState::Done => return Ok(None), + }; + + let input = match self.input.next_line() { + Some(line) => line, + None => { + self.arrays = ArrayState::Handling(std::mem::take(array_map).into_iter()); + continue; + } + }; + + // Split the line into key and value. `value` is the "rest" of the input, the "pair" is the + // key and the colon. + let (value, (key, _)) = + pair(identifier, delimited(multispace0, char(':'), multispace0))(input) + .map_err(|err| nom_err(input, "key", err))?; + + // Array handling: + /* + * Enabling this without special schema options *will* break hardcoded index-suffixed + * "manual" arrays. + * + if let Some((key, index)) = array_identifier(key)? 
{ + match self.schema.lookup(key) { + Some((_optional, Schema::Array(schema))) => { + array_map + .entry(key) + .or_insert_with(|| (Vec::new(), schema)) + .0 + .push((index, value)); + continue; + } + Some(_) => return Err(Error::NotAnArray(key.to_owned())), + None => { + if self.schema.additional_properties() { + return Err(Error::AdditionalArray(key.to_owned())); + } + return Err(Error::UnexpectedKey(key.to_owned())); + } + } + } + */ + + match self.schema.lookup(key) { + Some((_optional, schema)) => { + self.current = Some(CurrentProperty { + key, + value, + schema: Some(schema), + }); + } + None => { + if self.schema.additional_properties() { + self.current = Some(CurrentProperty { + key, + value, + schema: None, + }); + } + } + } + + return seed.deserialize(key.into_deserializer()).map(Some); + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + if let Some(current) = self.current.take() { + return de_seed(current.value, Some(current.key), current.schema, seed); + } + + if let Some(mut current) = self.current_array.take() { + return seed.deserialize(&mut current); + } + + Err(Error::BadState("missing current property")) + } +} + +impl<'de, 'a> de::Deserializer<'de> for &'a mut TopLevelArrayDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_seq(self) + } + + forward_to_deserialize_any! 
{ + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct tuple + tuple_struct map struct enum identifier ignored_any + } +} + +impl<'de, 'a> SeqAccess<'de> for &'a mut TopLevelArrayDeserializer<'de> { + type Error = Error; + + fn next_element_seed(&mut self, seed: V) -> Result, Error> + where + V: DeserializeSeed<'de>, + { + match self.values.next() { + Some((_index, input)) => { + de_seed(input, Some(self.key), Some(self.schema.items), seed).map(Some) + } + None => Ok(None), + } + } +} + +/// Deserialize values of a fixed type while allowing option types. Do not use this genericly +/// because it drops all the specific info and only goes over `deserialize_any`. +struct SomeDeserializer(T); + +impl<'de, T: de::Deserializer<'de>> de::Deserializer<'de> for SomeDeserializer { + type Error = T::Error; + + forward_to_deserialize_any! { + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf unit unit_struct newtype_struct seq tuple + tuple_struct map struct enum identifier ignored_any + } + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.0.deserialize_any(visitor) + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_some(self.0) + } +} + +/// The most "common" deserialization code path for object types. +/// +/// From this point on, objects become property strings and arrays become semicolon, zero, space or +/// comma separated values. Though commas can only appear if the previous "layer" was not already a +/// property string. 
+fn de_seed<'de, V>( + input: &'de str, + key: Option<&str>, + schema: Option<&'static Schema>, + seed: V, +) -> Result +where + V: DeserializeSeed<'de>, +{ + let key = key.unwrap_or("value"); + match schema { + Some(Schema::Null) => Err(Error::NullSchema), + Some(Schema::Boolean(_schema)) => { + let value = match input.to_lowercase().as_str() { + "1" | "on" | "yes" | "true" => true, + "0" | "off" | "no" | "false" => false, + _ => return Err(Error::Custom(format!("invalid boolean value: {}", input))), + }; + seed.deserialize(SomeDeserializer(value.into_deserializer())) + } + Some(schema @ Schema::Integer(_)) => seed + .deserialize( + parse_simple_value(input, schema) + .map_err(|err| Error::Custom(format!("bad {} - {}", key, err)))?, + ) + .map_err(|err| Error::Custom(format!("bad {} - {}", key, err))), + Some(schema @ Schema::Number(_)) => seed + .deserialize( + parse_simple_value(input, schema) + .map_err(|err| Error::Custom(format!("bad {} - {}", key, err)))?, + ) + .map_err(|err| Error::Custom(format!("bad {} - {}", key, err))), + Some(Schema::String(schema)) => { + schema + .check_constraints(input) + .map_err(|err| Error::Custom(format!("bad {} - {}", key, err)))?; + seed.deserialize(SomeDeserializer(input.into_deserializer())) + } + Some(Schema::Object(schema)) => { + let mut de = PropertyStringDeserializer { + input, + schema: &*schema, + default_key: schema.default_key, + current: None, + }; + seed.deserialize(&mut de) + } + Some(Schema::AllOf(schema)) => { + let mut de = PropertyStringDeserializer { + input, + schema: &*schema, + default_key: None, + current: None, + }; + seed.deserialize(&mut de) + } + Some(Schema::Array(schema)) => { + let mut de = ArrayDeserializer { input, schema }; + seed.deserialize(&mut de) + } + None => seed.deserialize(SomeDeserializer(input.into_deserializer())), + } +} + +pub fn from_str<'de, T>(input: &'de str, schema: &'static Schema) -> Result +where + T: Deserialize<'de>, +{ + let mut deserializer = 
TopLevelDeserializer::from_str(input, schema)?; + let t: T = T::deserialize(&mut deserializer)?; + if deserializer.input.next_line().is_none() { + Ok(t) + } else { + Err(Error::TrailingCharacters) + } +} + +pub fn from_slice<'de, T>(input: &'de [u8], schema: &'static Schema) -> Result +where + T: Deserialize<'de>, +{ + from_str( + std::str::from_utf8(input).map_err(|_| Error::NonUtf8)?, + schema, + ) +} + +pub fn from_property_string<'de, T>(input: &'de str, schema: &'static Schema) -> Result +where + T: Deserialize<'de>, +{ + let (schema, default_key): (ObjSchemaType, Option<&'static str>) = match schema { + Schema::Object(obj) => (obj as _, obj.default_key), + Schema::AllOf(obj) => (obj as _, None), + _ => { + return Err(Error::BadSchema( + "cannot deserialize non-object from a property string", + )); + } + }; + + T::deserialize(&mut PropertyStringDeserializer { + input, + schema, + default_key, + current: None, + }) +} + +fn identifier(i: &str) -> IResult<&str, &str> { + recognize(pair( + alt((alpha1, tag("_"))), + many0(alt((alphanumeric1, tag("_")))), + ))(i) +} + +/* +fn array_identifier(i: &str) -> Result, Error> { + if let Some(last_nondigit) = i.rfind(|c: char| !c.is_ascii_digit()) { + if last_nondigit != (i.len() - 1) { + return Ok(Some(( + &i[..=last_nondigit], + i[(last_nondigit + 1)..] + .parse::() + .map_err(|e| Error::Other(e.into()))?, + ))); + } + } + Ok(None) +} +*/ + +fn nom_err(input: &str, what: &str, res: nom::Err>) -> Error { + match res { + nom::Err::Error(err) | nom::Err::Failure(err) => Error::Custom(format!( + "failed to parse {} - {}", + what, + nom::error::convert_error(input, err), + )), + err => Error::Custom(format!("failed to parse {} - {}", what, err)), + } +} + +/// This is basically the "2nd tier" parser for our format, the comma separated `key=value` format. +/// +/// Contrary to the `TopLevelParser` this is only used for the value part of a line and never +/// contains multiple lines. 
+/// +/// At this level, commas *always* separate values, and arrays need to use a different separator +/// (space, semicolon or zero-byte). +struct PropertyStringDeserializer<'de> { + input: &'de str, + schema: ObjSchemaType, + default_key: Option<&'static str>, + current: Option>, +} + +impl<'de> PropertyStringDeserializer<'de> { + /// Returns the next value. The key may be optional due to "default keys". + fn next_property(&mut self) -> Option<(Option<&'de str>, &'de str)> { + if self.input.is_empty() { + return None; + } + + let input = self.input; + let property = match self.input.find(',') { + Some(comma) => { + self.input = &input[(comma + 1)..]; + &input[..comma] + } + None => { + self.input = ""; + input + } + }; + + Some(match property.find('=') { + Some(eq) => (Some(&property[..eq]), &property[(eq + 1)..]), + None => (None, property), + }) + } + + /// Assert that we have a current property entry. + fn current_property(&mut self) -> Result, Error> { + self.current + .take() + .ok_or_else(|| Error::BadState("missing current property in property string")) + } +} + +impl<'de, 'a> de::Deserializer<'de> for &'a mut PropertyStringDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_map(visitor) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_map(self) + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_some(self) + } + + forward_to_deserialize_any! 
{ + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf unit unit_struct newtype_struct seq tuple + tuple_struct struct enum identifier ignored_any + } +} + +impl<'de, 'a> MapAccess<'de> for &'a mut PropertyStringDeserializer<'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result, Error> + where + K: DeserializeSeed<'de>, + { + let (key, value) = match self.next_property() { + None => return Ok(None), + Some((None, value)) => match self.default_key { + Some(key) => (key, value), + None => return Err(Error::MissingKey), + }, + Some((Some(key), value)) => (key, value), + }; + + let schema = match self.schema.lookup(key) { + Some((_optional, schema)) => Some(schema), + None => { + if self.schema.additional_properties() { + None + } else { + return Err(Error::UnexpectedKey(key.to_owned())); + } + } + }; + + self.current = Some(CurrentProperty { key, value, schema }); + + seed.deserialize(key.into_deserializer()).map(Some) + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + let current = self.current_property()?; + de_seed(current.value, Some(current.key), current.schema, seed) + } +} + +/// This is the *2nd level* array deserializer handling a single line of elements separated by any +/// of our standard separators: comma, semicolon, space or null-byte. +struct ArrayDeserializer<'de> { + input: &'de str, + schema: &'static ArraySchema, +} + +impl<'de, 'a> de::Deserializer<'de> for &'a mut ArrayDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_seq(self) + } + + forward_to_deserialize_any! 
{ + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct tuple + tuple_struct map struct enum identifier ignored_any + } +} + +impl<'de, 'a> SeqAccess<'de> for &'a mut ArrayDeserializer<'de> { + type Error = Error; + + fn next_element_seed(&mut self, seed: V) -> Result, Error> + where + V: DeserializeSeed<'de>, + { + if self.input.is_empty() { + return Ok(None); + } + + let input = match self + .input + .find(|c: char| c == ',' || c == ';' || c.is_ascii_whitespace()) + { + Some(pos) => { + let value = &self.input[..pos]; + self.input = self.input[(pos + 1)..].trim_start(); + value + } + None => { + let value = self.input.trim(); + self.input = ""; + value + } + }; + + de_seed(input, None, Some(&self.schema.items), seed).map(Some) + } +} diff --git a/src/tools/config/mod.rs b/src/tools/config/mod.rs new file mode 100644 index 00000000..46a90674 --- /dev/null +++ b/src/tools/config/mod.rs @@ -0,0 +1,89 @@ +//! Our 'key: value' config format. + +pub mod de; +pub mod ser; + +#[doc(inline)] +pub use de::{from_property_string, from_slice, from_str}; + +#[doc(inline)] +pub use ser::{to_bytes, to_property_string, to_writer}; + +// Note: we need an error type since we need to implement serde traits for it. + +/// Config file format or property string parsing error. +#[derive(thiserror::Error, Debug)] +pub enum Error { + // Common: + #[error("invalid schema: Null")] + NullSchema, + + #[error("bad schema: {0}")] + BadSchema(&'static str), + + #[error("config contains illegal characters (non-utf8)")] + NonUtf8, + + // Deserialization: + #[error("invalid trailing characters after configuration")] + TrailingCharacters, + + #[error("unexpected EOF, expected {0}")] + Eof(&'static str), + + #[error("not an array: {0}")] + NotAnArray(String), + + // Because it's stupid for the parser. 
+ #[error("array keys not allowed as additional property")] + AdditionalArray(String), + + #[error("unexpected value, expected {0}")] + Type(&'static str), + + #[error("unexpected key '{0}' and schema does not allow additional properties")] + UnexpectedKey(String), + + #[error("missing key and schema does not define a default key")] + MissingKey, + + #[error("integer literal out of range for '{0}'")] + IntegerOutOfRange(String), + + #[error("ObjectSchema found within a property-string value for '{0}'")] + ObjectInPropertyString(String), + + #[error("parse error: {0}")] + Custom(String), + + #[error("deserialization error: {0}")] + BadState(&'static str), + + #[error(transparent)] + Other(#[from] anyhow::Error), + + // Serialization: + #[error("json serialization failed: {0}")] + Json(#[from] serde_json::Error), + + #[error("cannot serialize non-object types as config file")] + NotAnObject, + + #[error("schema expected type {0} but data type was {1}")] + SchemaError(&'static str, &'static str), + + #[error("value of type type {0} did not fit into a type {1}")] + NumericRange(&'static str, &'static str), + + #[error("failed to write serialized output: {0}")] + Io(#[from] std::io::Error), + + #[error("bad key type trying to serialize a 'map': {0}")] + BadKeyType(String), + + #[error("bad value type serializing to config format: {0}")] + BadValueType(&'static str), + + #[error("type is nested too much and cannot be represented as a config file")] + TooComplex, +} diff --git a/src/tools/config/ser.rs b/src/tools/config/ser.rs new file mode 100644 index 00000000..f684d1a6 --- /dev/null +++ b/src/tools/config/ser.rs @@ -0,0 +1,642 @@ +use std::borrow::Cow; +use std::mem::replace; + +use serde::{ser, Serialize}; +use serde_json::Value; + +use proxmox::api::schema::{ArraySchema, ObjectSchemaType, Schema}; + +use super::Error; + +impl ser::Error for Error { + fn custom(msg: T) -> Self + where + T: std::fmt::Display, + { + Error::Custom(msg.to_string()) + } +} + +pub fn 
to_bytes(value: &T, schema: &'static Schema) -> Result, Error> { + let mut out = Vec::::new(); + to_writer(value, schema, &mut out)?; + Ok(out) +} + +pub fn to_writer( + value: &T, + schema: &'static Schema, + output: &mut dyn std::io::Write, +) -> Result<(), Error> { + let obj: &'static dyn ObjectSchemaType = match schema { + Schema::Object(obj) => obj as _, + Schema::AllOf(obj) => obj as _, + _ => { + return Err(Error::BadSchema( + "config file format only accepts object schemas at the top level", + )) + } + }; + + value.serialize(&mut TopLevelSerializer::new(output, obj)) +} + +pub fn to_property_string( + value: &T, + schema: &'static Schema, +) -> Result { + if !matches!(schema, Schema::Object(_) | Schema::AllOf(_)) { + return Err(Error::BadSchema( + "cannot serialize non-object as property string", + )); + } + + let mut bytes = Vec::::new(); + value.serialize(&mut LineSerializer::new_toplevel( + &mut bytes, + false, + Some(schema), + ))?; + + String::from_utf8(bytes).map_err(|_| Error::NonUtf8) +} + +/// This is the top level of our config file serializing each 'key' of an object into one line, +/// handling arrays by suffixing them with indices. +struct TopLevelSerializer<'out> { + output: &'out mut dyn std::io::Write, + schema: &'static dyn ObjectSchemaType, + current_key: Option>, +} + +impl<'out> TopLevelSerializer<'out> { + fn new(output: &'out mut dyn std::io::Write, schema: &'static dyn ObjectSchemaType) -> Self { + Self { + output, + schema, + current_key: None, + } + } +} + +macro_rules! 
not_an_object { + () => {}; + ( $name:ident<$($generic:ident)+>($($args:tt)*) ($ret:ty) $($rest:tt)* ) => { + fn $name<$($generic: ?Sized + Serialize)+>(self, $($args)*) -> Result<$ret, Error> { + Err(Error::NotAnObject) + } + + not_an_object!{ $($rest)* } + }; + ( $name:ident($($args:tt)*) ($ret:ty) $($rest:tt)* ) => { + fn $name(self, $($args)*) -> Result<$ret, Error> { + Err(Error::NotAnObject) + } + + not_an_object!{ $($rest)* } + }; +} + +impl<'a, 'out> ser::Serializer for &'a mut TopLevelSerializer<'out> { + type Ok = (); + type Error = Error; + + type SerializeSeq = ser::Impossible; + type SerializeTuple = ser::Impossible; + type SerializeTupleStruct = ser::Impossible; + type SerializeTupleVariant = ser::Impossible; + type SerializeMap = Self; + type SerializeStruct = Self; + type SerializeStructVariant = ser::Impossible; + + not_an_object! { + serialize_bool(_: bool)(()) + serialize_char(_: char)(()) + serialize_i8(_: i8)(()) + serialize_i16(_: i16)(()) + serialize_i32(_: i32)(()) + serialize_i64(_: i64)(()) + serialize_u8(_: u8)(()) + serialize_u16(_: u16)(()) + serialize_u32(_: u32)(()) + serialize_u64(_: u64)(()) + serialize_f32(_: f32)(()) + serialize_f64(_: f64)(()) + serialize_str(_: &str)(()) + serialize_bytes(_: &[u8])(()) + serialize_unit_struct(_: &'static str)(()) + serialize_none()(()) + serialize_some(_: &T)(()) + serialize_unit()(()) + serialize_unit_variant( + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + )(()) + serialize_newtype_struct(_name: &'static str, _value: &T)(()) + serialize_newtype_variant( + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + )(()) + serialize_seq(_len: Option)(ser::Impossible) + serialize_tuple(_len: usize)(ser::Impossible) + serialize_tuple_struct( + _name: &'static str, + _len: usize, + )(ser::Impossible) + serialize_tuple_variant( + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + )(ser::Impossible) + 
serialize_struct_variant( + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + )(ser::Impossible) + } + + fn serialize_map(self, _len: Option) -> Result { + Ok(self) + } + + fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { + Ok(self) + } +} + +impl<'a, 'out> ser::SerializeMap for &'a mut TopLevelSerializer<'out> { + type Ok = (); + type Error = Error; + + fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + match serde_json::to_value(key)? { + Value::String(key) => { + self.current_key = Some(Cow::Owned(key)); + Ok(()) + } + other => Err(Error::BadKeyType( + serde_json::to_string(&other).unwrap_or_else(|_| "???".to_string()), + )), + } + } + + fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + let key = self + .current_key + .take() + .ok_or_else(|| Error::BadState("serialize_value called without serialize_key"))?; + self.write_value_ln(key, value)?; + Ok(()) + } + + fn end(self) -> Result<(), Error> { + Ok(()) + } +} + +impl<'a, 'out> ser::SerializeStruct for &'a mut TopLevelSerializer<'out> { + type Ok = (); + type Error = Error; + + fn serialize_field( + &mut self, + key: &'static str, + value: &T, + ) -> Result<(), Self::Error> + where + T: Serialize, + { + self.write_value_ln(Cow::Borrowed(key), value)?; + Ok(()) + } + + fn end(self) -> Result<(), Error> { + Ok(()) + } +} + +impl<'out> TopLevelSerializer<'out> { + fn write_value_ln(&mut self, key: Cow<'static, str>, value: &T) -> Result<(), Error> + where + T: Serialize, + { + let (optional, schema) = match self.schema.lookup(&key) { + Some((o, s)) => (o, Some(s)), + None => (true, None), + }; + + if let Some(Schema::Array(ArraySchema { items, .. })) = schema { + // When serializing arrays at the top level we need to decide whether to serialize them + // as a single line or multiple. 
+            //
+            // Since we don't attach this sort of information to the schema (yet), our best bet is
+            // to just check the contained type. If it is a more complex type (array or object),
+            // we'll split it up:
+            match items {
+                Schema::Array(_) | Schema::Object(_) | Schema::AllOf(_) => {
+                    return value.serialize(&mut LineSerializer::new_multiline_array(
+                        self.output,
+                        optional,
+                        schema,
+                        key,
+                    ))
+                }
+                _ => (), // use regular serialization otherwise
+            }
+        }
+
+        self.output.write_all(key.as_bytes())?;
+        self.output.write_all(b": ")?;
+        value.serialize(&mut LineSerializer::new_toplevel(
+            self.output,
+            optional,
+            schema,
+        ))?;
+        self.output.write_all(b"\n")?;
+        Ok(())
+    }
+}
+
+/// This is the second level serializer.
+///
+/// At this point arrays are semicolon separated values, structs/maps are property strings
+/// (comma separated `key=value` pairs), and anything else is just "printed".
+///
+/// When created via `new_multiline_array`, a sequence is instead written as one
+/// `<key><index>: <element>` line per element.
+///
+/// NOTE(review): strings and keys are written verbatim, no quoting or escaping of `,`, `;`, `=`
+/// or newlines happens here.
+struct LineSerializer<'out> {
+    /// Where the serialized text goes.
+    output: &'out mut dyn std::io::Write,
+
+    /// Whether a `None` is acceptable for the value currently being serialized.
+    optional: bool,
+
+    /// Schema the current value is checked against; `None` disables the checks.
+    schema: Option<&'static Schema>,
+
+    /// When serializing an array containing property strings, this key is used to produce multiple
+    /// lines suffixed with an index for each array element.
+    array_key: Option<(Cow<'static, str>, usize)>,
+
+    /// Used while serializing objects.
+    current_key: Option<String>,
+
+    // This is to prevent invalid nesting of arrays and property strings:
+    is_array: bool,
+    is_object: bool,
+
+    // This is the state for whether we need to already place commas or semicolons during object or
+    // array serialization
+    in_array: bool,
+    in_object: bool,
+}
+
+impl<'out> LineSerializer<'out> {
+    /// The first line level allows serializing more complex structures such as arrays or maps.
+    fn new_toplevel(
+        output: &'out mut dyn std::io::Write,
+        optional: bool,
+        schema: Option<&'static Schema>,
+    ) -> Self {
+        Self {
+            output,
+            array_key: None,
+            optional,
+            schema,
+            current_key: None,
+            is_array: false,
+            is_object: false,
+            in_array: false,
+            in_object: false,
+        }
+    }
+
+    /// Multi-line arrays are also handled by this serializer because the `Serializer`
+    /// implementation is so tedious...
+    ///
+    /// Each element of the serialized sequence is written as its own `<array_key><index>: ...`
+    /// line, with the index starting at 0.
+    fn new_multiline_array(
+        output: &'out mut dyn std::io::Write,
+        optional: bool,
+        schema: Option<&'static Schema>,
+        array_key: Cow<'static, str>,
+    ) -> Self {
+        let mut this = Self::new_toplevel(output, optional, schema);
+        this.array_key = Some((array_key, 0));
+        this
+    }
+
+    /// Serialize one `key=value` entry of a property string.
+    ///
+    /// Entries after the first one are preceded by a comma. The value is written by a separate
+    /// child serializer using the schema found for `key` in the current object schema. Keys the
+    /// schema does not know, and all keys when there is no schema at all, are treated as optional
+    /// and are not checked.
+    ///
+    /// Fails with `Error::BadSchema` if the current schema is neither an object nor an `AllOf`
+    /// schema.
+    ///
+    /// NOTE(review): the `key=` prefix is written before the value is looked at, so a `None`
+    /// value ends up as `key=` with nothing behind it - confirm the parser is fine with that.
+    fn serialize_object_value<T: ?Sized + Serialize>(
+        &mut self,
+        key: Cow<'static, str>,
+        value: &T,
+    ) -> Result<(), Error> {
+        let next_schema = match self.schema {
+            Some(Schema::Object(schema)) => schema.lookup(&key),
+            Some(Schema::AllOf(schema)) => schema.lookup(&key),
+            Some(_) => {
+                return Err(Error::BadSchema(
+                    "struct or map with non-object schema type",
+                ))
+            }
+            None => None,
+        };
+
+        let (optional, schema) = match next_schema {
+            Some((optional, schema)) => (optional, Some(schema)),
+            None => (true, None),
+        };
+
+        if replace(&mut self.in_object, true) {
+            self.output.write_all(b",")?;
+        }
+        self.output.write_all(key.as_bytes())?;
+        self.output.write_all(b"=")?;
+
+        {
+            // The child inherits the nesting flags, so an object inside this object (or an array
+            // inside an array further up) is rejected as too complex.
+            let mut next = LineSerializer {
+                output: self.output,
+                optional,
+                schema,
+                array_key: None,
+                current_key: None,
+                is_array: self.is_array,
+                is_object: self.is_object,
+                in_array: self.in_array,
+                in_object: self.in_object,
+            };
+            value.serialize(&mut next)?;
+        }
+        Ok(())
+    }
+}
+
+// Generates `Serializer` methods for the small numeric types which simply widen the value and
+// forward to the given 64 bit handler.
+macro_rules! forward_simple {
+    ( $( $name:ident($ty:ty) -> $handler:ident )* ) => {
+        $(
+            fn $name(self, v: $ty) -> Result<(), Error> {
+                self.$handler(v.into())
+            }
+        )*
+    };
+}
+
+// Generates `Serializer` methods for all the types this format cannot represent. Each entry has
+// the form `name(args)(return type)("description")`, optionally with generic parameters after the
+// name, and expands to a method which always fails with `Error::BadValueType(description)`.
+macro_rules! bad_value_type {
+    () => {};
+    ( $name:ident<$($generic:ident)+>($($args:tt)*) ($ret:ty) ($error:tt) $($rest:tt)* ) => {
+        fn $name<$($generic: ?Sized + Serialize)+>(self, $($args)*) -> Result<$ret, Error> {
+            Err(Error::BadValueType($error))
+        }
+
+        bad_value_type!{ $($rest)* }
+    };
+    ( $name:ident($($args:tt)*) ($ret:ty) ($error:tt) $($rest:tt)* ) => {
+        fn $name(self, $($args)*) -> Result<$ret, Error> {
+            Err(Error::BadValueType($error))
+        }
+
+        bad_value_type!{ $($rest)* }
+    };
+}
+
+/// The actual serde glue.
+///
+/// Scalars are checked against the schema (if there is one) and printed. Sequences, maps and
+/// structs hand back the serializer itself as compound serializer. Everything else (bytes, unit
+/// types, tuples and all enum variants except newtype variants) is rejected with
+/// `Error::BadValueType`.
+impl<'a, 'out> ser::Serializer for &'a mut LineSerializer<'out> {
+    type Ok = ();
+    type Error = Error;
+
+    type SerializeSeq = Self;
+    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeMap = Self;
+    type SerializeStruct = Self;
+    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
+
+    forward_simple! {
+        serialize_i8(i8) -> serialize_i64
+        serialize_i16(i16) -> serialize_i64
+        serialize_i32(i32) -> serialize_i64
+        serialize_u8(u8) -> serialize_u64
+        serialize_u16(u16) -> serialize_u64
+        serialize_u32(u32) -> serialize_u64
+        serialize_f32(f32) -> serialize_f64
+    }
+
+    bad_value_type! {
+        serialize_bytes(_: &[u8])(()) ("byte slice")
+        serialize_unit_struct(_: &'static str)(()) ("unit struct")
+        serialize_unit()(()) ("unit")
+        serialize_unit_variant(
+            _name: &'static str,
+            _variant_index: u32,
+            _variant: &'static str,
+        )(()) ("unit variant")
+        serialize_tuple(_len: usize)(ser::Impossible<Self::Ok, Self::Error>) ("tuple")
+        serialize_tuple_struct(
+            _name: &'static str,
+            _len: usize,
+        )(ser::Impossible<Self::Ok, Self::Error>) ("tuple struct")
+        serialize_tuple_variant(
+            _name: &'static str,
+            _variant_index: u32,
+            _variant: &'static str,
+            _len: usize,
+        )(ser::Impossible<Self::Ok, Self::Error>) ("tuple variant")
+        serialize_struct_variant(
+            _name: &'static str,
+            _variant_index: u32,
+            _variant: &'static str,
+            _len: usize,
+        )(ser::Impossible<Self::Ok, Self::Error>) ("struct variant")
+    }
+
+    /// Signed integers are accepted for integer and number schemas (or without a schema).
+    fn serialize_i64(self, v: i64) -> Result<(), Error> {
+        match self.schema {
+            None | Some(Schema::Integer(_)) | Some(Schema::Number(_)) => {
+                Ok(write!(self.output, "{}", v)?)
+            }
+            Some(_) => Err(Error::BadSchema("non-integer schema with integer value")),
+        }
+    }
+
+    /// Unsigned integers are accepted for integer and number schemas (or without a schema).
+    fn serialize_u64(self, v: u64) -> Result<(), Error> {
+        match self.schema {
+            None | Some(Schema::Integer(_)) | Some(Schema::Number(_)) => {
+                Ok(write!(self.output, "{}", v)?)
+            }
+            Some(_) => Err(Error::BadSchema("non-integer schema with integer value")),
+        }
+    }
+
+    /// Floating point values are only accepted for number schemas (or without a schema).
+    fn serialize_f64(self, v: f64) -> Result<(), Error> {
+        match self.schema {
+            None | Some(Schema::Number(_)) => Ok(write!(self.output, "{}", v)?),
+            Some(_) => Err(Error::BadSchema(
+                "non-number schema with floating point value",
+            )),
+        }
+    }
+
+    /// Booleans are written as `true` or `false`.
+    fn serialize_bool(self, v: bool) -> Result<(), Error> {
+        match self.schema {
+            None | Some(Schema::Boolean(_)) => {
+                Ok(self.output.write_all(if v { b"true" } else { b"false" })?)
+            }
+            Some(_) => Err(Error::BadSchema("non-boolean schema with boolean value")),
+        }
+    }
+
+    /// A character is treated like a one character string.
+    fn serialize_char(self, v: char) -> Result<(), Error> {
+        match self.schema {
+            None | Some(Schema::String(_)) => Ok(write!(self.output, "{}", v)?),
+            Some(_) => Err(Error::BadSchema("non-string schema with character value")),
+        }
+    }
+
+    /// Strings are written as they are.
+    fn serialize_str(self, v: &str) -> Result<(), Error> {
+        match self.schema {
+            None | Some(Schema::String(_)) => Ok(self.output.write_all(v.as_bytes())?),
+            Some(_) => Err(Error::BadSchema("non-string schema with string value")),
+        }
+    }
+
+    /// `None` produces no output. It is an error only for non-optional values that have a
+    /// schema other than `Schema::Null`.
+    fn serialize_none(self) -> Result<(), Error> {
+        if self.optional {
+            return Ok(());
+        }
+
+        match self.schema {
+            None | Some(Schema::Null) => Ok(()),
+            Some(_) => Err(Error::BadSchema("encountered None at a non optional value")),
+        }
+    }
+
+    /// `Some(value)` is serialized exactly like `value`.
+    fn serialize_some<T: ?Sized + Serialize>(self, value: &T) -> Result<(), Error> {
+        value.serialize(self)
+    }
+
+    /// Newtype structs are transparent, only the contained value is written.
+    fn serialize_newtype_struct<T: ?Sized + Serialize>(
+        self,
+        _name: &'static str,
+        value: &T,
+    ) -> Result<(), Error> {
+        value.serialize(self)
+    }
+
+    /// Only the contained value is written, the variant name is dropped.
+    fn serialize_newtype_variant<T: ?Sized + Serialize>(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        value: &T,
+    ) -> Result<(), Error> {
+        value.serialize(self)
+    }
+
+    /// Start an array. Arrays nested in arrays are rejected with `Error::TooComplex`.
+    fn serialize_seq(self, _len: Option<usize>) -> Result<Self, Error> {
+        if self.is_array {
+            return Err(Error::TooComplex);
+        }
+
+        // From here on we serialize the elements, so they need to be checked against the item
+        // schema. Checking them against the array schema itself would reject every element.
+        if let Some(Schema::Array(array)) = self.schema {
+            self.schema = Some(array.items);
+        }
+
+        self.is_array = true;
+        Ok(self)
+    }
+
+    /// Start a property string. Objects nested in objects are rejected with
+    /// `Error::TooComplex`.
+    fn serialize_map(self, _len: Option<usize>) -> Result<Self, Error> {
+        if self.is_object {
+            return Err(Error::TooComplex);
+        }
+
+        self.is_object = true;
+        Ok(self)
+    }
+
+    /// Structs are property strings just like maps.
+    fn serialize_struct(self, _name: &'static str, _len: usize) -> Result<Self, Error> {
+        if self.is_object {
+            return Err(Error::TooComplex);
+        }
+
+        self.is_object = true;
+        Ok(self)
+    }
+}
+
+/// Maps become property strings. The key is remembered in `current_key` until its value arrives.
+impl<'a, 'out> ser::SerializeMap for &'a mut LineSerializer<'out> {
+    type Ok = ();
+    type Error = Error;
+
+    /// Keys must serialize to a string. To find out, the key is run through
+    /// `serde_json::to_value`; anything but a string is reported as `Error::BadKeyType` carrying
+    /// the JSON representation of the offending key.
+    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
+    where
+        T: Serialize,
+    {
+        match serde_json::to_value(key)? {
+            Value::String(s) => {
+                self.current_key = Some(s);
+                Ok(())
+            }
+            other => Err(Error::BadKeyType(
+                serde_json::to_string(&other).unwrap_or_else(|_| "???".to_string()),
+            )),
+        }
+    }
+
+    /// Writes the `key=value` pair for the key stored by the preceding `serialize_key` call.
+    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    where
+        T: Serialize,
+    {
+        let key = self
+            .current_key
+            .take()
+            .ok_or_else(|| Error::BadState("serialize_value called without serialize_key"))?;
+        self.serialize_object_value(Cow::Owned(key), value)
+    }
+
+    fn end(self) -> Result<(), Error> {
+        Ok(())
+    }
+}
+
+/// Structs become property strings, one `key=value` pair per field.
+impl<'a, 'out> ser::SerializeStruct for &'a mut LineSerializer<'out> {
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_field<T: ?Sized>(
+        &mut self,
+        key: &'static str,
+        value: &T,
+    ) -> Result<(), Self::Error>
+    where
+        T: Serialize,
+    {
+        self.serialize_object_value(Cow::Borrowed(key), value)
+    }
+
+    fn end(self) -> Result<(), Error> {
+        Ok(())
+    }
+}
+
+/// Sequences are either written on a single line with the elements separated by semicolons, or,
+/// if an `array_key` is set, as one `<key><index>: <element>` line per element.
+impl<'a, 'out> ser::SerializeSeq for &'a mut LineSerializer<'out> {
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    where
+        T: Serialize,
+    {
+        let multiline = if let Some((ref key, ref mut index)) = self.array_key {
+            // Every element, including the first one, is a line of its own and therefore needs
+            // its own indexed key.
+            write!(self.output, "{}{}: ", key, index)?;
+            *index += 1;
+            true
+        } else {
+            if replace(&mut self.in_array, true) {
+                self.output.write_all(b";")?;
+            }
+            false
+        };
+
+        {
+            // Each element gets its own serializer so the object state of one element (the
+            // `is_object` flag and the pending comma) cannot leak into the next one. Reusing
+            // `self` made every property string after the first fail with `TooComplex`.
+            let mut element = LineSerializer {
+                output: &mut *self.output,
+                optional: self.optional,
+                schema: self.schema,
+                array_key: None,
+                current_key: None,
+                is_array: self.is_array,
+                is_object: self.is_object,
+                in_array: false,
+                in_object: false,
+            };
+            value.serialize(&mut element)?;
+        }
+
+        if multiline {
+            // The caller does not terminate multi-line arrays, so finish each line here.
+            self.output.write_all(b"\n")?;
+        }
+        Ok(())
+    }
+
+    fn end(self) -> Result<(), Error> {
+        Ok(())
+    }
+}
-- 
2.20.1