From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <pbs-devel-bounces@lists.proxmox.com> Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 0C93C1FF165 for <inbox@lore.proxmox.com>; Thu, 8 May 2025 10:24:32 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id CCC05A425; Thu, 8 May 2025 10:24:48 +0200 (CEST) Date: Thu, 8 May 2025 10:24:12 +0200 From: Wolfgang Bumiller <w.bumiller@proxmox.com> To: Gabriel Goller <g.goller@proxmox.com> Message-ID: <t26nlnfp2kclqkotha76a5vz23kjkl6as4aawkj2fw2rkkesox@mpbsb24dhxst> References: <20250507163114.1162300-1-g.goller@proxmox.com> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20250507163114.1162300-1-g.goller@proxmox.com> X-SPAM-LEVEL: Spam detection results: 0 AWL 0.079 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [RFC PATCH] schema: allow serializing rust Schema to perl JsonSchema X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion <pbs-devel.lists.proxmox.com> List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe> List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/> List-Post: <mailto:pbs-devel@lists.proxmox.com> List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help> List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe> Reply-To: Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com> Cc: pbs-devel@lists.proxmox.com Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" <pbs-devel-bounces@lists.proxmox.com> On Wed, May 07, 2025 at 06:31:14PM +0200, Gabriel Goller wrote: > Implement serde::Serialize on the rust Schema, so that we can serialize > it and use it as a JsonSchema in perl. This allows us to write a single > Schema in rust and reuse it in perl for the api properties. > > The interesting bits (custom impls) are: > * Recursive oneOf type-property resolver > * oneOf and allOf implementation > * ApiStringFormat skip of ApiStringVerifyFn (which won't work obviously) I'd like the commit to explain where we actually want to use/need this. Long ago (before we had the `OneOf` schema I already started this, but figured we didn't really need it anyway at the time. > > Signed-off-by: Gabriel Goller <g.goller@proxmox.com> > --- > > This is kinda hard to test, because nothing actually fails when the properties > are wrong and the whole allOf, oneOf and ApiStringFormat is a bit > untransparent. So some properties could be wrongly serialized, but I > think I got everything right. Looking over all the properties would be > appreciated! allOf and regular object-schema can use the same startegy, you serialize a map and you can use the iterator you get from the `.properties()` method to iterate over all properties. One thing you can take out of my `staff/old-perl-serializing` commit is the `ApiStringFormat::VerifyFn` handling, it uses perlmod to serialize the function as an xsub with some magic to call the rust verifier code. We *may* also consider using such a wrapper for regex patterns in case we run into any significant differences between the perl & regex-crate engines... Also, technically the perl `JSONSchema` should be able to verify a schema... (At least partially...) > > Cargo.toml | 1 + > proxmox-schema/Cargo.toml | 4 +- > proxmox-schema/src/const_regex.rs | 12 ++ > proxmox-schema/src/schema.rs | 242 ++++++++++++++++++++++++++++-- > 4 files changed, 246 insertions(+), 13 deletions(-) > > diff --git a/Cargo.toml b/Cargo.toml > index 2ca0ea618707..a0d760ae8fc9 100644 > --- a/Cargo.toml > +++ b/Cargo.toml > @@ -104,6 +104,7 @@ regex = "1.5" > serde = "1.0" > serde_cbor = "0.11.1" > serde_json = "1.0" > +serde_with = "3.8.1" ^ Is it really worth introducing this for `None` handling :S > serde_plain = "1.0" > syn = { version = "2", features = [ "full", "visit-mut" ] } > tar = "0.4" > diff --git a/proxmox-schema/Cargo.toml b/proxmox-schema/Cargo.toml > index c8028aa52bd0..48ebf3a9005e 100644 > --- a/proxmox-schema/Cargo.toml > +++ b/proxmox-schema/Cargo.toml > @@ -15,8 +15,9 @@ rust-version.workspace = true > anyhow.workspace = true > const_format = { workspace = true, optional = true } > regex.workspace = true > -serde.workspace = true > +serde = { workspace = true, features = ["derive"] } > serde_json.workspace = true > +serde_with.workspace = true > textwrap = "0.16" > > # the upid type needs this for 'getpid' > @@ -27,7 +28,6 @@ proxmox-api-macro = { workspace = true, optional = true } > > [dev-dependencies] > url.workspace = true > -serde = { workspace = true, features = [ "derive" ] } > proxmox-api-macro.workspace = true > > [features] > diff --git a/proxmox-schema/src/const_regex.rs b/proxmox-schema/src/const_regex.rs > index 8ddc41abedeb..56f6c27fa1de 100644 > --- a/proxmox-schema/src/const_regex.rs > +++ b/proxmox-schema/src/const_regex.rs > @@ -1,5 +1,7 @@ > use std::fmt; > > +use serde::Serialize; > + > /// Helper to represent const regular expressions > /// > /// The current Regex::new() function is not `const_fn`. Unless that > @@ -13,6 +15,16 @@ pub struct ConstRegexPattern { > pub regex_obj: fn() -> &'static regex::Regex, > } > > +impl Serialize for ConstRegexPattern { I'm not a huge fan of adding this trait to `ConstRegexPattern`... > + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> > + where > + S: serde::Serializer, > + { > + // Get the compiled regex and serialize its pattern as a string > + serializer.serialize_str((self.regex_obj)().as_str()) > + } > +} > + > impl fmt::Debug for ConstRegexPattern { > fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { > write!(f, "{:?}", self.regex_string) > diff --git a/proxmox-schema/src/schema.rs b/proxmox-schema/src/schema.rs > index ddbbacd462a4..11461eaf6ace 100644 > --- a/proxmox-schema/src/schema.rs > +++ b/proxmox-schema/src/schema.rs > @@ -4,10 +4,12 @@ > //! completely static API definitions that can be included within the programs read-only text > //! segment. > > -use std::collections::HashSet; > +use std::collections::{HashMap, HashSet}; > use std::fmt; > > use anyhow::{bail, format_err, Error}; > +use serde::ser::{SerializeMap, SerializeStruct}; > +use serde::{Serialize, Serializer}; > use serde_json::{json, Value}; > > use crate::ConstRegexPattern; > @@ -181,7 +183,8 @@ impl<'a> FromIterator<(&'a str, Error)> for ParameterError { > } > > /// Data type to describe boolean values > -#[derive(Debug)] > +#[serde_with::skip_serializing_none] > +#[derive(Debug, Serialize)] > #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))] > #[non_exhaustive] > pub struct BooleanSchema { > @@ -222,7 +225,8 @@ impl BooleanSchema { > } > > /// Data type to describe integer values. > -#[derive(Debug)] > +#[serde_with::skip_serializing_none] > +#[derive(Debug, Serialize)] > #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))] > #[non_exhaustive] > pub struct IntegerSchema { > @@ -304,7 +308,8 @@ impl IntegerSchema { > } > > /// Data type to describe (JSON like) number value > -#[derive(Debug)] > +#[serde_with::skip_serializing_none] > +#[derive(Debug, Serialize)] > #[non_exhaustive] > pub struct NumberSchema { > pub description: &'static str, > @@ -406,7 +411,8 @@ impl PartialEq for NumberSchema { > } > > /// Data type to describe string values. > -#[derive(Debug)] > +#[serde_with::skip_serializing_none] > +#[derive(Debug, Serialize)] > #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))] > #[non_exhaustive] > pub struct StringSchema { ^ needs a `#[serde(rename_all = "camelCase")]` > @@ -418,6 +424,7 @@ pub struct StringSchema { > /// Optional maximal length. > pub max_length: Option<usize>, > /// Optional microformat. > + #[serde(flatten)] > pub format: Option<&'static ApiStringFormat>, > /// A text representation of the format/type (used to generate documentation). > pub type_text: Option<&'static str>, ^ while this needs a `rename = "typetext"` But also, if the type text is None, property strings get the type text generated via `format::get_property_string_type_text`. You can take this from my staff branch. > @@ -534,7 +541,8 @@ impl StringSchema { > /// > /// All array elements are of the same type, as defined in the `items` > /// schema. > -#[derive(Debug)] > +#[serde_with::skip_serializing_none] > +#[derive(Debug, Serialize)] > #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))] > #[non_exhaustive] > pub struct ArraySchema { ^ needs a `#[serde(rename_all = "camelCase")]` > @@ -634,6 +642,43 @@ pub type SchemaPropertyEntry = (&'static str, bool, &'static Schema); > /// This is a workaround unless RUST can const_fn `Hash::new()` > pub type SchemaPropertyMap = &'static [SchemaPropertyEntry]; > > +/// A wrapper struct to hold the [`SchemaPropertyMap`] and serialize it nicely. > +/// > +/// [`SchemaPropertyMap`] holds [`SchemaPropertyEntry`]s which are tuples. Tuples are serialized to > +/// arrays, but we need a Map with the name (first item in the tuple) as a key and the optional > +/// (second item in the tuple) as a property of the value. > +pub struct SerializableSchemaProperties<'a>(&'a [SchemaPropertyEntry]); > + > +impl Serialize for SerializableSchemaProperties<'_> { > + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> > + where > + S: Serializer, > + { > + let mut seq = serializer.serialize_map(Some(self.0.len()))?; > + > + for (name, optional, schema) in self.0 { > + let schema_with_metadata = OptionalSchema { > + optional: *optional, > + schema, > + }; > + > + seq.serialize_entry(&name, &schema_with_metadata)?; > + } > + > + seq.end() > + } > +} > + > +/// A schema with a optional bool property. > +/// > +/// The schema gets flattened, so it looks just like a normal Schema but with a optional property. > +#[derive(Serialize)] > +struct OptionalSchema<'a> { > + optional: bool, > + #[serde(flatten)] > + schema: &'a Schema, > +} > + > const fn assert_properties_sorted(properties: SchemaPropertyMap) { > use std::cmp::Ordering; > > @@ -656,7 +701,7 @@ const fn assert_properties_sorted(properties: SchemaPropertyMap) { > /// Legacy property strings may contain shortcuts where the *value* of a specific key is used as a > /// *key* for yet another option. Most notably, PVE's `netX` properties use `<model>=<macaddr>` > /// instead of `model=<model>,macaddr=<macaddr>`. > -#[derive(Clone, Copy, Debug)] > +#[derive(Clone, Copy, Debug, Serialize)] > #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))] > pub struct KeyAliasInfo { > pub key_alias: &'static str, > @@ -700,6 +745,77 @@ pub struct ObjectSchema { > pub key_alias_info: Option<KeyAliasInfo>, > } > > +impl Serialize for ObjectSchema { > + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> > + where > + S: Serializer, Move the contents of this into a: fn serialize_object_schema<S, T: ObjectSchemaType>(schema: &T, serializer: S) -> Result<S::Ok, S::Error> > + { > + let mut s = serializer.serialize_struct("ObjectSchema", 5)?; (^ May as well just use `_map()` instead of `_struct()`, not sure there's much of a benefit here.) > + > + s.serialize_field("description", self.description)?; > + s.serialize_field("additional_properties", &self.additional_properties)?; > + > + // Collect all OneOf type properties recursively The `ObjectSchemaType` trait gives you a `.properties()` Iterator. > + let mut oneofs: Vec<SchemaPropertyEntry> = Vec::new(); > + for (_, _, schema) in self.properties { > + collect_oneof_type_properties(schema, &mut oneofs); > + } > + > + if !oneofs.is_empty() { > + // Extend the oneOf type-properties with the actual properties > + oneofs.extend_from_slice(self.properties); > + s.serialize_field("properties", &SerializableSchemaProperties(&oneofs))?; > + } else { > + s.serialize_field("properties", &SerializableSchemaProperties(self.properties))?; > + } > + > + if let Some(default_key) = self.default_key { > + s.serialize_field("default_key", default_key)?; ^ Like `optional`, in perl `default_key` is part of the *property*, not part of the `Object` schema. > + } else { > + s.skip_field("default_key")?; > + } > + if let Some(key_alias_info) = self.key_alias_info { This does not exist in perl. The key alias info works like this: - All keys listed in the `KeyAliasInfo::values` alias to `KeyAliasInfo::alias`, and *which* key has been used is stored in the property named `KeyAliasInfo::key_alias`. Assume we have key_alias_info = { keyAlias = "model", alias = "macaddr", values = [ "virtio", "e1000", ... ] } This transforms: virtio=aa:aa:aa:aa:aa:aa,bridge=vmbr0 into model=virtio,macaddr=aa:aa:aa:aa:aa:aa,bridge=vmbr0 So you need to pass the key alias info (KAI from here on out) along to a helper which serializes the property list which then: - first finds the schema for the `KAI.alias` - whenever a property is serialized which is listed in the `KAI.values` array it merges: `keyAlias = KAI.keyAlias, alias = KAI.alias` *into* the property - iterate through all the `KAI.values` and serialize them with the schema found for the `KAI.alias` property > + s.serialize_field("key_alias_info", &key_alias_info)?; > + } else { > + s.skip_field("key_alias_info")?; > + } > + > + s.end() > + } > +} > + > +// Recursive function to find all OneOf type properties in a schema > +fn collect_oneof_type_properties(schema: &Schema, result: &mut Vec<SchemaPropertyEntry>) { Why are you recursing into arrays, property strings etc. here? The contents of arrays aren't part of the containing object's property list? > + match schema { > + Schema::OneOf(oneof) => { > + result.push(*oneof.type_property_entry); > + } > + Schema::Array(array) => { > + // Recursively check the array schema > + collect_oneof_type_properties(array.items, result); > + } > + Schema::String(string) => { > + // Check the PropertyString Schema > + if let Some(ApiStringFormat::PropertyString(schema)) = string.format { > + collect_oneof_type_properties(schema, result); > + } > + } > + Schema::Object(obj) => { > + // Check all properties in the object > + for (_, _, prop_schema) in obj.properties { > + collect_oneof_type_properties(prop_schema, result); > + } > + } > + Schema::AllOf(all_of) => { > + // Check all schemas in the allOf list > + for &schema in all_of.list { > + collect_oneof_type_properties(schema, result); > + } > + } > + _ => {} > + } > +} > + > impl ObjectSchema { > /// Create a new `object` schema. > /// > @@ -811,7 +927,7 @@ impl ObjectSchema { > /// > /// Technically this could also contain an `additional_properties` flag, however, in the JSON > /// Schema, this is not supported, so here we simply assume additional properties to be allowed. > -#[derive(Debug)] > +#[derive(Debug, Serialize)] > #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))] > #[non_exhaustive] > pub struct AllOfSchema { > @@ -864,7 +980,7 @@ impl AllOfSchema { > /// In serde-language, we use an internally tagged enum representation. > /// > /// Note that these are limited to object schemas. Other schemas will produce errors. > -#[derive(Debug)] > +#[derive(Debug, Serialize)] > #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))] > #[non_exhaustive] > pub struct OneOfSchema { > @@ -880,6 +996,30 @@ pub struct OneOfSchema { > pub list: &'static [(&'static str, &'static Schema)], > } > > +fn serialize_oneof_schema<S>(one_of: &OneOfSchema, serializer: S) -> Result<S::Ok, S::Error> > +where > + S: Serializer, > +{ I'll first have to refresh my memory on the perl-side oneOf weirdness for this. Maybe @Dominik can take a look as well. If we need rust->perl support here then the perl oneOf may very well need some changes... > + use serde::ser::SerializeMap; > + > + let mut map = serializer.serialize_map(Some(3))?; > + > + map.serialize_entry("description", &one_of.description)?; > + > + let variants = one_of > + .list > + .iter() > + .map(|(_, schema)| schema) > + .collect::<Vec<_>>(); > + > + map.serialize_entry("oneOf", &variants)?; > + > + // The schema gets inserted into the parent properties > + map.serialize_entry("type-property", &one_of.type_property_entry.0)?; > + > + map.end() > +} > + > const fn assert_one_of_list_is_sorted(list: &[(&str, &Schema)]) { > use std::cmp::Ordering; > > @@ -1360,7 +1500,8 @@ impl Iterator for OneOfPropertyIterator { > /// ], > /// ).schema(); > /// ``` > -#[derive(Debug)] > +#[derive(Debug, Serialize)] > +#[serde(tag = "type", rename_all = "lowercase")] > #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))] > pub enum Schema { > Null, > @@ -1370,10 +1511,81 @@ pub enum Schema { > String(StringSchema), > Object(ObjectSchema), > Array(ArraySchema), > + #[serde(serialize_with = "serialize_allof_schema")] ^ This should use `#[serde(rename = "object")]`. We don't have explicit AllOfs in perl, because in perl we can easily just merge properties. In rust we were unable to do that, since our schemas are all compile-time-const. > AllOf(AllOfSchema), > + #[serde(untagged)] > + #[serde(serialize_with = "serialize_oneof_schema", rename = "oneOf")] > OneOf(OneOfSchema), > } > > +/// Serialize the AllOf Schema > +/// > +/// This will create one ObjectSchema and merge the properties of all the children. > +fn serialize_allof_schema<S>(all_of: &AllOfSchema, serializer: S) -> Result<S::Ok, S::Error> drop this and call the above mentioned `serialize_object_schema()` instead. > +where > + S: Serializer, > +{ > + use serde::ser::SerializeMap; > + > + let mut map = serializer.serialize_map(Some(4))?; > + > + // Add the top-level description > + map.serialize_entry("description", &all_of.description)?; > + > + // The type is always object > + map.serialize_entry("type", "object")?; > + > + let mut all_properties = HashMap::new(); > + let mut additional_properties = false; > + > + for &schema in all_of.list { > + if let Some(object_schema) = schema.object() { > + // If any schema allows additional properties, the merged schema will too > + if object_schema.additional_properties { > + additional_properties = true; > + } > + > + // Add all properties from this schema > + for (name, optional, prop_schema) in object_schema.properties { > + all_properties.insert(*name, (*optional, *prop_schema)); > + } > + } else if let Some(nested_all_of) = schema.all_of() { > + // For nested AllOf schemas go through recursively > + for &nested_schema in nested_all_of.list { > + if let Some(object_schema) = nested_schema.object() { > + if object_schema.additional_properties { > + additional_properties = true; > + } > + > + for (name, optional, prop_schema) in object_schema.properties { > + all_properties.insert(*name, (*optional, *prop_schema)); > + } > + } > + } > + } > + } > + > + // Add the merged properties > + let properties_entry = all_properties > + .iter() > + .map(|(name, (optional, schema))| { > + ( > + *name, > + OptionalSchema { > + optional: *optional, > + schema, > + }, > + ) > + }) > + .collect::<HashMap<_, _>>(); > + > + map.serialize_entry("properties", &properties_entry)?; > + > + map.serialize_entry("additional_properties", &additional_properties)?; > + > + map.end() > +} > + > impl Schema { > /// Verify JSON value with `schema`. > pub fn verify_json(&self, data: &Value) -> Result<(), Error> { > @@ -1694,10 +1906,12 @@ impl Schema { > } > > /// A string enum entry. An enum entry must have a value and a description. > -#[derive(Clone, Debug)] > +#[derive(Clone, Debug, Serialize)] > +#[serde(transparent)] > #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))] > pub struct EnumEntry { > pub value: &'static str, > + #[serde(skip)] > pub description: &'static str, > } > > @@ -1776,14 +1990,20 @@ impl EnumEntry { > /// let data = PRODUCT_LIST_SCHEMA.parse_property_string("1,2"); // parse as Array > /// assert!(data.is_ok()); > /// ``` > +#[derive(Serialize)] > pub enum ApiStringFormat { > /// Enumerate all valid strings > + #[serde(rename = "enum")] > Enum(&'static [EnumEntry]), > /// Use a regular expression to describe valid strings. > + #[serde(rename = "pattern")] > Pattern(&'static ConstRegexPattern), > /// Use a schema to describe complex types encoded as string. > + #[serde(rename = "format")] > PropertyString(&'static Schema), > /// Use a verification function. > + /// Note: we can't serialize this, panic if we encounter this. > + #[serde(skip)] ^ as mentioned, see my old-perl-serializing branch... > VerifyFn(ApiStringVerifyFn), > } > > -- > 2.39.5 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel