From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 9ECB37253F for ; Thu, 1 Jul 2021 15:46:40 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 9B0EC26B66 for ; Thu, 1 Jul 2021 15:46:40 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 8A58826B5B for ; Thu, 1 Jul 2021 15:46:39 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 60FF1400BA for ; Thu, 1 Jul 2021 15:46:39 +0200 (CEST) From: Fabian Ebner To: pve-devel@lists.proxmox.com Date: Thu, 1 Jul 2021 15:46:34 +0200 Message-Id: <20210701134634.130628-3-f.ebner@proxmox.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210701134634.130628-1-f.ebner@proxmox.com> References: <20210701134634.130628-1-f.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.599 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pve-devel] [PATCH proxmox-apt 2/2] support quote-word parsing for one-line format X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion 
List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 01 Jul 2021 13:46:40 -0000 so that parsing CD ROM repositories with spaces in the name works too. But it's not limited to that, and should make one-line parsing rather similar to what APT does (stanza parsing in APT doesn't use ParseQuoteWord at all AFAICS). Signed-off-by: Fabian Ebner --- src/repositories/file/list_parser.rs | 143 +++++++++++++++++----- src/repositories/repository.rs | 58 ++++++++- tests/sources.list.d.expected/cdroms.list | 10 ++ tests/sources.list.d.expected/files.list | 4 + tests/sources.list.d/cdroms.list | 7 ++ tests/sources.list.d/files.list | 2 + 6 files changed, 186 insertions(+), 38 deletions(-) create mode 100644 tests/sources.list.d.expected/cdroms.list create mode 100644 tests/sources.list.d.expected/files.list create mode 100644 tests/sources.list.d/cdroms.list create mode 100644 tests/sources.list.d/files.list diff --git a/src/repositories/file/list_parser.rs b/src/repositories/file/list_parser.rs index 86c9955..04c1729 100644 --- a/src/repositories/file/list_parser.rs +++ b/src/repositories/file/list_parser.rs @@ -1,7 +1,6 @@ use std::convert::TryInto; use std::io::BufRead; -use std::iter::{Iterator, Peekable}; -use std::str::SplitAsciiWhitespace; +use std::iter::Iterator; use anyhow::{bail, format_err, Error}; @@ -9,6 +8,78 @@ use crate::repositories::{APTRepository, APTRepositoryFileType, APTRepositoryOpt use super::APTRepositoryParser; +// TODO convert %-escape characters. Also adapt printing back accordingly, +// because at least '%' needs to be re-escaped when printing. +/// See APT's ParseQuoteWord in contrib/strutl.cc +/// +/// Doesn't split on whitespace when between `[]` or `""` and strips `"` from the word. +/// +/// Currently, %-escaped characters are not interpreted, but passed along as is. 
+struct SplitQuoteWord { + rest: String, + position: usize, +} + +impl SplitQuoteWord { + pub fn new(string: String) -> Self { + Self { + rest: string, + position: 0, + } + } +} + +impl Iterator for SplitQuoteWord { + type Item = Result<String, Error>; + + fn next(&mut self) -> Option<Self::Item> { + let rest = &self.rest[self.position..]; + + let mut start = None; + let mut wait_for = None; + + for (n, c) in rest.chars().enumerate() { + self.position += 1; + + if let Some(wait_for_char) = wait_for { + if wait_for_char == c { + wait_for = None; + } + continue; + } + + if char::is_ascii_whitespace(&c) { + if let Some(start) = start { + return Some(Ok(rest[start..n].replace('"', ""))); + } + continue; + } + + if start == None { + start = Some(n); + } + + if c == '"' { + wait_for = Some('"'); + } + + if c == '[' { + wait_for = Some(']'); + } + } + + if let Some(wait_for) = wait_for { + return Some(Err(format_err!("missing terminating '{}'", wait_for))); + } + + if let Some(start) = start { + return Some(Ok(rest[start..].replace('"', ""))); + } + + None + } +} + pub struct APTListFileParser<R: BufRead> { input: R, line_nr: usize, @@ -31,24 +102,18 @@ impl<R: BufRead> APTListFileParser<R> { /// Errors when options are invalid or not closed by `']'`.
fn parse_options( options: &mut Vec<APTRepositoryOption>, - tokens: &mut Peekable<SplitAsciiWhitespace>, + tokens: &mut SplitQuoteWord, ) -> Result<(), Error> { - let mut option = match tokens.peek() { - Some(token) => { - match token.strip_prefix('[') { - Some(option) => option, - None => return Ok(()), // doesn't look like options - } - } - None => return Ok(()), - }; - - tokens.next(); // avoid reading the beginning twice - let mut finished = false; + loop { + let mut option = match tokens.next() { + Some(token) => token?, + None => bail!("options not closed by ']'"), + }; + if let Some(stripped) = option.strip_suffix(']') { - option = stripped; + option = stripped.to_string(); if option.is_empty() { break; } @@ -83,11 +148,6 @@ impl<R: BufRead> APTListFileParser<R> { if finished { break; } - - option = match tokens.next() { - Some(option) => option, - None => bail!("options not closed by ']'"), - } } Ok(()) @@ -122,24 +182,43 @@ impl<R: BufRead> APTListFileParser<R> { line = line_start; } - let mut tokens = line.split_ascii_whitespace().peekable(); - - match tokens.next() { - Some(package_type) => { + // e.g.
quoted "deb" is not accepted by APT, so no need for quote word parsing here + line = match line.split_once(|c| char::is_ascii_whitespace(&c)) { + Some((package_type, rest)) => { repo.types.push(package_type.try_into()?); + rest } None => return Ok(None), // empty line - } + }; - Self::parse_options(&mut repo.options, &mut tokens)?; + line = line.trim_start_matches(|c| char::is_ascii_whitespace(&c)); + + let has_options = match line.strip_prefix('[') { + Some(rest) => { + // avoid the start of the options to be interpreted as the start of a quote word + line = rest; + true + } + None => false, + }; + + let mut tokens = SplitQuoteWord::new(line.to_string()); + + if has_options { + Self::parse_options(&mut repo.options, &mut tokens)?; + } // the rest of the line is just '<uri> <suite> [<components>...]' - let mut tokens = tokens.map(str::to_string); repo.uris - .push(tokens.next().ok_or_else(|| format_err!("missing URI"))?); - repo.suites - .push(tokens.next().ok_or_else(|| format_err!("missing suite"))?); - repo.components.extend(tokens); + .push(tokens.next().ok_or_else(|| format_err!("missing URI"))??); + repo.suites.push( + tokens + .next() + .ok_or_else(|| format_err!("missing suite"))??, + ); + for token in tokens { + repo.components.push(token?); + } repo.comment = std::mem::take(&mut self.comment); diff --git a/src/repositories/repository.rs b/src/repositories/repository.rs index cf17380..4e1ea6e 100644 --- a/src/repositories/repository.rs +++ b/src/repositories/repository.rs @@ -433,6 +433,41 @@ fn suite_variant(suite: &str) -> (&str, &str) { (suite, "") } +/// Strips existing double quotes from the string first, and then adds double quotes at +/// the beginning and end if there is an ASCII whitespace in the `string`, which is not +/// escaped by `[]`.
+fn quote_for_one_line(string: &str) -> String { + let mut add_quotes = false; + let mut wait_for_bracket = false; + + // easier to just quote the whole string, so ignore pre-existing quotes + // currently, parsing removes them anyways, but being on the safe side is rather cheap + let string = string.replace('"', ""); + + for c in string.chars() { + if wait_for_bracket { + if c == ']' { + wait_for_bracket = false; + } + continue; + } + + if char::is_ascii_whitespace(&c) { + add_quotes = true; + break; + } + + if c == '[' { + wait_for_bracket = true; + } + } + + match add_quotes { + true => format!("\"{}\"", string), + false => string, + } +} + /// Writes a repository in one-line format followed by a blank line. /// /// Expects that `repo.file_type == APTRepositoryFileType::List`. @@ -457,15 +492,26 @@ fn write_one_line(repo: &APTRepository, w: &mut dyn Write) -> Result<(), Error> if !repo.options.is_empty() { write!(w, "[ ")?; - repo.options - .iter() - .try_for_each(|option| write!(w, "{}={} ", option.key, option.values.join(",")))?; + + for option in repo.options.iter() { + let option = quote_for_one_line(&format!("{}={}", option.key, option.values.join(","))); + write!(w, "{} ", option)?; + } + write!(w, "] ")?; }; - write!(w, "{} ", repo.uris[0])?; - write!(w, "{} ", repo.suites[0])?; - writeln!(w, "{}", repo.components.join(" "))?; + write!(w, "{} ", quote_for_one_line(&repo.uris[0]))?; + write!(w, "{} ", quote_for_one_line(&repo.suites[0]))?; + writeln!( + w, + "{}", + repo.components + .iter() + .map(|comp| quote_for_one_line(comp)) + .collect::<Vec<String>>() + .join(" ") + )?; writeln!(w)?; diff --git a/tests/sources.list.d.expected/cdroms.list b/tests/sources.list.d.expected/cdroms.list new file mode 100644 index 0000000..7d75573 --- /dev/null +++ b/tests/sources.list.d.expected/cdroms.list @@ -0,0 +1,10 @@ +# deb [ trusted=yes ] cdrom:[Proxmox VE 5.1]/ stretch pve + +# deb [ trusted=yes ] cdrom:[Proxmox VE 5.1]/proxmox/packages/ / + +deb [ trusted=yes ]
cdrom:[Proxmox VE 7.0 BETA]/ bullseye pve + +deb cdrom:[Proxmox VE 7.0 BETA]/proxmox/packages/ / + +deb [ trusted=yes ] cdrom:[Debian GNU/Linux 10.6.0 _Buster_ - Official amd64 NETINST 20200926-10:16]/ buster main + diff --git a/tests/sources.list.d.expected/files.list b/tests/sources.list.d.expected/files.list new file mode 100644 index 0000000..5e77023 --- /dev/null +++ b/tests/sources.list.d.expected/files.list @@ -0,0 +1,4 @@ +deb [ trusted=yes ] "file:///some/spacey/mount point/" bullseye pve + +deb [ lang=it ] "file:///some/spacey/mount point/proxmox/packages/" / + diff --git a/tests/sources.list.d/cdroms.list b/tests/sources.list.d/cdroms.list new file mode 100644 index 0000000..3b12626 --- /dev/null +++ b/tests/sources.list.d/cdroms.list @@ -0,0 +1,7 @@ +#deb [trusted=yes] cdrom:[Proxmox VE 5.1]/ stretch pve +#deb [trusted=yes] cdrom:[Proxmox VE 5.1]/proxmox/packages/ / + +deb [trusted=yes] cdrom:[Proxmox VE 7.0 BETA]/ bullseye pve +deb cdrom:[Proxmox VE 7.0 BETA]/proxmox/packages/ / + +deb [ "trusted=yes" ] cdrom:[Debian GNU/Linux 10.6.0 _Buster_ - Official amd64 NETINST 20200926-10:16]/ buster main diff --git a/tests/sources.list.d/files.list b/tests/sources.list.d/files.list new file mode 100644 index 0000000..4a5e4c2 --- /dev/null +++ b/tests/sources.list.d/files.list @@ -0,0 +1,2 @@ +deb [trusted=yes] "file:///some/spacey/mount point/" bullseye pve +deb [lang="it"] file:///some/spacey/"mount point"/proxmox/packages/ / -- 2.30.2