From: Fabian Ebner <f.ebner@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH proxmox-apt 2/2] support quote-word parsing for one-line format
Date: Thu, 1 Jul 2021 15:46:34 +0200 [thread overview]
Message-ID: <20210701134634.130628-3-f.ebner@proxmox.com> (raw)
In-Reply-To: <20210701134634.130628-1-f.ebner@proxmox.com>
so that parsing CD ROM repositories with spaces in the name works too.
But it's not limited to that, and should make one-line parsing rather
similar to what APT does (stanza parsing in APT doesn't use
ParseQuoteWord at all AFAICS).
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
src/repositories/file/list_parser.rs | 143 +++++++++++++++++-----
src/repositories/repository.rs | 58 ++++++++-
tests/sources.list.d.expected/cdroms.list | 10 ++
tests/sources.list.d.expected/files.list | 4 +
tests/sources.list.d/cdroms.list | 7 ++
tests/sources.list.d/files.list | 2 +
6 files changed, 186 insertions(+), 38 deletions(-)
create mode 100644 tests/sources.list.d.expected/cdroms.list
create mode 100644 tests/sources.list.d.expected/files.list
create mode 100644 tests/sources.list.d/cdroms.list
create mode 100644 tests/sources.list.d/files.list
diff --git a/src/repositories/file/list_parser.rs b/src/repositories/file/list_parser.rs
index 86c9955..04c1729 100644
--- a/src/repositories/file/list_parser.rs
+++ b/src/repositories/file/list_parser.rs
@@ -1,7 +1,6 @@
use std::convert::TryInto;
use std::io::BufRead;
-use std::iter::{Iterator, Peekable};
-use std::str::SplitAsciiWhitespace;
+use std::iter::Iterator;
use anyhow::{bail, format_err, Error};
@@ -9,6 +8,78 @@ use crate::repositories::{APTRepository, APTRepositoryFileType, APTRepositoryOpt
use super::APTRepositoryParser;
+// TODO convert %-escape characters. Also adapt printing back accordingly,
+// because at least '%' needs to be re-escaped when printing.
+/// See APT's ParseQuoteWord in contrib/strutl.cc
+///
+/// Doesn't split on whitespace when between `[]` or `""` and strips `"` from the word.
+///
+/// Currently, %-escaped characters are not interpreted, but passed along as is.
+struct SplitQuoteWord {
+ rest: String,
+ position: usize,
+}
+
+impl SplitQuoteWord {
+ pub fn new(string: String) -> Self {
+ Self {
+ rest: string,
+ position: 0,
+ }
+ }
+}
+
+impl Iterator for SplitQuoteWord {
+ type Item = Result<String, Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let rest = &self.rest[self.position..];
+
+ let mut start = None;
+ let mut wait_for = None;
+
+ for (n, c) in rest.chars().enumerate() {
+ self.position += 1;
+
+ if let Some(wait_for_char) = wait_for {
+ if wait_for_char == c {
+ wait_for = None;
+ }
+ continue;
+ }
+
+ if char::is_ascii_whitespace(&c) {
+ if let Some(start) = start {
+ return Some(Ok(rest[start..n].replace('"', "")));
+ }
+ continue;
+ }
+
+ if start == None {
+ start = Some(n);
+ }
+
+ if c == '"' {
+ wait_for = Some('"');
+ }
+
+ if c == '[' {
+ wait_for = Some(']');
+ }
+ }
+
+ if let Some(wait_for) = wait_for {
+ return Some(Err(format_err!("missing terminating '{}'", wait_for)));
+ }
+
+ if let Some(start) = start {
+ return Some(Ok(rest[start..].replace('"', "")));
+ }
+
+ None
+ }
+}
+
pub struct APTListFileParser<R: BufRead> {
input: R,
line_nr: usize,
@@ -31,24 +102,18 @@ impl<R: BufRead> APTListFileParser<R> {
/// Errors when options are invalid or not closed by `']'`.
fn parse_options(
options: &mut Vec<APTRepositoryOption>,
- tokens: &mut Peekable<SplitAsciiWhitespace>,
+ tokens: &mut SplitQuoteWord,
) -> Result<(), Error> {
- let mut option = match tokens.peek() {
- Some(token) => {
- match token.strip_prefix('[') {
- Some(option) => option,
- None => return Ok(()), // doesn't look like options
- }
- }
- None => return Ok(()),
- };
-
- tokens.next(); // avoid reading the beginning twice
-
let mut finished = false;
+
loop {
+ let mut option = match tokens.next() {
+ Some(token) => token?,
+ None => bail!("options not closed by ']'"),
+ };
+
if let Some(stripped) = option.strip_suffix(']') {
- option = stripped;
+ option = stripped.to_string();
if option.is_empty() {
break;
}
@@ -83,11 +148,6 @@ impl<R: BufRead> APTListFileParser<R> {
if finished {
break;
}
-
- option = match tokens.next() {
- Some(option) => option,
- None => bail!("options not closed by ']'"),
- }
}
Ok(())
@@ -122,24 +182,43 @@ impl<R: BufRead> APTListFileParser<R> {
line = line_start;
}
- let mut tokens = line.split_ascii_whitespace().peekable();
-
- match tokens.next() {
- Some(package_type) => {
+ // e.g. quoted "deb" is not accepted by APT, so no need for quote word parsing here
+ line = match line.split_once(|c| char::is_ascii_whitespace(&c)) {
+ Some((package_type, rest)) => {
repo.types.push(package_type.try_into()?);
+ rest
}
None => return Ok(None), // empty line
- }
+ };
- Self::parse_options(&mut repo.options, &mut tokens)?;
+ line = line.trim_start_matches(|c| char::is_ascii_whitespace(&c));
+
+ let has_options = match line.strip_prefix('[') {
+ Some(rest) => {
+ // avoid the start of the options to be interpreted as the start of a quote word
+ line = rest;
+ true
+ }
+ None => false,
+ };
+
+ let mut tokens = SplitQuoteWord::new(line.to_string());
+
+ if has_options {
+ Self::parse_options(&mut repo.options, &mut tokens)?;
+ }
// the rest of the line is just '<uri> <suite> [<components>...]'
- let mut tokens = tokens.map(str::to_string);
repo.uris
- .push(tokens.next().ok_or_else(|| format_err!("missing URI"))?);
- repo.suites
- .push(tokens.next().ok_or_else(|| format_err!("missing suite"))?);
- repo.components.extend(tokens);
+ .push(tokens.next().ok_or_else(|| format_err!("missing URI"))??);
+ repo.suites.push(
+ tokens
+ .next()
+ .ok_or_else(|| format_err!("missing suite"))??,
+ );
+ for token in tokens {
+ repo.components.push(token?);
+ }
repo.comment = std::mem::take(&mut self.comment);
diff --git a/src/repositories/repository.rs b/src/repositories/repository.rs
index cf17380..4e1ea6e 100644
--- a/src/repositories/repository.rs
+++ b/src/repositories/repository.rs
@@ -433,6 +433,41 @@ fn suite_variant(suite: &str) -> (&str, &str) {
(suite, "")
}
+/// Strips existing double quotes from the string first, and then adds double quotes at
+/// the beginning and end if there is an ASCII whitespace in the `string`, which is not
+/// escaped by `[]`.
+fn quote_for_one_line(string: &str) -> String {
+ let mut add_quotes = false;
+ let mut wait_for_bracket = false;
+
+ // easier to just quote the whole string, so ignore pre-existing quotes
+ // currently, parsing removes them anyways, but being on the safe side is rather cheap
+ let string = string.replace('"', "");
+
+ for c in string.chars() {
+ if wait_for_bracket {
+ if c == ']' {
+ wait_for_bracket = false;
+ }
+ continue;
+ }
+
+ if char::is_ascii_whitespace(&c) {
+ add_quotes = true;
+ break;
+ }
+
+ if c == '[' {
+ wait_for_bracket = true;
+ }
+ }
+
+ match add_quotes {
+ true => format!("\"{}\"", string),
+ false => string,
+ }
+}
+
/// Writes a repository in one-line format followed by a blank line.
///
/// Expects that `repo.file_type == APTRepositoryFileType::List`.
@@ -457,15 +492,26 @@ fn write_one_line(repo: &APTRepository, w: &mut dyn Write) -> Result<(), Error>
if !repo.options.is_empty() {
write!(w, "[ ")?;
- repo.options
- .iter()
- .try_for_each(|option| write!(w, "{}={} ", option.key, option.values.join(",")))?;
+
+ for option in repo.options.iter() {
+ let option = quote_for_one_line(&format!("{}={}", option.key, option.values.join(",")));
+ write!(w, "{} ", option)?;
+ }
+
write!(w, "] ")?;
};
- write!(w, "{} ", repo.uris[0])?;
- write!(w, "{} ", repo.suites[0])?;
- writeln!(w, "{}", repo.components.join(" "))?;
+ write!(w, "{} ", quote_for_one_line(&repo.uris[0]))?;
+ write!(w, "{} ", quote_for_one_line(&repo.suites[0]))?;
+ writeln!(
+ w,
+ "{}",
+ repo.components
+ .iter()
+ .map(|comp| quote_for_one_line(comp))
+ .collect::<Vec<String>>()
+ .join(" ")
+ )?;
writeln!(w)?;
diff --git a/tests/sources.list.d.expected/cdroms.list b/tests/sources.list.d.expected/cdroms.list
new file mode 100644
index 0000000..7d75573
--- /dev/null
+++ b/tests/sources.list.d.expected/cdroms.list
@@ -0,0 +1,10 @@
+# deb [ trusted=yes ] cdrom:[Proxmox VE 5.1]/ stretch pve
+
+# deb [ trusted=yes ] cdrom:[Proxmox VE 5.1]/proxmox/packages/ /
+
+deb [ trusted=yes ] cdrom:[Proxmox VE 7.0 BETA]/ bullseye pve
+
+deb cdrom:[Proxmox VE 7.0 BETA]/proxmox/packages/ /
+
+deb [ trusted=yes ] cdrom:[Debian GNU/Linux 10.6.0 _Buster_ - Official amd64 NETINST 20200926-10:16]/ buster main
+
diff --git a/tests/sources.list.d.expected/files.list b/tests/sources.list.d.expected/files.list
new file mode 100644
index 0000000..5e77023
--- /dev/null
+++ b/tests/sources.list.d.expected/files.list
@@ -0,0 +1,4 @@
+deb [ trusted=yes ] "file:///some/spacey/mount point/" bullseye pve
+
+deb [ lang=it ] "file:///some/spacey/mount point/proxmox/packages/" /
+
diff --git a/tests/sources.list.d/cdroms.list b/tests/sources.list.d/cdroms.list
new file mode 100644
index 0000000..3b12626
--- /dev/null
+++ b/tests/sources.list.d/cdroms.list
@@ -0,0 +1,7 @@
+#deb [trusted=yes] cdrom:[Proxmox VE 5.1]/ stretch pve
+#deb [trusted=yes] cdrom:[Proxmox VE 5.1]/proxmox/packages/ /
+
+deb [trusted=yes] cdrom:[Proxmox VE 7.0 BETA]/ bullseye pve
+deb cdrom:[Proxmox VE 7.0 BETA]/proxmox/packages/ /
+
+deb [ "trusted=yes" ] cdrom:[Debian GNU/Linux 10.6.0 _Buster_ - Official amd64 NETINST 20200926-10:16]/ buster main
diff --git a/tests/sources.list.d/files.list b/tests/sources.list.d/files.list
new file mode 100644
index 0000000..4a5e4c2
--- /dev/null
+++ b/tests/sources.list.d/files.list
@@ -0,0 +1,2 @@
+deb [trusted=yes] "file:///some/spacey/mount point/" bullseye pve
+deb [lang="it"] file:///some/spacey/"mount point"/proxmox/packages/ /
--
2.30.2
next prev parent reply other threads:[~2021-07-01 13:46 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-01 13:46 [pve-devel] [PATCH proxmox-apt] Support quoted parsing for .list format Fabian Ebner
2021-07-01 13:46 ` [pve-devel] [PATCH proxmox-apt 1/2] avoid backtick unicode symbol in string Fabian Ebner
2021-07-01 13:46 ` Fabian Ebner [this message]
2021-07-01 16:30 ` [pve-devel] applied: [PATCH proxmox-apt] Support quoted parsing for .list format Thomas Lamprecht
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210701134634.130628-3-f.ebner@proxmox.com \
--to=f.ebner@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal