From: Dominik Csapak <d.csapak@proxmox.com>
To: Stoiko Ivanov <s.ivanov@proxmox.com>, pmg-devel@lists.proxmox.com
Subject: Re: [pmg-devel] [PATCH pmg-api v3 6/8] quarantine: handle utf8 data
Date: Wed, 23 Nov 2022 15:15:12 +0100 [thread overview]
Message-ID: <bfc96a1d-5b2e-73b3-06ac-ba2810238465@proxmox.com> (raw)
In-Reply-To: <20221123092336.11423-7-s.ivanov@proxmox.com>
I'd like to have some rationale for the changes in the commit message,
at least for the less obvious ones (the regex changes, for example).
Comments inline.
On 11/23/22 10:23, Stoiko Ivanov wrote:
> Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
> ---
> src/PMG/API2/Quarantine.pm | 10 +++++-----
> src/PMG/HTMLMail.pm | 7 ++++---
> src/PMG/Quarantine.pm | 13 +++++++------
> src/PMG/RuleDB/Spam.pm | 12 ++++++------
> 4 files changed, 22 insertions(+), 20 deletions(-)
>
> diff --git a/src/PMG/API2/Quarantine.pm b/src/PMG/API2/Quarantine.pm
> index ddf7c04..819c78c 100644
> --- a/src/PMG/API2/Quarantine.pm
> +++ b/src/PMG/API2/Quarantine.pm
> @@ -141,8 +141,8 @@ my $parse_header_info = sub {
> my $sender = PMG::Utils::decode_rfc1522(PVE::Tools::trim($head->get('sender')));
> $res->{sender} = $sender if $sender && ($sender ne $res->{from});
>
> - $res->{envelope_sender} = $ref->{sender};
> - $res->{receiver} = $ref->{receiver} // $ref->{pmail};
> + $res->{envelope_sender} = PMG::Utils::try_decode_utf8($ref->{sender});
> + $res->{receiver} = PMG::Utils::try_decode_utf8($ref->{receiver} // $ref->{pmail});
Maybe we should note here in a comment that these are not headers
but part of the SMTP dialog, and so cannot be quoted-printable/base64 encoded?
> $res->{id} = 'C' . $ref->{cid} . 'R' . $ref->{rid} . 'T' . $ref->{ticketid};
> $res->{time} = $ref->{time};
> $res->{bytes} = $ref->{bytes};
> @@ -437,7 +437,7 @@ __PACKAGE__->register_method ({
> $sth->execute();
>
> while (my $ref = $sth->fetchrow_hashref()) {
> - push @$res, { mail => $ref->{pmail} };
> + push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) };
> }
>
> return $res;
> @@ -532,7 +532,7 @@ __PACKAGE__->register_method ({
> }
>
> while (my $ref = $sth->fetchrow_hashref()) {
> - push @$res, { mail => $ref->{pmail} };
> + push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) };
> }
>
> return $res;
> @@ -569,7 +569,7 @@ my $quarantine_api = sub {
> }
>
> if ($check_pmail || $role eq 'quser') {
> - $sth->execute($pmail);
> + $sth->execute(encode('UTF-8', $pmail));
> } else {
> $sth->execute();
> }
> diff --git a/src/PMG/HTMLMail.pm b/src/PMG/HTMLMail.pm
> index 87f5c40..207c52c 100644
> --- a/src/PMG/HTMLMail.pm
> +++ b/src/PMG/HTMLMail.pm
> @@ -192,9 +192,10 @@ sub read_raw_email {
> # read header
> my $header;
> while (defined(my $line = <$fh>)) {
> - $raw_header .= $line;
> - chomp $line;
> - push @$header, $line;
> + my $decoded_line = PMG::Utils::try_decode_utf8($line);
> + $raw_header .= $decoded_line;
> + chomp $decoded_line;
> + push @$header, $decoded_line;
> last if $line =~ m/^\s*$/;
> }
>
> diff --git a/src/PMG/Quarantine.pm b/src/PMG/Quarantine.pm
> index 77af8cc..aa6b948 100644
> --- a/src/PMG/Quarantine.pm
> +++ b/src/PMG/Quarantine.pm
> @@ -3,6 +3,7 @@ package PMG::Quarantine;
> use strict;
> use warnings;
> use Net::SMTP;
> +use Encode qw(encode);
>
> use PVE::SafeSyslog;
> use PVE::Tools;
> @@ -16,7 +17,7 @@ sub add_to_blackwhite {
>
> my $name = $listname eq 'BL' ? 'BL' : 'WL';
> my $oname = $listname eq 'BL' ? 'WL' : 'BL';
> - my $qu = $dbh->quote ($username);
> + my $qu = $dbh->quote (encode('UTF-8', $username));
>
> my $sth = $dbh->prepare(
> "SELECT * FROM UserPrefs WHERE pmail = $qu AND (Name = 'BL' OR Name = 'WL')");
> @@ -25,13 +26,13 @@ sub add_to_blackwhite {
> my $list = { 'WL' => {}, 'BL' => {} };
>
> while (my $ref = $sth->fetchrow_hashref()) {
> - my $data = $ref->{data};
> + my $data = PMG::Utils::try_decode_utf8($ref->{data});
> $data =~ s/[,;]/ /g;
> my @alist = split('\s+', $data);
>
> my $tmp = {};
> foreach my $a (@alist) {
> - if ($a =~ m/^[[:ascii:]]+$/) {
> + if ($a =~ m/^[^\s\\\@]+(?:\@[^\s\/\\\@]+)?$/) {
That change seems a bit dangerous; maybe we should at least
filter out some control characters here?
> $tmp->{$a} = 1;
> }
> }
> @@ -50,7 +51,7 @@ sub add_to_blackwhite {
> if ($delete) {
> delete($list->{$name}->{$v});
> } else {
> - if ($v =~ m/[[:^ascii:]]/) {
> + if ($v =~ m/[\s\\]/) {
Same here: going from "non-ASCII is forbidden" to "only whitespace and '\' are forbidden"
is a bit broad imho.
> die "email address '$v' contains invalid characters\n";
> }
> $list->{$name}->{$v} = 1;
> @@ -58,8 +59,8 @@ sub add_to_blackwhite {
> }
> }
>
> - my $wlist = $dbh->quote(join (',', keys %{$list->{WL}}) || '');
> - my $blist = $dbh->quote(join (',', keys %{$list->{BL}}) || '');
> + my $wlist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{WL}})) || '');
> + my $blist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{BL}})) || '');
>
> if (!$delete) {
> my $maxlen = 200000;
> diff --git a/src/PMG/RuleDB/Spam.pm b/src/PMG/RuleDB/Spam.pm
> index 99056a3..bc1d422 100644
> --- a/src/PMG/RuleDB/Spam.pm
> +++ b/src/PMG/RuleDB/Spam.pm
> @@ -94,7 +94,7 @@ sub parse_addrlist {
> my $regex = $addr;
> # SA like checks
> $regex =~ s/[\000\\\(]/_/gs; # is this really necessasry ?
> - $regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g; # escape possible metachars
> + $regex =~ s/([^\*\?_\w])/\\$1/g; # escape possible metachars
What does \w include here beyond a-zA-Z0-9?
(A short explanation in the commit message would be enough imo.)
> $regex =~ tr/?/./; # replace "?" with "."
> $regex =~ s/\*+/\.\*/g; # replace "*" with ".*"
>
> @@ -149,13 +149,13 @@ sub get_blackwhite {
> $sth->execute();
>
> while (my $ref = $sth->fetchrow_hashref()) {
> - my $pmail = lc ($ref->{pmail});
> + my $pmail = lc (PMG::Utils::try_decode_utf8($ref->{pmail}));
> if ($ref->{name} eq 'WL') {
> $target_info->{$pmail}->{whitelist} =
> - parse_addrlist($ref->{data});
> + parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
> } elsif ($ref->{name} eq 'BL') {
> $target_info->{$pmail}->{blacklist} =
> - parse_addrlist($ref->{data});
> + parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
> }
> }
>
> @@ -205,7 +205,7 @@ sub what_match_targets {
> ($list = $queue->{blackwhite}->{$pmail}->{whitelist}) &&
> check_addrlist($list, $queue->{all_from_addrs})) {
> syslog('info', "%s: sender in user (%s) whitelist",
> - $queue->{logid}, $pmail);
> + $queue->{logid}, encode('UTF-8', $pmail));
> } else {
> $target_info->{$t}->{marks} = []; # never add additional marks here
> $target_info->{$t}->{spaminfo} = $info;
> @@ -234,7 +234,7 @@ sub what_match_targets {
> $target_info->{$t}->{marks} = [];
> $target_info->{$t}->{spaminfo} = $info;
> syslog ('info', "%s: sender in user (%s) blacklist",
> - $queue->{logid}, $pmail);
> + $queue->{logid}, encode('UTF-8',$pmail));
> }
> }
> }
next prev parent reply other threads:[~2022-11-23 14:15 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-23 9:23 [pmg-devel] [PATCH pmg-api/pmg-gui v3] ruledb - improve experience for non-ascii tests and mails Stoiko Ivanov
2022-11-23 9:23 ` [pmg-devel] [PATCH pmg-api v3 1/8] utils: return perl string from decode_rfc1522 Stoiko Ivanov
2022-11-23 9:23 ` [pmg-devel] [PATCH pmg-api v3 2/8] ruledb: properly substitute prox_vars in headers Stoiko Ivanov
2022-11-23 9:23 ` [pmg-devel] [PATCH pmg-api v3 3/8] fix #2541 ruledb: encode relevant values as utf-8 in database Stoiko Ivanov
2022-11-23 9:23 ` [pmg-devel] [PATCH pmg-api v3 4/8] ruledb: encode e-mail addresses for syslog Stoiko Ivanov
2022-11-23 9:23 ` [pmg-devel] [PATCH pmg-api v3 5/8] partially fix #2465: handle smtputf8 addresses in the rule-system Stoiko Ivanov
2022-11-23 9:23 ` [pmg-devel] [PATCH pmg-api v3 6/8] quarantine: handle utf8 data Stoiko Ivanov
2022-11-23 14:15 ` Dominik Csapak [this message]
2022-11-23 9:23 ` [pmg-devel] [PATCH pmg-api v3 7/8] pmgqm: handle smtputf8 data Stoiko Ivanov
2022-11-23 14:20 ` Dominik Csapak
2022-11-23 9:23 ` [pmg-devel] [PATCH pmg-api v3 8/8] statistics: handle utf8 data Stoiko Ivanov
2022-11-23 14:26 ` Dominik Csapak
2022-11-23 9:23 ` [pmg-devel] [PATCH pmg-gui v3 1/2] utils: add custom validator for pmg-email-address Stoiko Ivanov
2022-11-23 9:23 ` [pmg-devel] [PATCH pmg-gui v3 2/2] userblocklists: use PMGMail as validator for pmail Stoiko Ivanov
2022-11-23 14:09 ` [pmg-devel] [PATCH pmg-api/pmg-gui v3] ruledb - improve experience for non-ascii tests and mails Dominik Csapak
2022-11-26 7:00 ` [pmg-devel] applied-gui: " Thomas Lamprecht
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bfc96a1d-5b2e-73b3-06ac-ba2810238465@proxmox.com \
--to=d.csapak@proxmox.com \
--cc=pmg-devel@lists.proxmox.com \
--cc=s.ivanov@proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox