public inbox for pmg-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Dominik Csapak <d.csapak@proxmox.com>
To: Stoiko Ivanov <s.ivanov@proxmox.com>, pmg-devel@lists.proxmox.com
Subject: Re: [pmg-devel] [PATCH pmg-api v3 6/8] quarantine: handle utf8 data
Date: Wed, 23 Nov 2022 15:15:12 +0100	[thread overview]
Message-ID: <bfc96a1d-5b2e-73b3-06ac-ba2810238465@proxmox.com> (raw)
In-Reply-To: <20221123092336.11423-7-s.ivanov@proxmox.com>

I'd like to have some rationale for the changes in the commit message,
at least for the more non-obvious ones (the regex changes, for example).

comments inline

On 11/23/22 10:23, Stoiko Ivanov wrote:
> Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
> ---
>   src/PMG/API2/Quarantine.pm | 10 +++++-----
>   src/PMG/HTMLMail.pm        |  7 ++++---
>   src/PMG/Quarantine.pm      | 13 +++++++------
>   src/PMG/RuleDB/Spam.pm     | 12 ++++++------
>   4 files changed, 22 insertions(+), 20 deletions(-)
> 
> diff --git a/src/PMG/API2/Quarantine.pm b/src/PMG/API2/Quarantine.pm
> index ddf7c04..819c78c 100644
> --- a/src/PMG/API2/Quarantine.pm
> +++ b/src/PMG/API2/Quarantine.pm
> @@ -141,8 +141,8 @@ my $parse_header_info = sub {
>       my $sender = PMG::Utils::decode_rfc1522(PVE::Tools::trim($head->get('sender')));
>       $res->{sender} = $sender if $sender && ($sender ne $res->{from});
>   
> -    $res->{envelope_sender} = $ref->{sender};
> -    $res->{receiver} = $ref->{receiver} // $ref->{pmail};
> +    $res->{envelope_sender} = PMG::Utils::try_decode_utf8($ref->{sender});
> +    $res->{receiver} = PMG::Utils::try_decode_utf8($ref->{receiver} // $ref->{pmail});

maybe we should note here in a comment that these are not headers
but part of the SMTP dialog and cannot be quoted-printable/base64 encoded?

>       $res->{id} = 'C' . $ref->{cid} . 'R' . $ref->{rid} . 'T' . $ref->{ticketid};
>       $res->{time} = $ref->{time};
>       $res->{bytes} = $ref->{bytes};
> @@ -437,7 +437,7 @@ __PACKAGE__->register_method ({
>   	$sth->execute();
>   
>   	while (my $ref = $sth->fetchrow_hashref()) {
> -	    push @$res, { mail => $ref->{pmail} };
> +	    push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) };
>   	}
>   
>   	return $res;
> @@ -532,7 +532,7 @@ __PACKAGE__->register_method ({
>   	}
>   
>   	while (my $ref = $sth->fetchrow_hashref()) {
> -	    push @$res, { mail => $ref->{pmail} };
> +	    push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) };
>   	}
>   
>   	return $res;
> @@ -569,7 +569,7 @@ my $quarantine_api = sub {
>       }
>   
>       if ($check_pmail || $role eq 'quser') {
> -	$sth->execute($pmail);
> +	$sth->execute(encode('UTF-8', $pmail));
>       } else {
>   	$sth->execute();
>       }
> diff --git a/src/PMG/HTMLMail.pm b/src/PMG/HTMLMail.pm
> index 87f5c40..207c52c 100644
> --- a/src/PMG/HTMLMail.pm
> +++ b/src/PMG/HTMLMail.pm
> @@ -192,9 +192,10 @@ sub read_raw_email {
>       # read header
>       my $header;
>       while (defined(my $line = <$fh>)) {
> -	$raw_header .= $line;
> -	chomp $line;
> -	push @$header, $line;
> +	my $decoded_line = PMG::Utils::try_decode_utf8($line);
> +	$raw_header .= $decoded_line;
> +	chomp $decoded_line;
> +	push @$header, $decoded_line;
>   	last if $line =~ m/^\s*$/;
>       }
>   
> diff --git a/src/PMG/Quarantine.pm b/src/PMG/Quarantine.pm
> index 77af8cc..aa6b948 100644
> --- a/src/PMG/Quarantine.pm
> +++ b/src/PMG/Quarantine.pm
> @@ -3,6 +3,7 @@ package PMG::Quarantine;
>   use strict;
>   use warnings;
>   use Net::SMTP;
> +use Encode qw(encode);
>   
>   use PVE::SafeSyslog;
>   use PVE::Tools;
> @@ -16,7 +17,7 @@ sub add_to_blackwhite {
>   
>       my $name = $listname eq 'BL' ? 'BL' : 'WL';
>       my $oname = $listname eq 'BL' ? 'WL' : 'BL';
> -    my $qu = $dbh->quote ($username);
> +    my $qu = $dbh->quote (encode('UTF-8', $username));
>   
>       my $sth = $dbh->prepare(
>   	"SELECT * FROM UserPrefs WHERE pmail = $qu AND (Name = 'BL' OR Name = 'WL')");
> @@ -25,13 +26,13 @@ sub add_to_blackwhite {
>       my $list = { 'WL' => {}, 'BL' => {} };
>   
>       while (my $ref = $sth->fetchrow_hashref()) {
> -	my $data = $ref->{data};
> +	my $data = PMG::Utils::try_decode_utf8($ref->{data});
>   	$data =~ s/[,;]/ /g;
>   	my @alist = split('\s+', $data);
>   
>   	my $tmp = {};
>   	foreach my $a (@alist) {
> -	    if ($a =~ m/^[[:ascii:]]+$/) {
> +	    if ($a =~ m/^[^\s\\\@]+(?:\@[^\s\/\\\@]+)?$/) {

that change seems a bit dangerous, maybe we should at least
filter out some control characters here?

>   		$tmp->{$a} = 1;
>   	    }
>   	}
> @@ -50,7 +51,7 @@ sub add_to_blackwhite {
>   	    if ($delete) {
>   		delete($list->{$name}->{$v});
>   	    } else {
> -		if ($v =~ m/[[:^ascii:]]/) {
> +		if ($v =~ m/[\s\\]/) {

same here, going from 'non-ascii is forbidden' to 'only whitespace and \ are forbidden'
is a bit broad imho

>   		    die "email address '$v' contains invalid characters\n";
>   		}
>   		$list->{$name}->{$v} = 1;
> @@ -58,8 +59,8 @@ sub add_to_blackwhite {
>   	    }
>   	}
>   
> -	my $wlist = $dbh->quote(join (',', keys %{$list->{WL}}) || '');
> -	my $blist = $dbh->quote(join (',', keys %{$list->{BL}}) || '');
> +	my $wlist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{WL}})) || '');
> +	my $blist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{BL}})) || '');
>   
>   	if (!$delete) {
>   	    my $maxlen = 200000;
> diff --git a/src/PMG/RuleDB/Spam.pm b/src/PMG/RuleDB/Spam.pm
> index 99056a3..bc1d422 100644
> --- a/src/PMG/RuleDB/Spam.pm
> +++ b/src/PMG/RuleDB/Spam.pm
> @@ -94,7 +94,7 @@ sub parse_addrlist {
>   	my $regex = $addr;
>   	# SA like checks
>   	$regex =~ s/[\000\\\(]/_/gs;		# is this really necessasry ?
> -	$regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g;	# escape possible metachars
> +	$regex =~ s/([^\*\?_\w])/\\$1/g;	# escape possible metachars

what does \w match here beyond a-zA-Z0-9?
(a short explanation in the commit message would be enough imo)

>   	$regex =~ tr/?/./;			# replace "?" with "."
>   	$regex =~ s/\*+/\.\*/g;			# replace "*" with  ".*"
>   
> @@ -149,13 +149,13 @@ sub get_blackwhite {
>   	$sth->execute();
>   
>   	while (my $ref = $sth->fetchrow_hashref()) {
> -	    my $pmail = lc ($ref->{pmail});
> +	    my $pmail = lc (PMG::Utils::try_decode_utf8($ref->{pmail}));
>   	    if ($ref->{name} eq 'WL') {
>   		$target_info->{$pmail}->{whitelist} =
> -		    parse_addrlist($ref->{data});
> +		    parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
>   	    } elsif ($ref->{name} eq 'BL') {
>   		$target_info->{$pmail}->{blacklist} =
> -		    parse_addrlist($ref->{data});
> +		    parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
>   	    }
>   	}
>   
> @@ -205,7 +205,7 @@ sub what_match_targets {
>   		($list = $queue->{blackwhite}->{$pmail}->{whitelist}) &&
>   		check_addrlist($list, $queue->{all_from_addrs})) {
>   		syslog('info', "%s: sender in user (%s) whitelist",
> -		       $queue->{logid}, $pmail);
> +		       $queue->{logid}, encode('UTF-8', $pmail));
>   	    } else {
>   		$target_info->{$t}->{marks} = []; # never add additional marks here
>   		$target_info->{$t}->{spaminfo} = $info;
> @@ -234,7 +234,7 @@ sub what_match_targets {
>   		$target_info->{$t}->{marks} = [];
>   		$target_info->{$t}->{spaminfo} = $info;
>   		syslog ('info', "%s: sender in user (%s) blacklist",
> -			$queue->{logid}, $pmail);
> +			$queue->{logid}, encode('UTF-8',$pmail));
>   	    }
>   	}
>       }





  reply	other threads:[~2022-11-23 14:15 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-23  9:23 [pmg-devel] [PATCH pmg-api/pmg-gui v3] ruledb - improve experience for non-ascii tests and mails Stoiko Ivanov
2022-11-23  9:23 ` [pmg-devel] [PATCH pmg-api v3 1/8] utils: return perl string from decode_rfc1522 Stoiko Ivanov
2022-11-23  9:23 ` [pmg-devel] [PATCH pmg-api v3 2/8] ruledb: properly substitute prox_vars in headers Stoiko Ivanov
2022-11-23  9:23 ` [pmg-devel] [PATCH pmg-api v3 3/8] fix #2541 ruledb: encode relevant values as utf-8 in database Stoiko Ivanov
2022-11-23  9:23 ` [pmg-devel] [PATCH pmg-api v3 4/8] ruledb: encode e-mail addresses for syslog Stoiko Ivanov
2022-11-23  9:23 ` [pmg-devel] [PATCH pmg-api v3 5/8] partially fix #2465: handle smtputf8 addresses in the rule-system Stoiko Ivanov
2022-11-23  9:23 ` [pmg-devel] [PATCH pmg-api v3 6/8] quarantine: handle utf8 data Stoiko Ivanov
2022-11-23 14:15   ` Dominik Csapak [this message]
2022-11-23  9:23 ` [pmg-devel] [PATCH pmg-api v3 7/8] pmgqm: handle smtputf8 data Stoiko Ivanov
2022-11-23 14:20   ` Dominik Csapak
2022-11-23  9:23 ` [pmg-devel] [PATCH pmg-api v3 8/8] statistics: handle utf8 data Stoiko Ivanov
2022-11-23 14:26   ` Dominik Csapak
2022-11-23  9:23 ` [pmg-devel] [PATCH pmg-gui v3 1/2] utils: add custom validator for pmg-email-address Stoiko Ivanov
2022-11-23  9:23 ` [pmg-devel] [PATCH pmg-gui v3 2/2] userblocklists: use PMGMail as validator for pmail Stoiko Ivanov
2022-11-23 14:09 ` [pmg-devel] [PATCH pmg-api/pmg-gui v3] ruledb - improve experience for non-ascii tests and mails Dominik Csapak
2022-11-26  7:00 ` [pmg-devel] applied-gui: " Thomas Lamprecht

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bfc96a1d-5b2e-73b3-06ac-ba2810238465@proxmox.com \
    --to=d.csapak@proxmox.com \
    --cc=pmg-devel@lists.proxmox.com \
    --cc=s.ivanov@proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal