From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 102A9B1C6 for ; Wed, 23 Nov 2022 15:15:48 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 52FCA22BA8 for ; Wed, 23 Nov 2022 15:15:17 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 23 Nov 2022 15:15:14 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 815A544D7A; Wed, 23 Nov 2022 15:15:13 +0100 (CET) Message-ID: Date: Wed, 23 Nov 2022 15:15:12 +0100 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Thunderbird/108.0 Content-Language: en-US To: Stoiko Ivanov , pmg-devel@lists.proxmox.com References: <20221123092336.11423-1-s.ivanov@proxmox.com> <20221123092336.11423-7-s.ivanov@proxmox.com> From: Dominik Csapak In-Reply-To: <20221123092336.11423-7-s.ivanov@proxmox.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.065 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment NICE_REPLY_A -0.001 Looks like a legit reply (A) SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pmg-devel] [PATCH pmg-api v3 6/8] quarantine: handle utf8 data X-BeenThere: pmg-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Mail Gateway development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 23 Nov 2022 14:15:48 -0000 i'd like to have some rationale for the changes in the commit message at least for the more non-obvious ones (regex changes for example) comments inline On 11/23/22 10:23, Stoiko Ivanov wrote: > Signed-off-by: Stoiko Ivanov > --- > src/PMG/API2/Quarantine.pm | 10 +++++----- > src/PMG/HTMLMail.pm | 7 ++++--- > src/PMG/Quarantine.pm | 13 +++++++------ > src/PMG/RuleDB/Spam.pm | 12 ++++++------ > 4 files changed, 22 insertions(+), 20 deletions(-) > > diff --git a/src/PMG/API2/Quarantine.pm b/src/PMG/API2/Quarantine.pm > index ddf7c04..819c78c 100644 > --- a/src/PMG/API2/Quarantine.pm > +++ b/src/PMG/API2/Quarantine.pm > @@ -141,8 +141,8 @@ my $parse_header_info = sub { > my $sender = PMG::Utils::decode_rfc1522(PVE::Tools::trim($head->get('sender'))); > $res->{sender} = $sender if $sender && ($sender ne $res->{from}); > > - $res->{envelope_sender} = $ref->{sender}; > - $res->{receiver} = $ref->{receiver} // $ref->{pmail}; > + $res->{envelope_sender} = PMG::Utils::try_decode_utf8($ref->{sender}); > + $res->{receiver} = PMG::Utils::try_decode_utf8($ref->{receiver} // $ref->{pmail}); maybe we should note here in a comment that these are not headers but part of the smtp dialog and cannot be quoted-printable/base64 encoded? > $res->{id} = 'C' . $ref->{cid} . 'R' . $ref->{rid} . 'T' . $ref->{ticketid}; > $res->{time} = $ref->{time}; > $res->{bytes} = $ref->{bytes}; > @@ -437,7 +437,7 @@ __PACKAGE__->register_method ({ > $sth->execute(); > > while (my $ref = $sth->fetchrow_hashref()) { > - push @$res, { mail => $ref->{pmail} }; > + push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) }; > } > > return $res; > @@ -532,7 +532,7 @@ __PACKAGE__->register_method ({ > } > > while (my $ref = $sth->fetchrow_hashref()) { > - push @$res, { mail => $ref->{pmail} }; > + push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) }; > } > > return $res; > @@ -569,7 +569,7 @@ my $quarantine_api = sub { > } > > if ($check_pmail || $role eq 'quser') { > - $sth->execute($pmail); > + $sth->execute(encode('UTF-8', $pmail)); > } else { > $sth->execute(); > } > diff --git a/src/PMG/HTMLMail.pm b/src/PMG/HTMLMail.pm > index 87f5c40..207c52c 100644 > --- a/src/PMG/HTMLMail.pm > +++ b/src/PMG/HTMLMail.pm > @@ -192,9 +192,10 @@ sub read_raw_email { > # read header > my $header; > while (defined(my $line = <$fh>)) { > - $raw_header .= $line; > - chomp $line; > - push @$header, $line; > + my $decoded_line = PMG::Utils::try_decode_utf8($line); > + $raw_header .= $decoded_line; > + chomp $decoded_line; > + push @$header, $decoded_line; > last if $line =~ m/^\s*$/; > } > > diff --git a/src/PMG/Quarantine.pm b/src/PMG/Quarantine.pm > index 77af8cc..aa6b948 100644 > --- a/src/PMG/Quarantine.pm > +++ b/src/PMG/Quarantine.pm > @@ -3,6 +3,7 @@ package PMG::Quarantine; > use strict; > use warnings; > use Net::SMTP; > +use Encode qw(encode); > > use PVE::SafeSyslog; > use PVE::Tools; > @@ -16,7 +17,7 @@ sub add_to_blackwhite { > > my $name = $listname eq 'BL' ? 'BL' : 'WL'; > my $oname = $listname eq 'BL' ? 'WL' : 'BL'; > - my $qu = $dbh->quote ($username); > + my $qu = $dbh->quote (encode('UTF-8', $username)); > > my $sth = $dbh->prepare( > "SELECT * FROM UserPrefs WHERE pmail = $qu AND (Name = 'BL' OR Name = 'WL')"); > @@ -25,13 +26,13 @@ sub add_to_blackwhite { > my $list = { 'WL' => {}, 'BL' => {} }; > > while (my $ref = $sth->fetchrow_hashref()) { > - my $data = $ref->{data}; > + my $data = PMG::Utils::try_decode_utf8($ref->{data}); > $data =~ s/[,;]/ /g; > my @alist = split('\s+', $data); > > my $tmp = {}; > foreach my $a (@alist) { > - if ($a =~ m/^[[:ascii:]]+$/) { > + if ($a =~ m/^[^\s\\\@]+(?:\@[^\s\/\\\@]+)?$/) { that change seems a bit dangerous, maybe we should at least filter out some control characters here? > $tmp->{$a} = 1; > } > } > @@ -50,7 +51,7 @@ sub add_to_blackwhite { > if ($delete) { > delete($list->{$name}->{$v}); > } else { > - if ($v =~ m/[[:^ascii:]]/) { > + if ($v =~ m/[\s\\]/) { same here, going from 'non-ascii' is forbidden to 'non whitespace+\' is forbidden is a bit broad imho > die "email address '$v' contains invalid characters\n"; > } > $list->{$name}->{$v} = 1; > @@ -58,8 +59,8 @@ sub add_to_blackwhite { > } > } > > - my $wlist = $dbh->quote(join (',', keys %{$list->{WL}}) || ''); > - my $blist = $dbh->quote(join (',', keys %{$list->{BL}}) || ''); > + my $wlist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{WL}})) || ''); > + my $blist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{BL}})) || ''); > > if (!$delete) { > my $maxlen = 200000; > diff --git a/src/PMG/RuleDB/Spam.pm b/src/PMG/RuleDB/Spam.pm > index 99056a3..bc1d422 100644 > --- a/src/PMG/RuleDB/Spam.pm > +++ b/src/PMG/RuleDB/Spam.pm > @@ -94,7 +94,7 @@ sub parse_addrlist { > my $regex = $addr; > # SA like checks > $regex =~ s/[\000\\\(]/_/gs; # is this really necessasry ? > - $regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g; # escape possible metachars > + $regex =~ s/([^\*\?_\w])/\\$1/g; # escape possible metachars what does \w include more here than a-zA-Z0-9 ? (a short explanation in the commit message would be enough imo) > $regex =~ tr/?/./; # replace "?" with "." > $regex =~ s/\*+/\.\*/g; # replace "*" with ".*" > > @@ -149,13 +149,13 @@ sub get_blackwhite { > $sth->execute(); > > while (my $ref = $sth->fetchrow_hashref()) { > - my $pmail = lc ($ref->{pmail}); > + my $pmail = lc (PMG::Utils::try_decode_utf8($ref->{pmail})); > if ($ref->{name} eq 'WL') { > $target_info->{$pmail}->{whitelist} = > - parse_addrlist($ref->{data}); > + parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data})); > } elsif ($ref->{name} eq 'BL') { > $target_info->{$pmail}->{blacklist} = > - parse_addrlist($ref->{data}); > + parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data})); > } > } > > @@ -205,7 +205,7 @@ sub what_match_targets { > ($list = $queue->{blackwhite}->{$pmail}->{whitelist}) && > check_addrlist($list, $queue->{all_from_addrs})) { > syslog('info', "%s: sender in user (%s) whitelist", > - $queue->{logid}, $pmail); > + $queue->{logid}, encode('UTF-8', $pmail)); > } else { > $target_info->{$t}->{marks} = []; # never add additional marks here > $target_info->{$t}->{spaminfo} = $info; > @@ -234,7 +234,7 @@ sub what_match_targets { > $target_info->{$t}->{marks} = []; > $target_info->{$t}->{spaminfo} = $info; > syslog ('info', "%s: sender in user (%s) blacklist", > - $queue->{logid}, $pmail); > + $queue->{logid}, encode('UTF-8',$pmail)); > } > } > }