public inbox for pmg-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Dominik Csapak <d.csapak@proxmox.com>
To: pmg-devel@lists.proxmox.com
Subject: [pmg-devel] [PATCH pmg-api v4 06/12] quarantine: handle utf8 data
Date: Thu, 24 Nov 2022 13:21:06 +0100	[thread overview]
Message-ID: <20221124122112.666868-7-d.csapak@proxmox.com> (raw)
In-Reply-To: <20221124122112.666868-1-d.csapak@proxmox.com>

From: Stoiko Ivanov <s.ivanov@proxmox.com>

Use try_decode_utf8 for the sender/receiver of the SMTP dialog and for mail
headers, since they are encoded either as ASCII (without SMTPUTF8) or as
UTF-8 (with SMTPUTF8).

Change the mail regex for wl/bl to basic email/domain syntax without
the ASCII-only restriction. (Whitespace and backslashes are
forbidden, but they shouldn't normally occur in email addresses and
domains.)

Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
[ D: Added commit message ]
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
 src/PMG/API2/Quarantine.pm | 10 +++++-----
 src/PMG/HTMLMail.pm        |  7 ++++---
 src/PMG/Quarantine.pm      | 13 +++++++------
 src/PMG/RuleDB/Spam.pm     | 12 ++++++------
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/src/PMG/API2/Quarantine.pm b/src/PMG/API2/Quarantine.pm
index ddf7c04..819c78c 100644
--- a/src/PMG/API2/Quarantine.pm
+++ b/src/PMG/API2/Quarantine.pm
@@ -141,8 +141,8 @@ my $parse_header_info = sub {
     my $sender = PMG::Utils::decode_rfc1522(PVE::Tools::trim($head->get('sender')));
     $res->{sender} = $sender if $sender && ($sender ne $res->{from});
 
-    $res->{envelope_sender} = $ref->{sender};
-    $res->{receiver} = $ref->{receiver} // $ref->{pmail};
+    $res->{envelope_sender} = PMG::Utils::try_decode_utf8($ref->{sender});
+    $res->{receiver} = PMG::Utils::try_decode_utf8($ref->{receiver} // $ref->{pmail});
     $res->{id} = 'C' . $ref->{cid} . 'R' . $ref->{rid} . 'T' . $ref->{ticketid};
     $res->{time} = $ref->{time};
     $res->{bytes} = $ref->{bytes};
@@ -437,7 +437,7 @@ __PACKAGE__->register_method ({
 	$sth->execute();
 
 	while (my $ref = $sth->fetchrow_hashref()) {
-	    push @$res, { mail => $ref->{pmail} };
+	    push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) };
 	}
 
 	return $res;
@@ -532,7 +532,7 @@ __PACKAGE__->register_method ({
 	}
 
 	while (my $ref = $sth->fetchrow_hashref()) {
-	    push @$res, { mail => $ref->{pmail} };
+	    push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) };
 	}
 
 	return $res;
@@ -569,7 +569,7 @@ my $quarantine_api = sub {
     }
 
     if ($check_pmail || $role eq 'quser') {
-	$sth->execute($pmail);
+	$sth->execute(encode('UTF-8', $pmail));
     } else {
 	$sth->execute();
     }
diff --git a/src/PMG/HTMLMail.pm b/src/PMG/HTMLMail.pm
index 87f5c40..207c52c 100644
--- a/src/PMG/HTMLMail.pm
+++ b/src/PMG/HTMLMail.pm
@@ -192,9 +192,10 @@ sub read_raw_email {
     # read header
     my $header;
     while (defined(my $line = <$fh>)) {
-	$raw_header .= $line;
-	chomp $line;
-	push @$header, $line;
+	my $decoded_line = PMG::Utils::try_decode_utf8($line);
+	$raw_header .= $decoded_line;
+	chomp $decoded_line;
+	push @$header, $decoded_line;
 	last if $line =~ m/^\s*$/;
     }
 
diff --git a/src/PMG/Quarantine.pm b/src/PMG/Quarantine.pm
index 77af8cc..aa6b948 100644
--- a/src/PMG/Quarantine.pm
+++ b/src/PMG/Quarantine.pm
@@ -3,6 +3,7 @@ package PMG::Quarantine;
 use strict;
 use warnings;
 use Net::SMTP;
+use Encode qw(encode);
 
 use PVE::SafeSyslog;
 use PVE::Tools;
@@ -16,7 +17,7 @@ sub add_to_blackwhite {
 
     my $name = $listname eq 'BL' ? 'BL' : 'WL';
     my $oname = $listname eq 'BL' ? 'WL' : 'BL';
-    my $qu = $dbh->quote ($username);
+    my $qu = $dbh->quote (encode('UTF-8', $username));
 
     my $sth = $dbh->prepare(
 	"SELECT * FROM UserPrefs WHERE pmail = $qu AND (Name = 'BL' OR Name = 'WL')");
@@ -25,13 +26,13 @@ sub add_to_blackwhite {
     my $list = { 'WL' => {}, 'BL' => {} };
 
     while (my $ref = $sth->fetchrow_hashref()) {
-	my $data = $ref->{data};
+	my $data = PMG::Utils::try_decode_utf8($ref->{data});
 	$data =~ s/[,;]/ /g;
 	my @alist = split('\s+', $data);
 
 	my $tmp = {};
 	foreach my $a (@alist) {
-	    if ($a =~ m/^[[:ascii:]]+$/) {
+	    if ($a =~ m/^[^\s\\\@]+(?:\@[^\s\/\\\@]+)?$/) {
 		$tmp->{$a} = 1;
 	    }
 	}
@@ -50,7 +51,7 @@ sub add_to_blackwhite {
 	    if ($delete) {
 		delete($list->{$name}->{$v});
 	    } else {
-		if ($v =~ m/[[:^ascii:]]/) {
+		if ($v =~ m/[\s\\]/) {
 		    die "email address '$v' contains invalid characters\n";
 		}
 		$list->{$name}->{$v} = 1;
@@ -58,8 +59,8 @@ sub add_to_blackwhite {
 	    }
 	}
 
-	my $wlist = $dbh->quote(join (',', keys %{$list->{WL}}) || '');
-	my $blist = $dbh->quote(join (',', keys %{$list->{BL}}) || '');
+	my $wlist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{WL}})) || '');
+	my $blist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{BL}})) || '');
 
 	if (!$delete) {
 	    my $maxlen = 200000;
diff --git a/src/PMG/RuleDB/Spam.pm b/src/PMG/RuleDB/Spam.pm
index 99056a3..bc1d422 100644
--- a/src/PMG/RuleDB/Spam.pm
+++ b/src/PMG/RuleDB/Spam.pm
@@ -94,7 +94,7 @@ sub parse_addrlist {
 	my $regex = $addr;
 	# SA like checks
 	$regex =~ s/[\000\\\(]/_/gs;		# is this really necessasry ?
-	$regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g;	# escape possible metachars
+	$regex =~ s/([^\*\?_\w])/\\$1/g;	# escape possible metachars
 	$regex =~ tr/?/./;			# replace "?" with "."
 	$regex =~ s/\*+/\.\*/g;			# replace "*" with  ".*"
 
@@ -149,13 +149,13 @@ sub get_blackwhite {
 	$sth->execute();
 
 	while (my $ref = $sth->fetchrow_hashref()) {
-	    my $pmail = lc ($ref->{pmail});
+	    my $pmail = lc (PMG::Utils::try_decode_utf8($ref->{pmail}));
 	    if ($ref->{name} eq 'WL') {
 		$target_info->{$pmail}->{whitelist} = 
-		    parse_addrlist($ref->{data});
+		    parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
 	    } elsif ($ref->{name} eq 'BL') {
 		$target_info->{$pmail}->{blacklist} = 
-		    parse_addrlist($ref->{data});
+		    parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
 	    }
 	}
 
@@ -205,7 +205,7 @@ sub what_match_targets {
 		($list = $queue->{blackwhite}->{$pmail}->{whitelist}) &&
 		check_addrlist($list, $queue->{all_from_addrs})) {
 		syslog('info', "%s: sender in user (%s) whitelist", 
-		       $queue->{logid}, $pmail);
+		       $queue->{logid}, encode('UTF-8', $pmail));
 	    } else {
 		$target_info->{$t}->{marks} = []; # never add additional marks here
 		$target_info->{$t}->{spaminfo} = $info;
@@ -234,7 +234,7 @@ sub what_match_targets {
 		$target_info->{$t}->{marks} = [];
 		$target_info->{$t}->{spaminfo} = $info;
 		syslog ('info', "%s: sender in user (%s) blacklist", 
-			$queue->{logid}, $pmail);
+			$queue->{logid}, encode('UTF-8',$pmail));
 	    }
 	}
     }
-- 
2.30.2





  parent reply	other threads:[~2022-11-24 12:21 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-24 12:21 [pmg-devel] [PATCH pmg-api v4 00/12] ruledb - improve experience for non-ascii tests and mails Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 01/12] utils: return perl string from decode_rfc1522 Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 02/12] ruledb: properly substitute prox_vars in headers Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 03/12] fix #2541 ruledb: encode relevant values as utf-8 in database Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 04/12] ruledb: encode e-mail addresses for syslog Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 05/12] partially fix #2465: handle smtputf8 addresses in the rule-system Dominik Csapak
2022-11-24 12:21 ` Dominik Csapak [this message]
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 07/12] pmgqm: handle smtputf8 data Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 08/12] statistics: handle utf8 data Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 09/12] quarantine: fix adding non-ascii senders to wl/bl Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 10/12] utils: refactor rfc1522_to_html Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 11/12] ldap: improve unicode support Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 12/12] statistics: refactor filter_text generation Dominik Csapak
2022-11-24 15:45 ` [pmg-devel] applied-series: [PATCH pmg-api v4 00/12] ruledb - improve experience for non-ascii tests and mails Thomas Lamprecht

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221124122112.666868-7-d.csapak@proxmox.com \
    --to=d.csapak@proxmox.com \
    --cc=pmg-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal