From: Dominik Csapak <d.csapak@proxmox.com>
To: pmg-devel@lists.proxmox.com
Subject: [pmg-devel] [PATCH pmg-api v4 06/12] quarantine: handle utf8 data
Date: Thu, 24 Nov 2022 13:21:06 +0100 [thread overview]
Message-ID: <20221124122112.666868-7-d.csapak@proxmox.com> (raw)
In-Reply-To: <20221124122112.666868-1-d.csapak@proxmox.com>
From: Stoiko Ivanov <s.ivanov@proxmox.com>
Use try_decode_utf8 for the sender/receiver of the SMTP dialog and for
mail headers, since they are encoded either as ASCII (without SMTPUTF8)
or as UTF-8 (with SMTPUTF8).
Change the mail regex for wl/bl to basic email/domain syntax without
the ASCII-only restriction. (Whitespace and backslashes are
forbidden, but they shouldn't normally occur in email addresses and
domains.)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
[ D: Added Commit message ]
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
src/PMG/API2/Quarantine.pm | 10 +++++-----
src/PMG/HTMLMail.pm | 7 ++++---
src/PMG/Quarantine.pm | 13 +++++++------
src/PMG/RuleDB/Spam.pm | 12 ++++++------
4 files changed, 22 insertions(+), 20 deletions(-)
diff --git a/src/PMG/API2/Quarantine.pm b/src/PMG/API2/Quarantine.pm
index ddf7c04..819c78c 100644
--- a/src/PMG/API2/Quarantine.pm
+++ b/src/PMG/API2/Quarantine.pm
@@ -141,8 +141,8 @@ my $parse_header_info = sub {
my $sender = PMG::Utils::decode_rfc1522(PVE::Tools::trim($head->get('sender')));
$res->{sender} = $sender if $sender && ($sender ne $res->{from});
- $res->{envelope_sender} = $ref->{sender};
- $res->{receiver} = $ref->{receiver} // $ref->{pmail};
+ $res->{envelope_sender} = PMG::Utils::try_decode_utf8($ref->{sender});
+ $res->{receiver} = PMG::Utils::try_decode_utf8($ref->{receiver} // $ref->{pmail});
$res->{id} = 'C' . $ref->{cid} . 'R' . $ref->{rid} . 'T' . $ref->{ticketid};
$res->{time} = $ref->{time};
$res->{bytes} = $ref->{bytes};
@@ -437,7 +437,7 @@ __PACKAGE__->register_method ({
$sth->execute();
while (my $ref = $sth->fetchrow_hashref()) {
- push @$res, { mail => $ref->{pmail} };
+ push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) };
}
return $res;
@@ -532,7 +532,7 @@ __PACKAGE__->register_method ({
}
while (my $ref = $sth->fetchrow_hashref()) {
- push @$res, { mail => $ref->{pmail} };
+ push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) };
}
return $res;
@@ -569,7 +569,7 @@ my $quarantine_api = sub {
}
if ($check_pmail || $role eq 'quser') {
- $sth->execute($pmail);
+ $sth->execute(encode('UTF-8', $pmail));
} else {
$sth->execute();
}
diff --git a/src/PMG/HTMLMail.pm b/src/PMG/HTMLMail.pm
index 87f5c40..207c52c 100644
--- a/src/PMG/HTMLMail.pm
+++ b/src/PMG/HTMLMail.pm
@@ -192,9 +192,10 @@ sub read_raw_email {
# read header
my $header;
while (defined(my $line = <$fh>)) {
- $raw_header .= $line;
- chomp $line;
- push @$header, $line;
+ my $decoded_line = PMG::Utils::try_decode_utf8($line);
+ $raw_header .= $decoded_line;
+ chomp $decoded_line;
+ push @$header, $decoded_line;
last if $line =~ m/^\s*$/;
}
diff --git a/src/PMG/Quarantine.pm b/src/PMG/Quarantine.pm
index 77af8cc..aa6b948 100644
--- a/src/PMG/Quarantine.pm
+++ b/src/PMG/Quarantine.pm
@@ -3,6 +3,7 @@ package PMG::Quarantine;
use strict;
use warnings;
use Net::SMTP;
+use Encode qw(encode);
use PVE::SafeSyslog;
use PVE::Tools;
@@ -16,7 +17,7 @@ sub add_to_blackwhite {
my $name = $listname eq 'BL' ? 'BL' : 'WL';
my $oname = $listname eq 'BL' ? 'WL' : 'BL';
- my $qu = $dbh->quote ($username);
+ my $qu = $dbh->quote (encode('UTF-8', $username));
my $sth = $dbh->prepare(
"SELECT * FROM UserPrefs WHERE pmail = $qu AND (Name = 'BL' OR Name = 'WL')");
@@ -25,13 +26,13 @@ sub add_to_blackwhite {
my $list = { 'WL' => {}, 'BL' => {} };
while (my $ref = $sth->fetchrow_hashref()) {
- my $data = $ref->{data};
+ my $data = PMG::Utils::try_decode_utf8($ref->{data});
$data =~ s/[,;]/ /g;
my @alist = split('\s+', $data);
my $tmp = {};
foreach my $a (@alist) {
- if ($a =~ m/^[[:ascii:]]+$/) {
+ if ($a =~ m/^[^\s\\\@]+(?:\@[^\s\/\\\@]+)?$/) {
$tmp->{$a} = 1;
}
}
@@ -50,7 +51,7 @@ sub add_to_blackwhite {
if ($delete) {
delete($list->{$name}->{$v});
} else {
- if ($v =~ m/[[:^ascii:]]/) {
+ if ($v =~ m/[\s\\]/) {
die "email address '$v' contains invalid characters\n";
}
$list->{$name}->{$v} = 1;
@@ -58,8 +59,8 @@ sub add_to_blackwhite {
}
}
- my $wlist = $dbh->quote(join (',', keys %{$list->{WL}}) || '');
- my $blist = $dbh->quote(join (',', keys %{$list->{BL}}) || '');
+ my $wlist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{WL}})) || '');
+ my $blist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{BL}})) || '');
if (!$delete) {
my $maxlen = 200000;
diff --git a/src/PMG/RuleDB/Spam.pm b/src/PMG/RuleDB/Spam.pm
index 99056a3..bc1d422 100644
--- a/src/PMG/RuleDB/Spam.pm
+++ b/src/PMG/RuleDB/Spam.pm
@@ -94,7 +94,7 @@ sub parse_addrlist {
my $regex = $addr;
# SA like checks
$regex =~ s/[\000\\\(]/_/gs; # is this really necessasry ?
- $regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g; # escape possible metachars
+ $regex =~ s/([^\*\?_\w])/\\$1/g; # escape possible metachars
$regex =~ tr/?/./; # replace "?" with "."
$regex =~ s/\*+/\.\*/g; # replace "*" with ".*"
@@ -149,13 +149,13 @@ sub get_blackwhite {
$sth->execute();
while (my $ref = $sth->fetchrow_hashref()) {
- my $pmail = lc ($ref->{pmail});
+ my $pmail = lc (PMG::Utils::try_decode_utf8($ref->{pmail}));
if ($ref->{name} eq 'WL') {
$target_info->{$pmail}->{whitelist} =
- parse_addrlist($ref->{data});
+ parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
} elsif ($ref->{name} eq 'BL') {
$target_info->{$pmail}->{blacklist} =
- parse_addrlist($ref->{data});
+ parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
}
}
@@ -205,7 +205,7 @@ sub what_match_targets {
($list = $queue->{blackwhite}->{$pmail}->{whitelist}) &&
check_addrlist($list, $queue->{all_from_addrs})) {
syslog('info', "%s: sender in user (%s) whitelist",
- $queue->{logid}, $pmail);
+ $queue->{logid}, encode('UTF-8', $pmail));
} else {
$target_info->{$t}->{marks} = []; # never add additional marks here
$target_info->{$t}->{spaminfo} = $info;
@@ -234,7 +234,7 @@ sub what_match_targets {
$target_info->{$t}->{marks} = [];
$target_info->{$t}->{spaminfo} = $info;
syslog ('info', "%s: sender in user (%s) blacklist",
- $queue->{logid}, $pmail);
+ $queue->{logid}, encode('UTF-8',$pmail));
}
}
}
--
2.30.2
next prev parent reply other threads:[~2022-11-24 12:21 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-24 12:21 [pmg-devel] [PATCH pmg-api v4 00/12] ruledb - improve experience for non-ascii tests and mails Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 01/12] utils: return perl string from decode_rfc1522 Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 02/12] ruledb: properly substitute prox_vars in headers Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 03/12] fix #2541 ruledb: encode relevant values as utf-8 in database Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 04/12] ruledb: encode e-mail addresses for syslog Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 05/12] partially fix #2465: handle smtputf8 addresses in the rule-system Dominik Csapak
2022-11-24 12:21 ` Dominik Csapak [this message]
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 07/12] pmgqm: handle smtputf8 data Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 08/12] statistics: handle utf8 data Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 09/12] quarantine: fix adding non-ascii senders to wl/bl Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 10/12] utils: refactor rfc1522_to_html Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 11/12] ldap: improve unicode support Dominik Csapak
2022-11-24 12:21 ` [pmg-devel] [PATCH pmg-api v4 12/12] statistics: refactor filter_text generation Dominik Csapak
2022-11-24 15:45 ` [pmg-devel] applied-series: [PATCH pmg-api v4 00/12] ruledb - improve experience for non-ascii tests and mails Thomas Lamprecht
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221124122112.666868-7-d.csapak@proxmox.com \
--to=d.csapak@proxmox.com \
--cc=pmg-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.