From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <s.ivanov@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 551BB954F
 for <pmg-devel@lists.proxmox.com>; Thu, 17 Nov 2022 16:06:50 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 11D732FC7E
 for <pmg-devel@lists.proxmox.com>; Thu, 17 Nov 2022 16:06:50 +0100 (CET)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pmg-devel@lists.proxmox.com>; Thu, 17 Nov 2022 16:06:46 +0100 (CET)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id EBEC444D80
 for <pmg-devel@lists.proxmox.com>; Thu, 17 Nov 2022 16:06:45 +0100 (CET)
From: Stoiko Ivanov <s.ivanov@proxmox.com>
To: pmg-devel@lists.proxmox.com
Date: Thu, 17 Nov 2022 16:06:08 +0100
Message-Id: <20221117150611.253644-6-s.ivanov@proxmox.com>
X-Mailer: git-send-email 2.30.2
In-Reply-To: <20221117150611.253644-1-s.ivanov@proxmox.com>
References: <20221117150611.253644-1-s.ivanov@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results: =?UTF-8?Q?0=0A=09?=AWL 0.170 Adjusted
 score from AWL reputation of From: =?UTF-8?Q?address=0A=09?=BAYES_00 -1.9
 Bayes spam probability is 0 to 1%
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict
 =?UTF-8?Q?Alignment=0A=09?=SPF_HELO_NONE 0.001 SPF: HELO does not publish an
 SPF =?UTF-8?Q?Record=0A=09?=SPF_PASS -0.001 SPF: sender matches SPF record
Subject: [pmg-devel] [PATCH pmg-api v2 5/8] partially fix #2465: handle
 smtputf8 addresses in the rule-system
X-BeenThere: pmg-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Mail Gateway development discussion
 <pmg-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pmg-devel>, 
 <mailto:pmg-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pmg-devel/>
List-Post: <mailto:pmg-devel@lists.proxmox.com>
List-Help: <mailto:pmg-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pmg-devel>, 
 <mailto:pmg-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Thu, 17 Nov 2022 15:06:50 -0000

The envelope addresses are used in the rule-system for lookups and
statistics. When a mail is received with smtputf8, the addresses are
decoded (multi-byte perl-strings) and thus need to be encoded before
they are used as parameters in a database query.

This patch encodes the addresses as utf-8 for the relevant queries
unconditionally, because envelope addresses should be either:
* (a subset of) ascii (no smtputf8) - which is invariant under utf-8
  encoding
* valid utf-8 (smtputf8)

The patch does not address the issues with multi-byte addresses in our
LDAP-implementation (hence the partial fix), but should still be an
improvement for many deployments.

Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
 src/PMG/MailQueue.pm    | 10 ++++++----
 src/PMG/RuleDB/Spam.pm  |  5 +++--
 src/bin/pmg-smtp-filter |  5 +++--
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/PMG/MailQueue.pm b/src/PMG/MailQueue.pm
index 2841b07..8355c30 100644
--- a/src/PMG/MailQueue.pm
+++ b/src/PMG/MailQueue.pm
@@ -6,6 +6,7 @@ use warnings;
 use PVE::SafeSyslog;
 use MIME::Parser;
 use IO::File;
+use Encode;
 use File::Sync;
 use File::Basename;
 use File::Path;
@@ -141,6 +142,7 @@ sub quarantinedb_insert {
     my ($self, $ruledb, $lcid, $ldap, $qtype, $header, $sender, $file, $targets, $vars) = @_;
 
     eval {
+	$sender = encode('UTF-8', $sender);
 	my $dbh = $ruledb->{dbh};
 
 	my $insert_cmds = "SELECT nextval ('cmailstore_id_seq'); INSERT INTO CMailStore " .
@@ -188,11 +190,11 @@ sub quarantinedb_insert {
 	    if ($pmail eq lc ($r)) {
 		$receiver = "NULL";
 	    } else {
-		$receiver = $dbh->quote ($r);
+		$receiver = $dbh->quote (encode('UTF-8', $r));
 	    }
 
 
-	    $pmail = $dbh->quote ($pmail);
+	    $pmail = $dbh->quote (encode('UTF-8', $pmail));
 	    $insert_cmds .= "INSERT INTO CMSReceivers " .
 		"(CMailStore_CID, CMailStore_RID, PMail, Receiver, TicketID, Status, MTime) " .
 		"VALUES ($lcid, currval ('cmailstore_id_seq'), $pmail, $receiver, $tid, 'N', $now); ";
@@ -294,8 +296,8 @@ sub quarantine_mail {
 	$entity->head->delete ('Return-Path');
 
 	# prepend Delivered-To and Return-Path (like QMAIL MAILDIR FORMAT)
-	$entity->head->add ('Return-Path', join (',', $sender), 0);
-	$entity->head->add ('Delivered-To', join (',', @$tg), 0);
+	$entity->head->add ('Return-Path', encode('UTF-8', join (',', $sender)), 0);
+	$entity->head->add ('Delivered-To', encode('UTF-8', join (',', @$tg)), 0);
 
 	$entity->print ($fh);
 
diff --git a/src/PMG/RuleDB/Spam.pm b/src/PMG/RuleDB/Spam.pm
index cc9a347..b7e7dd4 100644
--- a/src/PMG/RuleDB/Spam.pm
+++ b/src/PMG/RuleDB/Spam.pm
@@ -4,6 +4,7 @@ use strict;
 use warnings;
 use DBI;
 use Digest::SHA;
+use Encode qw(encode decode);
 use Time::HiRes qw (gettimeofday);
 
 use PVE::SafeSyslog;
@@ -135,8 +136,8 @@ sub get_blackwhite {
     my $cond = '';
     foreach my $r (@$targets) {
 	my $pmail = $msginfo->{pmail}->{$r} || lc ($r);
-	my $qr = $dbh->quote ($pmail);
-	$cond .= " OR " if $cond;  
+	my $qr = $dbh->quote (encode('UTF-8', $pmail));
+	$cond .= " OR " if $cond;
 	$cond .= "pmail = $qr";
     }	 
 
diff --git a/src/bin/pmg-smtp-filter b/src/bin/pmg-smtp-filter
index 45e68a7..bb8e264 100755
--- a/src/bin/pmg-smtp-filter
+++ b/src/bin/pmg-smtp-filter
@@ -4,6 +4,7 @@ use strict;
 use warnings;
 
 use Carp;
+use Encode qw(encode decode);
 use Getopt::Long;
 use Time::HiRes qw (usleep gettimeofday tv_interval);
 use POSIX qw(:sys_wait_h errno_h signal_h);
@@ -791,10 +792,10 @@ sub handle_smtp {
 	$insert_cmds .= ($queue->{sa_score} || 0) . ',';
 	$insert_cmds .= $dbh->quote($queue->{vinfo}) . ',';
 	$insert_cmds .= $time_total . ',';
-	$insert_cmds .= $dbh->quote($msginfo->{sender}) . ');';
+	$insert_cmds .= $dbh->quote(encode('UTF-8', $msginfo->{sender})) . ');';
 
 	foreach my $r (@{$msginfo->{targets}}) {
-	    my $tmp = $dbh->quote($r);
+	    my $tmp = $dbh->quote(encode('UTF-8',$r));
 	    my $blocked = $queue->{status}->{$r} eq 'blocked' ? 1 : 0;
 	    $insert_cmds .= "INSERT INTO CReceivers (CStatistic_CID, CStatistic_RID, Receiver, Blocked) " .
 		"VALUES ($lcid, currval ('cstatistic_id_seq'), $tmp, '$blocked'); ";
-- 
2.30.2