From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <d.csapak@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 48E9C92491
 for <pmg-devel@lists.proxmox.com>; Thu,  1 Feb 2024 16:37:32 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 795C912DA0
 for <pmg-devel@lists.proxmox.com>; Thu,  1 Feb 2024 16:37:01 +0100 (CET)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pmg-devel@lists.proxmox.com>; Thu,  1 Feb 2024 16:36:59 +0100 (CET)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 00FE341F79
 for <pmg-devel@lists.proxmox.com>; Thu,  1 Feb 2024 16:36:59 +0100 (CET)
From: Dominik Csapak <d.csapak@proxmox.com>
To: pmg-devel@lists.proxmox.com
Date: Thu,  1 Feb 2024 16:36:49 +0100
Message-Id: <20240201153657.1067215-4-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.30.2
In-Reply-To: <20240201153657.1067215-1-d.csapak@proxmox.com>
References: <20240201153657.1067215-1-d.csapak@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.020 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 T_SCC_BODY_TEXT_LINE    -0.01 -
Subject: [pmg-devel] [RFC PATCH pmg-api 03/11] RuleCache: reorganize how we
 gather marks and spaminfo
X-BeenThere: pmg-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Mail Gateway development discussion
 <pmg-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pmg-devel>, 
 <mailto:pmg-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pmg-devel/>
List-Post: <mailto:pmg-devel@lists.proxmox.com>
List-Help: <mailto:pmg-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pmg-devel>, 
 <mailto:pmg-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Thu, 01 Feb 2024 15:37:32 -0000

Instead of collecting the spaminfo (+match) separately, collect this
per target together with the regular marks. With this, we can omit the
'global' marks list, since each target has their own anyway.

We want this because, once we implement and/invert for matches, the marks
can differ between targets, since the spamlevel can diverge for them and
that can be and-combined with objects that add marks. For that to be
possible we have to save each match + info per target instead of
globally.

Since we don't change the actual matching behaviour with this patch,
for the remove action, we can simply use the marks from the first target
(as they currently have to be identical).

Conversely, we currently save the spaminfo per target, but later in
pmg-smtp-filter we only ever use the first one we encounter, so instead
save it only the first time and use that.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
 src/PMG/RuleCache.pm     | 32 ++++++++++----------------------
 src/PMG/RuleDB/Remove.pm | 19 +++++++++++++++----
 src/bin/pmg-smtp-filter  | 18 +++++-------------
 3 files changed, 30 insertions(+), 39 deletions(-)

diff --git a/src/PMG/RuleCache.pm b/src/PMG/RuleCache.pm
index fd22a16..4f7ebe7 100644
--- a/src/PMG/RuleCache.pm
+++ b/src/PMG/RuleCache.pm
@@ -304,37 +304,25 @@ sub what_match {
     if (scalar($what->{groups}->@*) == 0) {
 	# match all targets
 	foreach my $target (@{$msginfo->{targets}}) {
-	    $res->{$target}->{marks} = [];
+	    $res->{targets}->{$target}->{marks} = [];
 	}
-
-	$res->{marks} = [];
 	return $res;
     }
 
-    my $marks;
-
     for my $group ($what->{groups}->@*) {
 	for my $obj ($group->{objects}->@*) {
 	    if (!$obj->can('what_match_targets')) {
 		if (my $match = $obj->what_match($queue, $element, $msginfo, $dbh)) {
-		    push @$marks, @$match;
+		    for my $target ($msginfo->{targets}->@*) {
+			push $res->{targets}->{$target}->{marks}->@*, $match->@*;
+		    }
 		}
-	    }
-	}
-    }
-
-    foreach my $target (@{$msginfo->{targets}}) {
-	$res->{$target}->{marks} = $marks;
-	$res->{marks} = $marks;
-    }
-
-    for my $group ($what->{groups}->@*) {
-	for my $obj ($group->{objects}->@*) {
-	    if ($obj->can ("what_match_targets")) {
-		my $target_info;
-		if ($target_info = $obj->what_match_targets($queue, $element, $msginfo, $dbh)) {
-		    foreach my $k (keys %$target_info) {
-			$res->{$k} = $target_info->{$k};
+	    } else {
+		if (my $target_info = $obj->what_match_targets($queue, $element, $msginfo, $dbh)) {
+		    foreach my $k (keys $target_info->%*) {
+			push $res->{targets}->{$k}->{marks}->@*, $target_info->{$k}->{marks}->@*;
+			# only save spaminfo once
+			$res->{spaminfo} = $target_info->{$k}->{spaminfo} if !defined($res->{spaminfo});
 		    }
 		}
 	    }
diff --git a/src/PMG/RuleDB/Remove.pm b/src/PMG/RuleDB/Remove.pm
index e7c353c..5812602 100644
--- a/src/PMG/RuleDB/Remove.pm
+++ b/src/PMG/RuleDB/Remove.pm
@@ -198,9 +198,15 @@ sub execute {
 
     my $rulename = encode('UTF-8', $vars->{RULE} // 'unknown');
 
-    if (!$self->{all} && ($#$marks == -1)) {
-	# no marks
-	return;
+    if (!$self->{all}) {
+	my $found_mark = 0;
+	for my $target (keys $marks->{targets}->%*) {
+	    if (scalar($marks->{targets}->{$target}->{marks}->@*) > 0) {
+		$found_mark = 1;
+		last;
+	    }
+	}
+	return if !$found_mark;
     }
 
     my $subgroups = $mod_group->subgroups ($targets);
@@ -256,7 +262,12 @@ sub execute {
 	}
 
 	$self->{message_seen} = 0;
-	$self->delete_marked_parts($queue, $entity, $html, $rtype, $marks, $rulename);
+
+	# since all matches are or-combined, marks for all targets must be the same if they exist
+	# so simply use the first one here
+	my $match_marks = $marks->{targets}->{$tg->[0]}->{marks};
+
+	$self->delete_marked_parts($queue, $entity, $html, $rtype, $match_marks, $rulename);
 	delete $self->{message_seen};
 
 	if ($msginfo->{testmode}) {
diff --git a/src/bin/pmg-smtp-filter b/src/bin/pmg-smtp-filter
index 7da3de8..71043b0 100755
--- a/src/bin/pmg-smtp-filter
+++ b/src/bin/pmg-smtp-filter
@@ -276,8 +276,9 @@ sub apply_rules {
 	foreach my $target (@{$msginfo->{targets}}) {
 	    next if $final->{$target};
 	    next if !defined ($rule_marks{$rule->{id}});
-	    next if !defined ($rule_marks{$rule->{id}}->{$target});
-	    next if !defined ($rule_marks{$rule->{id}}->{$target}->{marks});
+	    next if !defined ($rule_marks{$rule->{id}}->{targets});
+	    next if !defined ($rule_marks{$rule->{id}}->{targets}->{$target});
+	    next if !defined ($rule_marks{$rule->{id}}->{targets}->{$target}->{marks});
 	    next if !$rulecache->to_match ($rule->{id}, $target, $ldap);
 
 	    $final->{$target} = $fin;
@@ -320,24 +321,15 @@ sub apply_rules {
 	my $targets = $rule_targets{$rule->{id}};
 	next if !$targets;
 
-	my $spaminfo;
-	foreach my $t (@$targets) {
-	    if ($rule_marks{$rule->{id}}->{$t} && $rule_marks{$rule->{id}}->{$t}->{spaminfo}) {
-		$spaminfo = $rule_marks{$rule->{id}}->{$t}->{spaminfo};
-		# we assume spam info is the same for all matching targets
-		last;
-	    }
-	}
-
 	my $vars = $self->get_prox_vars (
-	    $queue, $entity, $msginfo, $rule, $rule_targets{$rule->{id}}, $spaminfo);
+	    $queue, $entity, $msginfo, $rule, $rule_targets{$rule->{id}}, $rule_marks{$rule->{id}}->{spaminfo});
 
 	my @sorted_actions = sort {$a->priority <=> $b->priority} @{$rule_actions{$rule->{id}}};
 
 	foreach my $action (@sorted_actions) {
 	    $action->execute(
 		$queue, $self->{ruledb}, $mod_group, $rule_targets{$rule->{id}}, $msginfo, $vars,
-		$rule_marks{$rule->{id}}->{marks}, $ldap
+		$rule_marks{$rule->{id}}, $ldap
 	    );
 	    last if $action->final;
 	}
-- 
2.30.2