From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <d.csapak@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 5B72E94009
 for <pmg-devel@lists.proxmox.com>; Wed, 21 Feb 2024 13:25:13 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 40D8216C1B
 for <pmg-devel@lists.proxmox.com>; Wed, 21 Feb 2024 13:24:43 +0100 (CET)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pmg-devel@lists.proxmox.com>; Wed, 21 Feb 2024 13:24:41 +0100 (CET)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id E95F54448B
 for <pmg-devel@lists.proxmox.com>; Wed, 21 Feb 2024 13:24:40 +0100 (CET)
From: Dominik Csapak <d.csapak@proxmox.com>
To: pmg-devel@lists.proxmox.com
Date: Wed, 21 Feb 2024 13:24:27 +0100
Message-Id: <20240221122439.1281024-2-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.30.2
In-Reply-To: <20240221122439.1281024-1-d.csapak@proxmox.com>
References: <20240221122439.1281024-1-d.csapak@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.020 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 T_SCC_BODY_TEXT_LINE    -0.01 -
Subject: [pmg-devel] [PATCH pmg-api v2 01/10] RuleCache: reorganize how we
 gather marks and spaminfo
X-BeenThere: pmg-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Mail Gateway development discussion
 <pmg-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pmg-devel>, 
 <mailto:pmg-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pmg-devel/>
List-Post: <mailto:pmg-devel@lists.proxmox.com>
List-Help: <mailto:pmg-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pmg-devel>, 
 <mailto:pmg-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Wed, 21 Feb 2024 12:25:13 -0000

currently, we gather the marks and the spaminfo separately,
with the following properties:
* both are gathered with what matches
* virus and spam matches are global (i.e. not marks but [] to mark the
  whole mail)
* only the spam match can differ per user (wl/bl)
* all matches are or'ed (so order and global/per part does not make much
  of a difference for matching the rule)
* marks are saved once globally per rule and per target
* spaminfo is saved per target

instead collect spaminfo once and marks per target together. With this,
we can omit the 'global' marks list, since each target has their own
anyway. Also saves the spaminfo only once, since that must be the same
for the mail regardless of target.

Since that shouldn't change the current matching behavior (all marks
are identical for the whole rule), we can simply use the marks from the
first target in `Remove` (the only action where the marks are relevant).

This makes it easier for us when we'll implement and/invert for matches,
as the marks can differ between targets. That is, because the spamlevel
can diverge for them and that can be and-combined with objects that add
marks.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
changes from v1:
* improved commit message
* return marks and spaminfo seperately, this reduces some nesting of objects
* improve/remove outdated comments

 src/PMG/RuleCache.pm     | 43 +++++++++++++---------------------------
 src/PMG/RuleDB/Remove.pm | 18 +++++++++++++----
 src/bin/pmg-smtp-filter  | 20 +++++++------------
 3 files changed, 35 insertions(+), 46 deletions(-)

diff --git a/src/PMG/RuleCache.pm b/src/PMG/RuleCache.pm
index fd22a16..cd56342 100644
--- a/src/PMG/RuleCache.pm
+++ b/src/PMG/RuleCache.pm
@@ -295,53 +295,38 @@ sub what_match {
 
     my $what = $self->{"$ruleid:what"};
 
-    my $res;
-
-    # $res->{marks} is used by mark specific actions like remove-attachments
-    # $res->{$target}->{marks} is only used in apply_rules() to exclude some
-    # targets (spam blacklist and whitelist)
+    my $marks;
+    my $spaminfo;
 
     if (scalar($what->{groups}->@*) == 0) {
 	# match all targets
 	foreach my $target (@{$msginfo->{targets}}) {
-	    $res->{$target}->{marks} = [];
+	    $marks->{$target} = [];
 	}
-
-	$res->{marks} = [];
-	return $res;
+	return ($marks, $spaminfo);
     }
 
-    my $marks;
-
     for my $group ($what->{groups}->@*) {
 	for my $obj ($group->{objects}->@*) {
 	    if (!$obj->can('what_match_targets')) {
 		if (my $match = $obj->what_match($queue, $element, $msginfo, $dbh)) {
-		    push @$marks, @$match;
+		    for my $target ($msginfo->{targets}->@*) {
+			push $marks->{$target}->@*, $match->@*;
+		    }
 		}
-	    }
-	}
-    }
-
-    foreach my $target (@{$msginfo->{targets}}) {
-	$res->{$target}->{marks} = $marks;
-	$res->{marks} = $marks;
-    }
-
-    for my $group ($what->{groups}->@*) {
-	for my $obj ($group->{objects}->@*) {
-	    if ($obj->can ("what_match_targets")) {
-		my $target_info;
-		if ($target_info = $obj->what_match_targets($queue, $element, $msginfo, $dbh)) {
-		    foreach my $k (keys %$target_info) {
-			$res->{$k} = $target_info->{$k};
+	    } else {
+		if (my $target_info = $obj->what_match_targets($queue, $element, $msginfo, $dbh)) {
+		    foreach my $k (keys $target_info->%*) {
+			push $marks->{$k}->@*, $target_info->{$k}->{marks}->@*;
+			# only save spaminfo once
+			$spaminfo = $target_info->{$k}->{spaminfo} if !defined($spaminfo);
 		    }
 		}
 	    }
 	}
     }
 
-    return $res;
+    return ($marks, $spaminfo);
 }
 
 1;
diff --git a/src/PMG/RuleDB/Remove.pm b/src/PMG/RuleDB/Remove.pm
index e7c353c..3acc861 100644
--- a/src/PMG/RuleDB/Remove.pm
+++ b/src/PMG/RuleDB/Remove.pm
@@ -198,9 +198,15 @@ sub execute {
 
     my $rulename = encode('UTF-8', $vars->{RULE} // 'unknown');
 
-    if (!$self->{all} && ($#$marks == -1)) {
-	# no marks
-	return;
+    if (!$self->{all}) {
+	my $found_mark = 0;
+	for my $target (keys $marks->%*) {
+	    if (scalar($marks->{$target}->@*) > 0) {
+		$found_mark = 1;
+		last;
+	    }
+	}
+	return if !$found_mark;
     }
 
     my $subgroups = $mod_group->subgroups ($targets);
@@ -256,7 +262,11 @@ sub execute {
 	}
 
 	$self->{message_seen} = 0;
-	$self->delete_marked_parts($queue, $entity, $html, $rtype, $marks, $rulename);
+
+	# since currently all marks are equal for all target, just use the first one
+	my $match_marks = $marks->{$tg->[0]};
+
+	$self->delete_marked_parts($queue, $entity, $html, $rtype, $match_marks, $rulename);
 	delete $self->{message_seen};
 
 	if ($msginfo->{testmode}) {
diff --git a/src/bin/pmg-smtp-filter b/src/bin/pmg-smtp-filter
index 7da3de8..cc2fcd6 100755
--- a/src/bin/pmg-smtp-filter
+++ b/src/bin/pmg-smtp-filter
@@ -231,6 +231,7 @@ sub apply_rules {
     my %rule_targets;
     my %rule_actions;
     my %rule_marks;
+    my %rule_spaminfo;
     my $matching_rules = [];
 
     my $rulecache = $self->{rulecache};
@@ -266,9 +267,12 @@ sub apply_rules {
 	    next;
 	}
 
-	$rule_marks{$rule->{id}} =
+	my ($marks, $spaminfo) =
 	    $rulecache->what_match ($rule->{id}, $queue, $entity, $msginfo, $dbh);
 
+	$rule_marks{$rule->{id}} = $marks;
+	$rule_spaminfo{$rule->{id}} = $spaminfo;
+
 	$rule_actions{$rule->{id}} = $rulecache->get_actions ($rule->{id});
 	my $fin = $rulecache->final ($rule->{id});
 
@@ -277,7 +281,6 @@ sub apply_rules {
 	    next if $final->{$target};
 	    next if !defined ($rule_marks{$rule->{id}});
 	    next if !defined ($rule_marks{$rule->{id}}->{$target});
-	    next if !defined ($rule_marks{$rule->{id}}->{$target}->{marks});
 	    next if !$rulecache->to_match ($rule->{id}, $target, $ldap);
 
 	    $final->{$target} = $fin;
@@ -320,24 +323,15 @@ sub apply_rules {
 	my $targets = $rule_targets{$rule->{id}};
 	next if !$targets;
 
-	my $spaminfo;
-	foreach my $t (@$targets) {
-	    if ($rule_marks{$rule->{id}}->{$t} && $rule_marks{$rule->{id}}->{$t}->{spaminfo}) {
-		$spaminfo = $rule_marks{$rule->{id}}->{$t}->{spaminfo};
-		# we assume spam info is the same for all matching targets
-		last;
-	    }
-	}
-
 	my $vars = $self->get_prox_vars (
-	    $queue, $entity, $msginfo, $rule, $rule_targets{$rule->{id}}, $spaminfo);
+	    $queue, $entity, $msginfo, $rule, $rule_targets{$rule->{id}}, $rule_spaminfo{$rule->{id}});
 
 	my @sorted_actions = sort {$a->priority <=> $b->priority} @{$rule_actions{$rule->{id}}};
 
 	foreach my $action (@sorted_actions) {
 	    $action->execute(
 		$queue, $self->{ruledb}, $mod_group, $rule_targets{$rule->{id}}, $msginfo, $vars,
-		$rule_marks{$rule->{id}}->{marks}, $ldap
+		$rule_marks{$rule->{id}}, $ldap
 	    );
 	    last if $action->final;
 	}
-- 
2.30.2