From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 5B72E94009 for ; Wed, 21 Feb 2024 13:25:13 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 40D8216C1B for ; Wed, 21 Feb 2024 13:24:43 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 21 Feb 2024 13:24:41 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id E95F54448B for ; Wed, 21 Feb 2024 13:24:40 +0100 (CET) From: Dominik Csapak To: pmg-devel@lists.proxmox.com Date: Wed, 21 Feb 2024 13:24:27 +0100 Message-Id: <20240221122439.1281024-2-d.csapak@proxmox.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20240221122439.1281024-1-d.csapak@proxmox.com> References: <20240221122439.1281024-1-d.csapak@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.020 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pmg-devel] [PATCH pmg-api v2 01/10] RuleCache: reorganize how we gather marks and spaminfo X-BeenThere: pmg-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 
Precedence: list List-Id: Proxmox Mail Gateway development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 21 Feb 2024 12:25:13 -0000 currently, we gather the marks and the spaminfo separately, with the following properties: * both are gathered with what matches * virus and spam matches are global (i.e. not marks but [] to mark the whole mail) * only the spam match can differ per user (wl/bl) * all matches are or'ed (so order and global/per part does not make much of a difference for matching the rule) * marks are saved once globally per rule and per target * spaminfo is saved per target. Instead, collect spaminfo once and marks per target together. With this, we can omit the 'global' marks list, since each target has their own anyway. This also saves the spaminfo only once, since that must be the same for the mail regardless of target. Since that shouldn't change the current matching behavior (all marks are identical for the whole rule), we can simply use the marks from the first target in `Remove` (the only action where the marks are relevant). This makes it easier for us when we'll implement and/invert for matches, as the marks can differ between targets. That is because the spamlevel can diverge for them and that can be and-combined with objects that add marks.
Signed-off-by: Dominik Csapak --- changes from v1: * improved commit message * return marks and spaminfo separately, this reduces some nesting of objects * improve/remove outdated comments src/PMG/RuleCache.pm | 43 +++++++++++++--------------------------- src/PMG/RuleDB/Remove.pm | 18 +++++++++++++---- src/bin/pmg-smtp-filter | 20 +++++++------------ 3 files changed, 35 insertions(+), 46 deletions(-) diff --git a/src/PMG/RuleCache.pm b/src/PMG/RuleCache.pm index fd22a16..cd56342 100644 --- a/src/PMG/RuleCache.pm +++ b/src/PMG/RuleCache.pm @@ -295,53 +295,38 @@ sub what_match { my $what = $self->{"$ruleid:what"}; - my $res; - - # $res->{marks} is used by mark specific actions like remove-attachments - # $res->{$target}->{marks} is only used in apply_rules() to exclude some - # targets (spam blacklist and whitelist) + my $marks; + my $spaminfo; if (scalar($what->{groups}->@*) == 0) { # match all targets foreach my $target (@{$msginfo->{targets}}) { - $res->{$target}->{marks} = []; + $marks->{$target} = []; } - - $res->{marks} = []; - return $res; + return ($marks, $spaminfo); } - my $marks; - for my $group ($what->{groups}->@*) { for my $obj ($group->{objects}->@*) { if (!$obj->can('what_match_targets')) { if (my $match = $obj->what_match($queue, $element, $msginfo, $dbh)) { - push @$marks, @$match; + for my $target ($msginfo->{targets}->@*) { + push $marks->{$target}->@*, $match->@*; + } } - } - } - } - - foreach my $target (@{$msginfo->{targets}}) { - $res->{$target}->{marks} = $marks; - $res->{marks} = $marks; - } - - for my $group ($what->{groups}->@*) { - for my $obj ($group->{objects}->@*) { - if ($obj->can ("what_match_targets")) { - my $target_info; - if ($target_info = $obj->what_match_targets($queue, $element, $msginfo, $dbh)) { - foreach my $k (keys %$target_info) { - $res->{$k} = $target_info->{$k}; + } else { + if (my $target_info = $obj->what_match_targets($queue, $element, $msginfo, $dbh)) { + foreach my $k (keys $target_info->%*) { + push
$marks->{$k}->@*, $target_info->{$k}->{marks}->@*; + # only save spaminfo once + $spaminfo = $target_info->{$k}->{spaminfo} if !defined($spaminfo); } } } } } - return $res; + return ($marks, $spaminfo); } 1; diff --git a/src/PMG/RuleDB/Remove.pm b/src/PMG/RuleDB/Remove.pm index e7c353c..3acc861 100644 --- a/src/PMG/RuleDB/Remove.pm +++ b/src/PMG/RuleDB/Remove.pm @@ -198,9 +198,15 @@ sub execute { my $rulename = encode('UTF-8', $vars->{RULE} // 'unknown'); - if (!$self->{all} && ($#$marks == -1)) { - # no marks - return; + if (!$self->{all}) { + my $found_mark = 0; + for my $target (keys $marks->%*) { + if (scalar($marks->{$target}->@*) > 0) { + $found_mark = 1; + last; + } + } + return if !$found_mark; } my $subgroups = $mod_group->subgroups ($targets); @@ -256,7 +262,11 @@ sub execute { } $self->{message_seen} = 0; - $self->delete_marked_parts($queue, $entity, $html, $rtype, $marks, $rulename); + + # since currently all marks are equal for all target, just use the first one + my $match_marks = $marks->{$tg->[0]}; + + $self->delete_marked_parts($queue, $entity, $html, $rtype, $match_marks, $rulename); delete $self->{message_seen}; if ($msginfo->{testmode}) { diff --git a/src/bin/pmg-smtp-filter b/src/bin/pmg-smtp-filter index 7da3de8..cc2fcd6 100755 --- a/src/bin/pmg-smtp-filter +++ b/src/bin/pmg-smtp-filter @@ -231,6 +231,7 @@ sub apply_rules { my %rule_targets; my %rule_actions; my %rule_marks; + my %rule_spaminfo; my $matching_rules = []; my $rulecache = $self->{rulecache}; @@ -266,9 +267,12 @@ sub apply_rules { next; } - $rule_marks{$rule->{id}} = + my ($marks, $spaminfo) = $rulecache->what_match ($rule->{id}, $queue, $entity, $msginfo, $dbh); + $rule_marks{$rule->{id}} = $marks; + $rule_spaminfo{$rule->{id}} = $spaminfo; + $rule_actions{$rule->{id}} = $rulecache->get_actions ($rule->{id}); my $fin = $rulecache->final ($rule->{id}); @@ -277,7 +281,6 @@ sub apply_rules { next if $final->{$target}; next if !defined ($rule_marks{$rule->{id}}); next if !defined 
($rule_marks{$rule->{id}}->{$target}); - next if !defined ($rule_marks{$rule->{id}}->{$target}->{marks}); next if !$rulecache->to_match ($rule->{id}, $target, $ldap); $final->{$target} = $fin; @@ -320,24 +323,15 @@ sub apply_rules { my $targets = $rule_targets{$rule->{id}}; next if !$targets; - my $spaminfo; - foreach my $t (@$targets) { - if ($rule_marks{$rule->{id}}->{$t} && $rule_marks{$rule->{id}}->{$t}->{spaminfo}) { - $spaminfo = $rule_marks{$rule->{id}}->{$t}->{spaminfo}; - # we assume spam info is the same for all matching targets - last; - } - } - my $vars = $self->get_prox_vars ( - $queue, $entity, $msginfo, $rule, $rule_targets{$rule->{id}}, $spaminfo); + $queue, $entity, $msginfo, $rule, $rule_targets{$rule->{id}}, $rule_spaminfo{$rule->{id}}); my @sorted_actions = sort {$a->priority <=> $b->priority} @{$rule_actions{$rule->{id}}}; foreach my $action (@sorted_actions) { $action->execute( $queue, $self->{ruledb}, $mod_group, $rule_targets{$rule->{id}}, $msginfo, $vars, - $rule_marks{$rule->{id}}->{marks}, $ldap + $rule_marks{$rule->{id}}, $ldap ); last if $action->final; } -- 2.30.2