From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 4CA8494275 for ; Fri, 9 Feb 2024 13:55:12 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 463E6396A0 for ; Fri, 9 Feb 2024 13:54:46 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Fri, 9 Feb 2024 13:54:44 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 059784682C for ; Fri, 9 Feb 2024 13:54:44 +0100 (CET) From: Dominik Csapak To: pmg-devel@lists.proxmox.com Date: Fri, 9 Feb 2024 13:54:35 +0100 Message-Id: <20240209125440.2572239-12-d.csapak@proxmox.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20240209125440.2572239-1-d.csapak@proxmox.com> References: <20240209125440.2572239-1-d.csapak@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.020 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pmg-devel] [PATCH pmg-api 11/12] RuleCache: implement and/invert for what matches X-BeenThere: pmg-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list 
List-Id: Proxmox Mail Gateway development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 09 Feb 2024 12:55:12 -0000 Since what matches are not a simple boolean match, but also can contain "marks" to mark specific parts of the mail, we must implement some custom logic for and/invert here. The goal here is to define that groups are on a per part level, but the rule operates on the whole mail. To achieve this we have two different and/invert combine functions, one for the group level and one for the whole what match. For per group and/inversion we 'and-combine' and invert the list of marks, so if it matches part 1,2 of 1,2,3 the inversion would return 3. For the rule it only matters if the and/inversion part matches at all, regardless of the marks. If it matches, the marks will be or'ed. With this, one can represent many different scenarios that were not possible before. Signed-off-by: Dominik Csapak --- src/PMG/RuleCache.pm | 165 +++++++++++++++++++++++++++++++++++++-- src/PMG/RuleDB/Remove.pm | 13 ++- 2 files changed, 168 insertions(+), 10 deletions(-) diff --git a/src/PMG/RuleCache.pm b/src/PMG/RuleCache.pm index 7d08107..7affa81 100644 --- a/src/PMG/RuleCache.pm +++ b/src/PMG/RuleCache.pm @@ -336,29 +336,147 @@ sub what_match { return $res; } + my $what_matches = {}; + for my $group ($what->{groups}->@*) { + my $group_matches = {}; + my $and = $group->{and}; + my $invert = $group->{invert}; for my $obj ($group->{objects}->@*) { if (!$obj->can('what_match_targets')) { - if (my $match = $obj->what_match($queue, $element, $msginfo, $dbh)) { - for my $target ($msginfo->{targets}->@*) { - push $res->{targets}->{$target}->{marks}->@*, $match->@*; + my $match = $obj->what_match($queue, $element, $msginfo, $dbh); + for my $target ($msginfo->{targets}->@*) { + if (defined($match)) { + push $group_matches->{$target}->@*, $match; + } else { + push $group_matches->{$target}->@*, undef; } } } else { - if (my 
$target_info = $obj->what_match_targets($queue, $element, $msginfo, $dbh)) { - foreach my $k (keys $target_info->%*) { - push $res->{targets}->{$k}->{marks}->@*, $target_info->{$k}->{marks}->@*; + my $target_info = $obj->what_match_targets($queue, $element, $msginfo, $dbh); + for my $target ($msginfo->{targets}->@*) { + my $match = $target_info->{$target}; + if (defined($match)) { + push $group_matches->{$target}->@*, $match->{marks}; # only save spaminfo once - $res->{spaminfo} = $target_info->{$k}->{spaminfo} if !defined($res->{spaminfo}); + $res->{spaminfo} = $match->{spaminfo} if !defined($res->{spaminfo}); + } else { + push $group_matches->{$target}->@*, undef; } } } } + + for my $target (keys $group_matches->%*) { + my $matches = group_match_and_invert($group_matches->{$target}, $and, $invert, $msginfo); + push $what_matches->{$target}->@*, $matches; + } + } + + for my $target (keys $what_matches->%*) { + my $target_marks = what_match_and_invert($what_matches->{$target}, $what->{and}, $what->{invert}); + next if !defined($target_marks); + $res->{targets}->{$target}->{marks} = $target_marks; } return $res; } +# combines matches of groups +# this is only binary, and if it matches, 'or' combines the marks +# so that all found marks are included +# +# this way we can create rules like: +# +# --- +# What is and combined: +# group1: match filename .*\.pdf +# group2: spamlevel >= 3 +# ACTION: remove attachments +# --- +# which would remove attachments for all *.pdf filenames where +# the spamlevel is >= 3 +sub what_match_and_invert($$$) { + my ($matches, $and, $invert) = @_; + + my $match_result = match_list_with_mode($matches, $and, $invert, sub { + my ($match) = @_; + return defined($match); + }); + + if ($match_result) { + my $res = []; + for my $match ($matches->@*) { + push $res->@*, $match->@* if defined($match); + } + return $res; + } else { + return undef; + } +} + +# combines group matches according to and/invert +# since we want match groups per mime part, 
we must +# look at the marks and possibly invert them +sub group_match_and_invert($$$$) { + my ($group_matches, $and, $invert, $msginfo) = @_; + + my $encountered_parts = 0; + if ($and) { + my $set = {}; + my $count = scalar($group_matches->@*); + for my $match ($group_matches->@*) { + if (!defined($match)) { + $set = {}; + last; + } + + if (scalar($match->@*) > 0) { + $encountered_parts = 1; + $set->{$_}++ for $match->@*; + } else { + $set->{$_}++ for (1..$msginfo->{max_aid}); + } + } + + $group_matches = undef; + for my $key (keys $set->%*) { + if ($set->{$key} == $count) { + push $group_matches->@*, $key; + } + } + if (defined($group_matches) && scalar($group_matches->@*) == $count && !$encountered_parts) { + $group_matches = []; + } + } else { + my $set = {}; + for my $match ($group_matches->@*) { + next if !defined($match); + if (scalar($match->@*) == 0) { + $set->{$_} = 1 for (1..$msginfo->{max_aid}); + } else { + $encountered_parts = 1; + $set->{$_} = 1 for $match->@*; + } + } + + my $count = scalar(keys $set->%*); + if ($count == $msginfo->{max_aid} && !$encountered_parts) { + $group_matches = []; + } elsif ($count == 0) { + $group_matches = undef; + } else { + $group_matches = [keys $set->%*]; + } + } + + if ($invert) { + $group_matches = invert_mark_list($group_matches, $msginfo->{max_aid}); + } + + return $group_matches; +} + # calls sub with each element of $list, and and/ors/inverts the result sub match_list_with_mode($$$$) { my ($list, $and, $invert, $sub) = @_; @@ -378,4 +496,37 @@ sub match_list_with_mode($$$$) { return $and != $invert; } +# inverts a list of marks with the remaining ones of the mail +# examples: +# mail has [1,2,3,4,5] +# +# undef => [1,2,3,4,5] +# [1,2] => [3,4,5] +# [1,2,3,4,5] => undef +# [] => undef // [] means the whole mail matched +sub invert_mark_list($$) { + my ($list, $max_aid) = @_; + + if (defined($list)) { + my $length = scalar($list->@*); + if ($length == 0 || $length == ($max_aid - 1)) { + return undef; + } + } + + 
$list //= []; + + my $set = {}; + $set->{$_} = 1 for $list->@*; + + my $new_list = []; + for (my $i = 1; $i <= $max_aid; $i++) { + if (!$set->{$i}) { + push $new_list->@*, $i; + } + } + + return $new_list; +} + 1; diff --git a/src/PMG/RuleDB/Remove.pm b/src/PMG/RuleDB/Remove.pm index 5812602..c9fd157 100644 --- a/src/PMG/RuleDB/Remove.pm +++ b/src/PMG/RuleDB/Remove.pm @@ -209,7 +209,14 @@ sub execute { return if !$found_mark; } - my $subgroups = $mod_group->subgroups ($targets); + my $subgroups; + if ($marks->{spaminfo}) { + # when there was a spam check in the rule, we might have different marks for + # different targets, so simply copy the mail for each target that matches + $subgroups = $mod_group->explode($targets); + } else { + $subgroups = $mod_group->subgroups ($targets); + } my $html = PMG::Utils::subst_values($self->{text}, $vars); @@ -263,8 +270,8 @@ sub execute { $self->{message_seen} = 0; - # since all matches are or combinded, marks for all targets must be the same if they exist - # so simply use the first one here + # if there was spam check in this rule, the marks must always be the same, + # otherwise we get a subgroup for each target anyway my $match_marks = $marks->{targets}->{$tg->[0]}->{marks}; $self->delete_marked_parts($queue, $entity, $html, $rtype, $match_marks, $rulename); -- 2.30.2