From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <pmg-devel-bounces@lists.proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
	by lore.proxmox.com (Postfix) with ESMTPS id 263991FF16E
	for <inbox@lore.proxmox.com>; Mon, 17 Feb 2025 14:04:02 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
	by firstgate.proxmox.com (Proxmox) with ESMTP id C031D1F85B;
	Mon, 17 Feb 2025 14:03:59 +0100 (CET)
Message-ID: <2f1ce944-ef0c-4d48-8fef-6e31364dab66@proxmox.com>
Date: Mon, 17 Feb 2025 14:03:55 +0100
MIME-Version: 1.0
User-Agent: Mozilla Thunderbird Beta
To: Stoiko Ivanov <s.ivanov@proxmox.com>, pmg-devel@lists.proxmox.com
References: <20250212151241.91077-1-s.ivanov@proxmox.com>
 <20250212151241.91077-3-s.ivanov@proxmox.com>
Content-Language: en-US
From: Dominik Csapak <d.csapak@proxmox.com>
In-Reply-To: <20250212151241.91077-3-s.ivanov@proxmox.com>
X-SPAM-LEVEL: Spam detection results:  0
 AWL 0.019 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
Subject: Re: [pmg-devel] [PATCH pmg-api 2/2] ruledb: content-type: add flags
 for source of matching
X-BeenThere: pmg-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox Mail Gateway development discussion
 <pmg-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pmg-devel>, 
 <mailto:pmg-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pmg-devel/>
List-Post: <mailto:pmg-devel@lists.proxmox.com>
List-Help: <mailto:pmg-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pmg-devel>, 
 <mailto:pmg-devel-request@lists.proxmox.com?subject=subscribe>
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="us-ascii"; Format="flowed"
Errors-To: pmg-devel-bounces@lists.proxmox.com
Sender: "pmg-devel" <pmg-devel-bounces@lists.proxmox.com>

code looks mostly fine (some comments inline)

but one higher level question:

does it really make sense to expose 3 settings for this?

e.g. wouldn't it much more simple to have a 'default' and 'strict' mode
where 'strict' only matches on the contents file type?

both filename&header can be set aribtrarily by the sender, so doing
filtering based on that seem dangerous?

Or is there some specific use case that makes it necessary
to specify all 3 sources separately ?

On 2/12/25 16:12, Stoiko Ivanov wrote:
> our current content-type matching is sensibly quite cautious in
> matching if any available information indicates a potential match:
> * mime-type detection based on file contents
> * mime-type detection based on file suffix
> * content-type header
> 
> Sometimes this can lead to surprises (e.g. when a MUA sets the
> filetype of a pdf to application/octet-stream (the default type if no
> information is available).
> 
> This change gives users the option to rely only on some of the sources
> for matching.
> 
> This is a fix for the intial request in #2691 and addresses the
> suggestion from Friedrich from:
> https://bugzilla.proxmox.com/show_bug.cgi?id=5618#c2
> 
> inspired by the changes for disclaimer released with PMG 8.1:
> 51d1507 ("fix #2430: ruledb disclaimer: make separator configurable")
> 
> Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
> ---
>   src/PMG/RuleDB/ContentTypeFilter.pm | 91 +++++++++++++++++++++++++++--
>   1 file changed, 86 insertions(+), 5 deletions(-)
> 
> diff --git a/src/PMG/RuleDB/ContentTypeFilter.pm b/src/PMG/RuleDB/ContentTypeFilter.pm
> index 0199311..0dafa64 100644
> --- a/src/PMG/RuleDB/ContentTypeFilter.pm
> +++ b/src/PMG/RuleDB/ContentTypeFilter.pm
> @@ -26,7 +26,7 @@ sub otype_text {
>   }
>   
>   sub new {
> -    my ($type, $fvalue, $ogroup) = @_;
> +    my ($type, $fvalue, $ogroup, $header, $magic, $glob) = @_;
>   
>       my $class = ref($type) || $type;
>   
> @@ -36,6 +36,9 @@ sub new {
>       }
>   
>       my $self = $class->SUPER::new('content-type', $fvalue, $ogroup);
> +    $self->{header} = $header;
> +    $self->{magic} = $magic;
> +    $self->{glob} = $glob;
>   
>       return $self;
>   }
> @@ -52,9 +55,53 @@ sub load_attr {
>   	$obj->{field_value} = $nt;
>       }
>   
> +    my $sth = $ruledb->{dbh}->prepare(
> +	"SELECT * FROM Attribut WHERE Object_ID = ?");
> +
> +    $sth->execute($id);
> +
> +    $obj->{header} = $obj->{magic} = $obj->{glob} = 1;
> +
> +    while (my $ref = $sth->fetchrow_hashref()) {
> +	if ($ref->{name} =~ /^(header|magic|glob)$/) {
> +	    $obj->{$1} = $ref->{value};
> +	}
> +    }
> +
> +    $sth->finish();
> +
> +    $obj->{id} = $id;
> +
> +    $obj->{digest} = Digest::SHA::sha1_hex(
> +	$id, $value, $ogroup, $obj->{header} // 1, $obj->{magic} //1 , $obj->{glob} // 1);

missing whitespace: '//1' -> '// 1'

> +
>       return $obj;
>   }
>   
> +sub save {
> +    my ($self, $ruledb) = @_;
> +
> +    if (defined($self->{id})) {
> +	#update - clean old attribut entries
> +	$ruledb->{dbh}->do(
> +	    "DELETE FROM Attribut WHERE Object_ID = ?",
> +	    undef, $self->{id});
> +    }
> +
> +    $self->{id} = $self->SUPER::save($ruledb);
> +
> +    for my $prop (qw(header magic glob)) {
> +	if (defined($self->{$prop})) {
> +	    $ruledb->{dbh}->do(
> +		"INSERT INTO Attribut (Value, Name, Object_ID) VALUES (?, ?, ?) ".
> +		"ON CONFLICT(Object_ID, Name) DO UPDATE SET Value = Excluded.Value ",
> +		undef, $self->{$prop}, $prop,  $self->{id});
> +	}
> +    }
> +
> +    return $self->{id};
> +}
> +
>   sub parse_entity {
>       my ($self, $entity) = @_;
>   
> @@ -78,11 +125,14 @@ sub parse_entity {
>   
>   	my $glob_ct = $entity->{PMX_glob_ct};
>   
> -	if ($header_ct && $header_ct =~ m|$self->{field_value}|) {
> +	my $check_header = !defined($self->{header}) || ${self}->{header};
> +	my $check_magic = !defined($self->{magic}) || ${self}->{magic};
> +	my $check_glob = !defined($self->{glob}) || ${self}->{glob};

IMHO would be more readable when using the same syntax as above:

my $check_header = $self->{header} // 1;

what do you think?

> +	if ($header_ct && $check_header && $header_ct =~ m|$self->{field_value}|) {
>   	    push @$res, $id;
> -	} elsif ($magic_ct && $magic_ct =~ m|$self->{field_value}|) {
> +	} elsif ($magic_ct && $check_magic && $magic_ct =~ m|$self->{field_value}|) {
>   	    push @$res, $id;
> -	} elsif ($glob_ct && $glob_ct =~ m|$self->{field_value}|) {
> +	} elsif ($glob_ct && $check_glob && $glob_ct =~ m|$self->{field_value}|) {
>   	    push @$res, $id;
>   	}
>       }
> @@ -112,19 +162,50 @@ sub properties {
>   	    pattern => '[0-9a-zA-Z\/\\\[\]\+\-\.\*\_]+',
>   	    maxLength => 1024,
>   	},
> +	header => {
> +	    description => "use content-type from mail-header for matching",

is that really the 'mail-header' or the "mime-part header" ?
(not sure how to express that in a way it's clear to all)

> +	    type => 'boolean',
> +	    optional => 1,
> +	    default => 1,
> +	},
> +	magic => {
> +	    description => "use content-type from scanning the content for matching",
> +	    type => 'boolean',
> +	    optional => 1,
> +	    default => 1,
> +	},
> +	glob => {
> +	    description => "use content-type based on file-name for matching",
> +	    type => 'boolean',
> +	    optional => 1,
> +	    default => 1,
> +	},
>       };
>   }
>   
>   sub get {
>       my ($self) = @_;
>   
> -    return { contenttype => $self->{field_value} };
> +    return {
> +	contenttype => $self->{field_value},
> +	header => $self->{header},
> +	magic => $self->{magic},
> +	glob => $self->{glob},
> +    };
>   }
>   
>   sub update {
>       my ($self, $param) = @_;
>   
>       $self->{field_value} = $param->{contenttype};
> +
> +    for my $prop (qw(header magic glob)) {
> +	if (defined($param->{$prop}) && $param->{$prop} == 0) {
> +	    $self->{$prop} = 0;
> +	} else {
> +	    delete $self->{$prop};
> +	}
> +    }
>   }
>   
>   1;


_______________________________________________
pmg-devel mailing list
pmg-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pmg-devel