From 1726d68018f45cc3de625cfa50ac8fb92a9ed76c Mon Sep 17 00:00:00 2001 From: mdecimus Date: Mon, 23 Dec 2024 12:22:42 +0100 Subject: [PATCH] Improve SPOOF_DISPLAY_NAME detection (fixes #982) --- crates/spam-filter/src/analysis/from.rs | 40 +++++++++++++++++-------- tests/resources/smtp/antispam/from.test | 21 +++++++++++++ 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/crates/spam-filter/src/analysis/from.rs b/crates/spam-filter/src/analysis/from.rs index 73e4a9ae7..93b14479c 100644 --- a/crates/spam-filter/src/analysis/from.rs +++ b/crates/spam-filter/src/analysis/from.rs @@ -8,6 +8,7 @@ use std::future::Future; use common::Server; use mail_parser::HeaderName; +use nlp::tokenizers::types::{TokenType, TypesTokenizer}; use smtp_proto::{MAIL_BODY_8BITMIME, MAIL_BODY_BINARYMIME, MAIL_SMTPUTF8}; use crate::{Email, SpamFilterContext}; @@ -81,22 +82,37 @@ impl SpamFilterAnalyzeFrom for Server { } else if from_name_trimmed == from_addr.address { ctx.result.add_tag("FROM_DN_EQ_ADDR"); } else { - let from_name_addr = Email::new(from_name_trimmed); if from_addr_is_valid { ctx.result.add_tag("FROM_HAS_DN"); } - if from_name_addr.is_valid() { - if (from_addr_is_valid - && from_name_addr.domain_part.sld != from_addr.domain_part.sld) - || (!env_from_empty - && ctx.output.env_from_addr.domain_part.sld - != from_name_addr.domain_part.sld) - || (env_from_empty - && ctx.output.ehlo_host.sld != from_name_addr.domain_part.sld) + + if from_name_trimmed.contains('@') { + if let Some(from_name_addr) = TypesTokenizer::new(from_name_trimmed) + .tokenize_numbers(false) + .tokenize_urls(false) + .tokenize_urls_without_scheme(false) + .tokenize_emails(true) + .filter_map(|t| match t.word { + TokenType::Email(email) => { + let email = Email::new(email); + email.is_valid().then_some(email) + } + _ => None, + }) + .next() { - ctx.result.add_tag("SPOOF_DISPLAY_NAME"); - } else { - ctx.result.add_tag("FROM_NEQ_DISPLAY_NAME"); + if (from_addr_is_valid + && from_name_addr.domain_part.sld != from_addr.domain_part.sld) + || (!env_from_empty + && ctx.output.env_from_addr.domain_part.sld + != from_name_addr.domain_part.sld) + || (env_from_empty + && ctx.output.ehlo_host.sld != from_name_addr.domain_part.sld) + { + ctx.result.add_tag("SPOOF_DISPLAY_NAME"); + } else { + ctx.result.add_tag("FROM_NEQ_DISPLAY_NAME"); + } } } } diff --git a/tests/resources/smtp/antispam/from.test b/tests/resources/smtp/antispam/from.test index 39bf73929..af19c6dda 100644 --- a/tests/resources/smtp/antispam/from.test +++ b/tests/resources/smtp/antispam/from.test @@ -178,3 +178,24 @@ expect FROMHOST_NORES_A_OR_MX FROM_EQ_ENVFROM FROM_NO_DN From: hello@nomx.org Test + +envelope_from baz@domain.org +expect SPOOF_DISPLAY_NAME FROM_HAS_DN FROM_EQ_ENVFROM + +From: "Foo (foo@bar.com)" + +Test + +envelope_from baz@domain.org +expect SPOOF_DISPLAY_NAME FROM_HAS_DN FROM_EQ_ENVFROM + +From: "Foo foo@bar.com" + +Test + +envelope_from baz@domain.org +expect SPOOF_DISPLAY_NAME FROM_HAS_DN FROM_EQ_ENVFROM + +From: "Foo 'foo@bar.com'" + +Test