From 5dbb0cb8af6d17ba18a9a4a794ca820c8606d673 Mon Sep 17 00:00:00 2001 From: Stacy Brock Date: Mon, 29 Jul 2024 09:19:57 -0700 Subject: [PATCH] Normalize all inputs to is_in_message() function --- filter-rules.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/filter-rules.py b/filter-rules.py index 82b15a5..29c8720 100644 --- a/filter-rules.py +++ b/filter-rules.py @@ -1,3 +1,5 @@ +import re + BLOCK_EMAIL = [ 'support-noreply@status.duosecurity.com', 'alerts@alerts.mail.hpe.com', @@ -24,6 +26,7 @@ BLOCK_KEYWORDS = [ 'no longer would like to be contacted', 'opt-out', 'opt out', + 'piano', 'pmp exam', 'prefer not to receive', 'prefer not to see', @@ -41,6 +44,7 @@ BLOCK_KEYWORDS = [ 'want to receive', 'webcast', 'webinar', + 'welder', 'white paper', 'whitepaper', 'wish to be contacted', @@ -311,7 +315,7 @@ def filter_message(self, message): return # delete other spam - if is_in_message(normalized['BLOCK_ANNOYING'], message): + if is_in_message(BLOCK_ANNOYING, message): self._log_result(message, 'junking spam containing annoying content') move_message(message, 'Junk Email') return @@ -336,7 +340,7 @@ def filter_message(self, message): return # junk messages with blocked keywords - if is_in_message(normalized['BLOCK_KEYWORDS'], message): + if is_in_message(BLOCK_KEYWORDS, message): self._log_result(message, 'junking spam containing blocked keyword') move_message(message, 'Junk Email') return @@ -381,14 +385,17 @@ def is_in_message(list_, message): Returns True if any string in the list is found in the message """ is_found = False - message_body = message.body.lower() - message_subject = message.subject.lower() - message_from = message.sender.address.lower() - for t in list_: - term = t.lower() - if (term in message_subject or term in message_from - or term in message_body): + # normalize inputs + search_terms = [x.lower() for x in list_] + message_body = message.body.lower().replace('\ufeff', '') + message_subject = message.subject.lower().replace('\ufeff', '') + message_from = message.sender.address.lower().replace('\ufeff', '') + + for term in search_terms: + if (re.search(term, message_subject) + or re.search(term, message_from) + or re.search(term, message_body)): is_found = True break return is_found @@ -413,10 +420,6 @@ def init_filters(self): for k,v in self._categories.items(): category_cache[k] = v - # normalize filter keyword lists - normalized['BLOCK_KEYWORDS'] = [x.lower() for x in BLOCK_KEYWORDS] - normalized['BLOCK_ANNOYING'] = [x.lower() for x in BLOCK_ANNOYING] - def get_header(header_key, headers): vals = [] for header in headers: