Improve filter-rules.py

Changelist:
- Added an init_filters() function as an entry point for when filters are
  reloaded from a file. This is a good place to put any code that should
  run once instead of on a per-message basis.
- Added helper functions for moving messages and checking if a message
  has any matches in a list of strings.
- Created local object caches for normalized keyword lists and mailbox
  folder IDs. This puts the data closer to where it is needed, and
  removes the need for passing 'self' around when filtering.
- Added filter rule to catch unactionable email.
This commit is contained in:
Stacy Brock
2021-04-14 14:43:34 -07:00
parent f1a87e5496
commit 6af6eed48d

View File

@@ -11,29 +11,19 @@ BLOCK_EMAIL = [
]
BLOCK_KEYWORDS = [
'charitable fund drive',
'election reminder',
'email preferences',
'end these email updates',
'food drive',
'food share',
'general election',
'giving tuesday',
'hardship leave donations needed',
'manage your preferences',
'modify your preferences',
'opt-out',
'opt out',
'prefer not to receive',
'prefer not to see',
'register to vote'
'Samsung SDS America',
'sidekickopen',
'special election',
'subscription preferences',
'survey',
'unsubscribe',
'voter registration',
'want to receive',
'webinar',
'whitepaper',
@@ -42,7 +32,21 @@ BLOCK_KEYWORDS = [
# Domains treated as spam sources.
# NOTE(review): presumably consulted by the blocked-domain check in
# filter_message -- confirm against the header-processing code.
# Every entry ends with a comma: adjacent string literals without one are
# silently concatenated into a single (wrong) entry.
BLOCK_DOMAINS = [
    'customeriomail.com',
    'mailgun.net',
    'norstar.net',
]
# Phrases that mark a message as unwanted solicitation. init_filters() stores
# a lower-cased copy in normalized['BLOCK_ANNOYING'], and filter_message junks
# any message whose subject or body contains one of them.
# Every entry ends with a comma: adjacent string literals without one are
# silently concatenated into a single (wrong) entry.
BLOCK_ANNOYING = [
    'charitable fund drive',
    'election reminder',
    'food drive',
    'food share',
    'general election',
    'giving tuesday',
    'hardship leave donations needed',
    'register to vote',
    'special election',
    'voter registration',
]
ALLOW = [
@@ -65,11 +69,25 @@ ALLOW = [
'microsoft.com'
]
# Module-level caches, filled in by init_filters().
# normalized: lower-cased copies of the keyword lists, keyed by list name
# ('BLOCK_KEYWORDS', 'BLOCK_ANNOYING').
normalized = {}
# folder_cache: copy of the caller's self._folders mapping, keyed by folder
# name; move_message() looks destinations up here.
folder_cache = {}
def filter_message(self, message):
# normalize message attributes
normalized_to = [x.address.lower() for x in message.to]
normalized_from = message.sender.address.lower()
normalized_subject = message.subject.lower()
# filter unactionable coresys emails
if 'coresys@lists.oregonstate.edu' in normalized_from:
unactionable = [x.lower() for x in [
'DWPRODRAW Verification',
'BFPDB Reimbursement Job Monitor - OK',
'DSDB Important Job Monitor - OK'
]]
if is_in_message(unactionable, message):
self._log_result(message, 'moving to unactionable')
move_message(message, 'unactionable')
return
# filter alerts-sig
if (normalized_from in ['mcc-b11-stor1@oregonstate.edu',
@@ -78,7 +96,7 @@ def filter_message(self, message):
'isilon@storage.sig.oregonstate.edu']
or 'alarm.DatastoreDiskUsageAlarm' in message.subject):
self._log_result(message, 'moving to alerts-sig')
message.move(self._folders['alerts-sig'])
move_message(message, 'alerts-sig')
return
# filter conference spam
@@ -90,7 +108,7 @@ def filter_message(self, message):
# filter backup-nightly
if 'backup-nightly@lists.oregonstate.edu' in message.to:
self._log_result(message, 'moving to backup-nightly')
message.move(self._folders['backup-nightly'])
move_message(message, 'backup-nightly')
return
# delete HP alert spam
@@ -99,6 +117,12 @@ def filter_message(self, message):
message.delete()
return
# delete other spam
if is_in_message(normalized['BLOCK_ANNOYING'], message):
self._log_result(message, 'junking spam containing annoying content')
move_message(message, 'Junk Email')
return
# keep messages from allowed emails and domains
for good in ALLOW:
if good in normalized_from and normalized_from not in BLOCK_EMAIL:
@@ -109,22 +133,14 @@ def filter_message(self, message):
# junk messages from blocked senders
if normalized_from in BLOCK_EMAIL:
self._log_result(message, 'junking spam from blocked sender')
message.move(self._folders['Junk Email'])
move_message(message, 'Junk Email')
return
# junk messages with blocked keywords in message body
is_spam = False
message_body = message.body.lower()
for phrase in self._normalized['BLOCK_KEYWORDS']:
if phrase in normalized_subject:
is_spam = True
break
if phrase in message_body:
is_spam = True
break
if is_spam:
# junk messages with blocked keywords
if is_in_message(normalized['BLOCK_KEYWORDS'], message):
self._log_result(message, 'junking spam containing blocked keyword')
message.move(self._folders['Junk Email'])
move_message(message, 'Junk Email')
return
# process message headers into a sane data structure
@@ -144,21 +160,49 @@ def filter_message(self, message):
break
if is_spam:
self._log_result(message, 'junking spam from blocked domain')
message.move(self._folders['Junk Email'])
move_message(message, 'Junk Email')
return
# junk known spam headers
if (get_header('X-Spam-Flag' == 'YES', headers)
or int(get_header('X-MS-Exchange-Organization-SCL', headers)) >= 5):
self._log_result(message, 'junking spam with known header')
message.move(self._folders['Junk Email'])
move_message(message, 'Junk Email')
return
# KEEP MESSAGE
self._log_result(message, 'keeping message, passed all filter checks')
def normalize_lists(self):
    """Store a lower-cased copy of BLOCK_KEYWORDS on the instance.

    The result is written to ``self._normalized['BLOCK_KEYWORDS']``.
    """
    lowered = []
    for keyword in BLOCK_KEYWORDS:
        lowered.append(keyword.lower())
    self._normalized['BLOCK_KEYWORDS'] = lowered
def is_in_message(list_, message):
    """Search a message's subject and body for any of a list of strings.

    The subject and body are lower-cased before comparison, so the terms in
    ``list_`` must already be lower-case for a match to occur.

    Args:
        list_: iterable of lower-case substrings to look for.
        message: object exposing ``subject`` and ``body`` text attributes.

    Returns:
        True if any term occurs in the subject or the body, otherwise False.
    """
    # A missing (None) subject or body is treated as empty text instead of
    # raising AttributeError on .lower().
    message_subject = (message.subject or '').lower()
    message_body = (message.body or '').lower()
    return any(
        term in message_subject or term in message_body
        for term in list_
    )
def move_message(message, folder):
    """Move a message into the cached mailbox folder named ``folder``.

    Args:
        message: object exposing a ``move(destination)`` method.
        folder: key into the module-level ``folder_cache``.

    Returns:
        False when ``folder`` is not in ``folder_cache`` (the message is left
        where it is); True once ``message.move`` has been called.
    """
    if folder not in folder_cache:
        return False
    message.move(folder_cache[folder])
    # Report success explicitly: falling off the end returned None, which is
    # just as falsy as the failure value above, so callers could not tell
    # the two outcomes apart.
    return True
def init_filters(self):
    """Populate the module-level caches used while filtering.

    Copies every entry of ``self._folders`` into ``folder_cache`` and stores
    lower-cased copies of BLOCK_KEYWORDS and BLOCK_ANNOYING in ``normalized``.
    """
    def lowered(phrases):
        # lower-case every phrase so matching can ignore case
        return [phrase.lower() for phrase in phrases]

    # mirror the parent object's folder mapping into the module-level cache
    folder_cache.update(self._folders.items())

    normalized['BLOCK_KEYWORDS'] = lowered(BLOCK_KEYWORDS)
    normalized['BLOCK_ANNOYING'] = lowered(BLOCK_ANNOYING)
def get_header(header_key, headers):
vals = []
@@ -174,9 +218,8 @@ def get_header(header_key, headers):
return False
def search_headers(search, headers):
    """Report whether any header value contains ``search``.

    Args:
        search: value tested with ``in`` against every header value.
        headers: iterable of mappings; every value of every mapping is
            checked.

    Returns:
        True as soon as one value contains ``search``, otherwise False
        (including when ``headers`` is empty).
    """
    return any(
        search in val
        for header in headers
        for val in header.values()
    )