# Files
# mailfilter/filter-rules.py
# 2021-04-14 16:25:01 -07:00
#
# 227 lines
# 6.4 KiB
# Python

# Sender addresses that are always junked. filter_message compares the
# lower-cased sender address against these entries exactly, and an entry
# here also overrides a match in ALLOW.
BLOCK_EMAIL = [
    "chirhart@amazon.com",
    "ron.krogel@citrix.com",
    "replieswelcome@duo.com",
    "webinars@duo.com",
    "diversity@oregonstate.edu",
    "jramiro@pagerduty.com",
    "info@snowflake.com",
    "lisa@duo.com",
    "cortana@microsoft.com",
]
# Phrases that mark a message as junk when found in its subject or body.
# init_filters stores a lower-cased copy in normalized['BLOCK_KEYWORDS'],
# which is what filter_message actually searches with.
BLOCK_KEYWORDS = [
    "email preferences",
    "end these email updates",
    "manage your preferences",
    "modify your preferences",
    "opt-out",
    "opt out",
    "prefer not to receive",
    "prefer not to see",
    "Samsung SDS America",
    "sidekickopen",
    "subscription preferences",
    "survey",
    "unsubscribe",
    "want to receive",
    "webinar",
    "whitepaper",
    "wish to receive",
]
# Domains that mark a message as junk. filter_message looks for each entry
# as a substring of the sender address and of every header value.
BLOCK_DOMAINS = [
    "customeriomail.com",
    "mailgun.net",
    "norstar.net",
]
# Phrases for unwanted internal announcements. filter_message junks any
# message whose subject or body contains one of them, and it does so before
# the ALLOW list is consulted. init_filters stores the lower-cased copy in
# normalized['BLOCK_ANNOYING'].
#
# Every entry ends with a comma: without one, Python silently joins two
# adjacent string literals into a single phrase that matches nothing.
BLOCK_ANNOYING = [
    'charitable fund drive',
    'election reminder',
    'food drive',
    'food share',
    'general election',
    'giving tuesday',
    'hardship leave donations needed',
    'register to vote',
    'special election',
    'voter registration',
]
# Trusted domains and addresses. filter_message keeps a message as soon as
# one of these entries occurs as a substring of the lower-cased sender
# address, unless that exact address is listed in BLOCK_EMAIL.
ALLOW = [
    "oregonstate.edu",
    "github.com",
    "duo.com",
    "sns.amazonaws.com",
    "opsgenie.net",
    "notify@teamdynamixapp.com",
    "newsbites@email.sans.org",
    "noreply@box.com",
    "noreply@email.teams.microsoft.com",
    "no-reply@sharepointonline.com",
    "slalom.com",
    "govdelivery.com",
    "linkoregon.org",
    "megan@pdxwit.org",
    "busyconf.com",
    "support@githubsupport.com",
    "microsoft.com",
]
# Module-level state, both filled in by init_filters:
#   folder_cache - copy of the parent object's folder mapping, read by
#                  move_message to resolve a folder name
#   normalized   - lower-cased copies of the BLOCK_* phrase lists, keyed by
#                  the list's name
folder_cache = dict()
normalized = dict()
def _has_spam_header(headers):
    """Return True if the normalized headers carry a known spam marker.

    ``headers`` is the list of single-entry ``{name: lower-cased value}``
    dicts built by ``filter_message``. A message counts as spam when an
    ``X-Spam-Flag`` header equals ``yes`` or an
    ``X-MS-Exchange-Organization-SCL`` header holds an integer of 5 or more.
    """
    def as_list(found):
        # get_header returns False when the header is absent, the bare value
        # for a single hit and a list for several; fold all three shapes
        # into a list so repeated headers cannot break the checks below.
        if found is False:
            return []
        return found if isinstance(found, list) else [found]

    # Values were lower-cased during normalization, so compare with 'yes'.
    spam_flags = as_list(get_header('X-Spam-Flag', headers))
    if any(flag.strip() == 'yes' for flag in spam_flags):
        return True

    for level in as_list(get_header('X-MS-Exchange-Organization-SCL', headers)):
        try:
            if int(level) >= 5:
                return True
        except ValueError:
            # A non-numeric SCL value cannot be scored; skip it instead of
            # aborting the whole filter run.
            continue
    return False


def filter_message(self, message):
    """Run one message through the filter rules, stopping at the first match.

    Rules are evaluated in this order:

    1. unactionable coresys job reports -> moved to 'unactionable'
    2. storage / datastore alerts       -> moved to 'alerts-sig'
    3. conference spam                  -> deleted
    4. nightly backup reports           -> moved to 'backup-nightly'
    5. HP alert spam                    -> deleted
    6. BLOCK_ANNOYING phrases           -> moved to 'Junk Email'
    7. sender matches ALLOW             -> kept
    8. sender in BLOCK_EMAIL            -> moved to 'Junk Email'
    9. BLOCK_KEYWORDS phrases           -> moved to 'Junk Email'
    10. BLOCK_DOMAINS in sender/headers -> moved to 'Junk Email'
    11. known spam headers              -> moved to 'Junk Email'

    A message that matches nothing is kept. Every outcome is reported through
    ``self._log_result``. ``init_filters`` must have run beforehand, because
    the phrase rules read the lower-cased lists it stores in ``normalized``.

    Args:
        message: mail object exposing ``to`` (iterable of recipients with an
            ``address``), ``sender.address``, ``subject``, ``body``,
            ``message_headers`` (iterable of ``{'name': ..., 'value': ...}``)
            and the ``move`` / ``delete`` methods.

    Returns:
        None.
    """
    # normalize message attributes
    normalized_to = [x.address.lower() for x in message.to]
    normalized_from = message.sender.address.lower()

    # filter unactionable coresys emails
    if 'coresys@lists.oregonstate.edu' in normalized_from:
        unactionable = [x.lower() for x in [
            'DWPRODRAW Verification',
            'BFPDB Reimbursement Job Monitor - OK',
            'DSDB Important Job Monitor - OK'
        ]]
        if is_in_message(unactionable, message):
            self._log_result(message, 'moving to unactionable')
            move_message(message, 'unactionable')
            return

    # filter alerts-sig
    if (normalized_from in ['mcc-b11-stor1@oregonstate.edu',
                            'mcc-b12-stor1@oregonstate.edu',
                            'ousclus@oregonstate.edu',
                            'isilon@storage.sig.oregonstate.edu']
            or 'alarm.DatastoreDiskUsageAlarm' in message.subject):
        self._log_result(message, 'moving to alerts-sig')
        move_message(message, 'alerts-sig')
        return

    # filter conference spam
    if 'brocks+conf@onid.oregonstate.edu' in normalized_to:
        self._log_result(message, 'deleting conference spam')
        message.delete()
        return

    # filter backup-nightly
    # Test the lower-cased recipient list, like the conference rule above,
    # so the match does not depend on the casing of the To: address.
    if 'backup-nightly@lists.oregonstate.edu' in normalized_to:
        self._log_result(message, 'moving to backup-nightly')
        move_message(message, 'backup-nightly')
        return

    # delete HP alert spam
    if normalized_from == 'alerts@alerts.mail.hpe.com':
        self._log_result(message, 'deleting HP alert spam')
        message.delete()
        return

    # delete other spam
    if is_in_message(normalized['BLOCK_ANNOYING'], message):
        self._log_result(message, 'junking spam containing annoying content')
        move_message(message, 'Junk Email')
        return

    # keep messages from allowed emails and domains
    # NOTE(review): this is a substring test, so an ALLOW entry also matches
    # look-alike senders that merely contain it - confirm this is intended.
    for good in ALLOW:
        if good in normalized_from and normalized_from not in BLOCK_EMAIL:
            self._log_result(message,
                             f"keeping message from allowed sender {good}")
            return

    # junk messages from blocked senders
    if normalized_from in BLOCK_EMAIL:
        self._log_result(message, 'junking spam from blocked sender')
        move_message(message, 'Junk Email')
        return

    # junk messages with blocked keywords
    if is_in_message(normalized['BLOCK_KEYWORDS'], message):
        self._log_result(message, 'junking spam containing blocked keyword')
        move_message(message, 'Junk Email')
        return

    # process message headers into a sane data structure:
    # one {name: lower-cased value} dict per header
    headers = [{header['name']: header['value'].lower()}
               for header in message.message_headers]

    # junk messages from blocked domains
    if any(domain in normalized_from or search_headers(domain, headers)
           for domain in BLOCK_DOMAINS):
        self._log_result(message, 'junking spam from blocked domain')
        move_message(message, 'Junk Email')
        return

    # junk known spam headers
    if _has_spam_header(headers):
        self._log_result(message, 'junking spam with known header')
        move_message(message, 'Junk Email')
        return

    # KEEP MESSAGE
    self._log_result(message, 'keeping message, passed all filter checks')
def is_in_message(list_, message):
    """Report whether any search term occurs in a message.

    The subject and body are lower-cased before searching; the terms in
    ``list_`` are used as given, so callers pass lower-case terms.

    Returns True if at least one term is a substring of the subject or the
    body, otherwise False (including for an empty ``list_``).
    """
    body_text = message.body.lower()
    subject_text = message.subject.lower()
    return any(term in subject_text or term in body_text for term in list_)
def move_message(message, folder):
    """Move ``message`` to the folder cached under the name ``folder``.

    Returns False, leaving the message untouched, when ``folder`` has no
    entry in ``folder_cache``. Otherwise hands the cached value to
    ``message.move`` and returns None.
    """
    try:
        destination = folder_cache[folder]
    except KeyError:
        # unknown folder name: nothing to move to
        return False
    message.move(destination)
    return None
def init_filters(self):
    """Prepare the module-level state the filter functions rely on.

    Copies every entry of ``self._folders`` into ``folder_cache`` and stores
    lower-cased copies of BLOCK_KEYWORDS and BLOCK_ANNOYING in ``normalized``
    under those same names.
    """
    # hack to mirror a dict from the parent object into the module-level cache
    folder_cache.update(self._folders.items())

    phrase_lists = (
        ('BLOCK_KEYWORDS', BLOCK_KEYWORDS),
        ('BLOCK_ANNOYING', BLOCK_ANNOYING),
    )
    for list_name, phrases in phrase_lists:
        normalized[list_name] = [phrase.lower() for phrase in phrases]
def get_header(header_key, headers):
    """Look up the value(s) of a header in a list of header dicts.

    Every dict in ``headers`` that contains ``header_key`` contributes all of
    its values to the result.

    Returns False when nothing matched, the bare value when exactly one was
    collected, and a list of the values when several were.
    """
    matches = [
        value
        for entry in headers
        if header_key in entry
        for value in entry.values()
    ]
    if not matches:
        return False
    if len(matches) == 1:
        return matches[0]
    return matches
def search_headers(search, headers):
    """Report whether ``search`` occurs inside any header value.

    ``headers`` is a list of dicts; every value of every dict is checked for
    ``search`` as a substring. Returns True on the first hit, else False.
    """
    return any(
        search in value
        for entry in headers
        for value in entry.values()
    )