# Files
# mailfilter/filter-rules.py
# 2023-11-07 10:04:49 -08:00
#
# 381 lines
# 12 KiB
# Python

# Exact sender addresses (lowercase) whose mail is always junked.
# filter_message() checks this list before honouring ALLOW, so an address
# listed here is junked even when its domain is otherwise allowed.
BLOCK_EMAIL = [
    # vendors
    'chirhart@amazon.com',
    'ron.krogel@citrix.com',
    'lisa@duo.com',
    'replieswelcome@duo.com',
    'webinars@duo.com',
    'alerts@alerts.mail.hpe.com',
    'viva-noreply@microsoft.com',
    'jramiro@pagerduty.com',
    'info@snowflake.com',
    'noreply-marketplace@zoom.us',
    # campus senders
    'equal.opportunity@oregonstate.edu',
    'diversity@oregonstate.edu',
    'evals@oregonstate.edu',
    'sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com',
]
# Phrases that mark a message as bulk/marketing mail.
# init_filters() stores a lowercased copy in normalized['BLOCK_KEYWORDS'],
# and filter_message() junks any message whose subject, sender address or
# body contains one of them (substring match).
BLOCK_KEYWORDS = [
    'advertising services',
    'closeml.com',
    'email campaign',
    'email preferences',
    'end these email updates',
    'manage your preferences',
    'marketing',
    'megacast',
    'modify your preferences',
    'newsletter',
    'no longer would like to be contacted',
    'opt-out',
    'opt out',
    'prefer not to receive',
    'prefer not to see',
    'rather not receive',
    'remove from list',
    'remove from this list',
    'Samsung SDS America',
    'sidekickopen',
    'subscription preferences',
    'survey',
    'take-me-off',
    'this advertisement',
    'unsub_center',
    'unsubscribe',
    'want to receive',
    'webcast',
    'webinar',
    'white paper',
    'whitepaper',
    'wish to be contacted',
    'wish to receive',
]
# Domain fragments used by filter_message() as a last line of defence:
# a message is junked when one of these is a substring of the sender
# address or of any (lowercased) header value.
BLOCK_DOMAINS = [
    'aafintl.com',
    'atscale.com',
    'astutechsolutions.com',
    'bytespeed.com',
    'checkpoint.com',
    'cmadvantage',
    'customeriomail.com',
    'denodo.com',
    'exacttarget.com',
    'freshsales.io',
    'ikigailabs.io',
    'informareachmedia.com',
    'javentechnologies.com',
    'kuusakoski.com',
    'mailgun.net',
    'malwarebytes.com',
    'matrixservice.com',
    'mimecast.com',
    'mktomail.com',
    'msgfocus.com',
    'norstar.net',
    'orjuno.com',
    'pphosted.com',
    'radware.com',
    'rsmatco.com',
    'rubrain.agency',
    'sendgrid.net',
    'sparkpostmail.com',
    'techmate.com',
    'thesourcery.com',
    'trustedmailservers.com',
    'zerowait.com',
    'znsrc.com',
]
# Phrases for recurring campus-wide announcements that should be junked.
# init_filters() stores a lowercased copy in normalized['BLOCK_ANNOYING'];
# filter_message() checks it before the ALLOW list, so these phrases junk
# a message even when it comes from an allowed sender.
#
# Every entry ends with a comma on purpose: adjacent string literals are
# concatenated by Python, so a missing comma silently merges two phrases
# into one that never matches.
BLOCK_ANNOYING = [
    'charitable fund drive',
    'election reminder',
    'faculty senate agenda',
    'food drive',
    'food share',
    'general election',
    'giving tuesday',
    'hardship leave donations needed',
    'register to vote',
    'service desk survey request',
    'special election',
    'voter registration',
]
# Senders whose mail is kept without keyword/domain/header screening.
# Each entry is either a full address or a bare domain; filter_message()
# accepts a message when an entry is a substring of the lowercased sender
# address and that address is not in BLOCK_EMAIL.
#
# Entries must be lowercase: they are compared against a lowercased
# address, so an entry containing capitals can never match.
ALLOW = [
    'oregonstate.edu',
    'github.com',
    'osu.atlassian.net',
    'oregonstateuniversity.atlassian.net',
    'duo.com',
    'sns.amazonaws.com',
    'opsgenie.net',
    'notify@teamdynamixapp.com',
    'newsbites@email.sans.org',
    'noreply@box.com',
    'noreply@email.teams.microsoft.com',
    'no-reply@sharepointonline.com',
    'govdelivery.com',
    'linkoregon.org',
    'pdxwit.org',
    'busyconf.com',
    'support@githubsupport.com',
    'microsoft.com',
    'docusign.net',
    'ideal-logic.com',
    'heliocampus.atlassian.net',
    'ctptravelservices.com',
    'travel@concursolutions.com',
    'substack.com',
    'nytdirect@nytimes.com',
]
# Module-level state, empty until init_filters() runs:
#   normalized   -- lowercased keyword lists, keyed by the list's name
#   folder_cache -- folder name -> object passed to message.move()
normalized, folder_cache = {}, {}
def filter_message(self, message):
    """Run one message through the filter rules; the first match wins.

    Rules are evaluated top to bottom and each matching rule logs its
    decision through ``self._log_result`` and returns immediately:

    1. routing rules for known automated senders and mailing lists
       (moved to a folder or deleted),
    2. BLOCK_ANNOYING phrases (junked, even from allowed senders),
    3. ALLOW senders (kept) unless the address is in BLOCK_EMAIL,
    4. BLOCK_EMAIL, BLOCK_KEYWORDS, BLOCK_DOMAINS and known spam
       headers (junked),
    5. otherwise the message is kept.

    ``init_filters`` must have run first: this function reads
    ``normalized['BLOCK_ANNOYING']`` and ``normalized['BLOCK_KEYWORDS']``,
    and ``move_message`` only moves to folders present in
    ``folder_cache``.

    Args:
        self: object providing ``_log_result(message, text)``.
        message: message object exposing ``to`` (iterable of recipients
            with ``.address``), ``sender.address``, ``subject``,
            ``body``, ``message_headers`` (iterable of mappings with
            ``'name'`` and ``'value'``) and ``delete()``.

    Returns:
        None. The outcome is the side effect (move, delete or keep).
    """
    # normalize message attributes
    normalized_to = [x.address.lower() for x in message.to]
    normalized_from = message.sender.address.lower()

    # filter unactionable IAR emails
    automated_sources = [
        'coresys@lists.oregonstate.edu',
        'iar.ref@oregonstate.edu',
        'iar.systems-team@oregonstate.edu',
        'changes_osu@heliocampus.com',
        'no-reply@vmockmail.com',
        'noreply-beaverhub@oregonstate.edu',
        'noreply@chatter.salesforce.com',
    ]
    if any(source in normalized_from for source in automated_sources):
        unactionable = [x.lower() for x in [
            'DWPRODRAW Verification',
            'Job Monitor',
            ' - OK',
            'ODProd Row Count Comparison',
            'Oracle ODprod Sessions Older Than Today',
            'DSDB Server Audit',
            'UserBase.Users users deactivated due to ORG changes and termination',
            'SSRS Datasets Needing Caching',
            'Audit Update',
            'ScholarUniverse to GRRS load',
            'OACIS Pending Actions Notice',
            'You Have OSUF Disapproved Reimbursements',
            'Your GRRS to Banner scholarship load report',
            'You Have OSUF Reimbursements to Review',
            'You Have Scholarship Payment Plans',
            'You Have Direct Payment Requests to Review',
            'You Have Disapproved Scholarship Payment Plans',
            'OSUF Reimbursements Needing Your Prompt Attention',
            'Redistribution Verification Error',
            'Direct Payment Request',
            'Status Change in your Detail Code Request',
            'loaded into the Index Reimbursement System',
            'JV required for',
            'Changes via Tableau REST API at OSU',
            'DSDBTEST',
            'has been assigned to you or your queue',
            'VMock CSV S3 Upload Failed',
            'Your Daily Digest for Oregon State University',
        ]]
        if is_in_message(unactionable, message):
            self._log_result(message, 'moving to unactionable')
            move_message(message, 'unactionable')
            return

    # filter unactionable InCommon SSL cert emails
    if 'support@cert-manager.com' in normalized_from:
        actionable = [
            'iar',
            'sig',
            'analytics',
            'cwp-access',
            'dsdb',
            'tableau',
        ]
        if is_in_message(actionable, message):
            self._log_result(
                message, 'keeping message for actionable SSL notification')
            return
        self._log_result(message, 'moving to unactionable')
        move_message(message, 'unactionable')
        return

    # filter dependabot
    if 'dependabot[bot]' in str(message.sender):
        self._log_result(message, 'moving to dependabot')
        move_message(message, '99-dependabot')
        return

    # filter ACTWON
    if 'actwon_administration@lists.oregonstate.edu' in normalized_from:
        # messages mentioning these terms are left for the later rules
        if not is_in_message(['stacy brock', 'mist', 'integration'], message):
            self._log_result(message, 'moving to ACTWON')
            move_message(message, 'lists/ACTWON')
            return

    # filter ACUG
    if 'isacug@oregonstate.edu' in normalized_to:
        self._log_result(message, 'moving to ACUG')
        move_message(message, 'lists/ACUG')
        return

    # filter HelioCampus helpdesk
    if 'jira@heliocampus.atlassian.net' in normalized_from:
        self._log_result(message, 'moving to 5-hc-helpdesk')
        move_message(message, '5-hc-helpdesk')
        return

    # filter alerts-sig
    if (normalized_from in ['mcc-b11-stor1@oregonstate.edu',
                            'mcc-b12-stor1@oregonstate.edu',
                            'ousclus@oregonstate.edu',
                            'isilon@storage.sig.oregonstate.edu',
                            'me4012@sig.oregonstate.edu']
            or 'alarm.DatastoreDiskUsageAlarm' in message.subject):
        self._log_result(message, 'moving to alerts-sig')
        move_message(message, 'lists/alerts-sig')
        return

    # filter backup-nightly
    # Compare against the lowercased address strings, like the other
    # recipient rules, rather than against the raw recipient collection.
    if 'backup-nightly@lists.oregonstate.edu' in normalized_to:
        self._log_result(message, 'moving to backup-nightly')
        move_message(message, 'lists/backup-nightly')
        return

    # filter quarantine spam
    if 'quarantine@messaging.microsoft.com' in normalized_from:
        self._log_result(message, 'moving to unactionable')
        move_message(message, 'unactionable')
        return

    # delete Atlassian spam
    if ('confluence@osu.atlassian.net' in normalized_from
            and '[Confluence] Daily Digest' in message.subject):
        self._log_result(message, 'deleting atlassian spam')
        message.delete()
        return

    # delete Rave junk
    if ('guardian@getrave.com' in normalized_from
            and 'New Guardian Chat' in message.subject):
        self._log_result(message, 'deleting rave message')
        message.delete()
        return

    # delete conference spam
    if 'brocks+conf@onid.oregonstate.edu' in normalized_to:
        self._log_result(message, 'deleting conference spam')
        message.delete()
        return

    # delete other spam
    if is_in_message(normalized['BLOCK_ANNOYING'], message):
        self._log_result(message, 'junking spam containing annoying content')
        move_message(message, 'Junk Email')
        return

    # keep messages from allowed emails and domains
    # NOTE(review): this is a substring match, so an entry such as
    # 'duo.com' also matches 'someone@notduo.com' -- confirm that is
    # acceptable.
    if normalized_from not in BLOCK_EMAIL:
        for good in ALLOW:
            # lowercase the entry: the address it is compared with is
            # lowercased, so a mixed-case entry would never match
            if good.lower() in normalized_from:
                self._log_result(
                    message, f"keeping message from allowed sender {good}")
                return

    # junk messages from blocked senders
    if normalized_from in BLOCK_EMAIL:
        self._log_result(message, 'junking spam from blocked sender')
        move_message(message, 'Junk Email')
        return

    # junk messages with blocked keywords
    if is_in_message(normalized['BLOCK_KEYWORDS'], message):
        self._log_result(message, 'junking spam containing blocked keyword')
        move_message(message, 'Junk Email')
        return

    # process message headers into a sane data structure:
    # one single-entry dict per header, value lowercased
    headers = [{header['name']: header['value'].lower()}
               for header in message.message_headers]

    # junk messages from blocked domains
    if any(domain in normalized_from or search_headers(domain, headers)
           for domain in BLOCK_DOMAINS):
        self._log_result(message, 'junking spam from blocked domain')
        move_message(message, 'Junk Email')
        return

    # junk known spam headers
    if (_spam_confidence_level(headers) >= 5
            or get_header('X-Mailgun-List-Address', headers)
            or get_header('X-SFDC-EmailCategory', headers) == 'apimassmail'):
        self._log_result(message, 'junking spam with known header')
        move_message(message, 'Junk Email')
        return

    # KEEP MESSAGE
    self._log_result(message, 'keeping message, passed all filter checks')


def _spam_confidence_level(headers):
    """Return the highest integer X-MS-Exchange-Organization-SCL value.

    get_header() yields False when the header is absent, a string when it
    occurs once and a list when it occurs several times; passing the list
    (or a non-numeric string) straight to int() would raise.  Values that
    cannot be parsed are ignored, and 0 is returned when nothing usable
    is found, which matches the previous result for a missing header.
    """
    found = get_header('X-MS-Exchange-Organization-SCL', headers)
    candidates = found if isinstance(found, list) else [found]
    levels = []
    for raw in candidates:
        try:
            levels.append(int(raw))
        except (TypeError, ValueError):
            continue
    return max(levels, default=0)
def is_in_message(list_, message):
    """Tell whether any of the given terms occurs in a message.

    The subject, sender address and body are lowercased before the
    comparison; the terms are used exactly as passed, so callers are
    expected to supply lowercase terms.

    Returns True as soon as one term is a substring of the subject, the
    sender address or the body, and False when none is (including when
    the list is empty).
    """
    body_text = message.body.lower()
    subject_text = message.subject.lower()
    sender_text = message.sender.address.lower()
    searched = (subject_text, sender_text, body_text)
    return any(term in text for term in list_ for text in searched)
def move_message(message, folder):
    """Move a message into the cached folder registered under ``folder``.

    Returns False without touching the message when ``folder`` is not a
    key of ``folder_cache``; otherwise calls ``message.move`` with the
    cached folder and returns None.  Both results are falsy, so callers
    cannot use truthiness to tell success from failure.
    """
    try:
        destination = folder_cache[folder]
    except KeyError:
        return False
    message.move(destination)
def init_filters(self):
    """Populate the module-level caches used by the filter functions.

    Copies every entry of ``self._folders`` into ``folder_cache`` and
    stores lowercased copies of BLOCK_KEYWORDS and BLOCK_ANNOYING in
    ``normalized`` under those names.
    """
    # hack to copy a dict from parent object into local object
    folder_cache.update(self._folders.items())

    # normalize filter keyword lists
    for list_name, terms in (('BLOCK_KEYWORDS', BLOCK_KEYWORDS),
                             ('BLOCK_ANNOYING', BLOCK_ANNOYING)):
        normalized[list_name] = [term.lower() for term in terms]
def get_header(header_key, headers):
    """Look up a header by exact name in a list of header dicts.

    Collects the values of every dict in ``headers`` that has
    ``header_key`` as a key.

    Returns False when nothing was collected, the value itself when
    exactly one was collected, and the list of values otherwise.
    """
    collected = []
    for entry in headers:
        if header_key in entry:
            collected.extend(entry.values())

    if not collected:
        return False
    if len(collected) == 1:
        return collected[0]
    return collected
def search_headers(search, headers):
    """Tell whether ``search`` is a substring of any header value.

    ``headers`` is a list of dicts; only the values are inspected, never
    the keys.  Returns True on the first match, False otherwise.
    """
    return any(
        search in value
        for entry in headers
        for value in entry.values()
    )