409 lines
13 KiB
Python
409 lines
13 KiB
Python
# Sender addresses that are always junked. filter_message() compares the
# lowercased sender against this list with an exact match, and a sender
# listed here is never rescued by the ALLOW list.
BLOCK_EMAIL = [
    'chirhart@amazon.com',
    'ron.krogel@citrix.com',
    'lisa@duo.com',
    'replieswelcome@duo.com',
    'webinars@duo.com',
    'alerts@alerts.mail.hpe.com',
    'viva-noreply@microsoft.com',
    'jramiro@pagerduty.com',
    'info@snowflake.com',
    'noreply-marketplace@zoom.us',
    'equal.opportunity@oregonstate.edu',
    'diversity@oregonstate.edu',
    'evals@oregonstate.edu',
    'sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com',
]
|
|
|
|
# Phrases that mark a message as bulk/marketing mail. init_filters() stores a
# lowercased copy in normalized['BLOCK_KEYWORDS'], which filter_message()
# searches for in the subject, sender address and body.
BLOCK_KEYWORDS = [
    'advertising services',
    'closeml.com',
    'email campaign',
    'email preferences',
    'end these email updates',
    'manage your preferences',
    'marketing',
    'megacast',
    'modify your preferences',
    'newsletter',
    'no longer would like to be contacted',
    'opt-out',
    'opt out',
    'prefer not to receive',
    'prefer not to see',
    'rather not receive',
    'remove from list',
    'remove from this list',
    'Samsung SDS America',
    'sidekickopen',
    'subscription preferences',
    'survey',
    'take-me-off',
    'this advertisement',
    'unsub_center',
    'unsubscribe',
    'want to receive',
    'webcast',
    'webinar',
    'white paper',
    'whitepaper',
    'wish to be contacted',
    'wish to receive',
]
|
|
|
|
# Domain fragments that mark a message as spam. filter_message() junks a
# message when one of these appears in the sender address or in any header
# value (substring match).
BLOCK_DOMAINS = [
    'aafintl.com',
    'atscale.com',
    'astutechsolutions.com',
    'bytespeed.com',
    'checkpoint.com',
    'cmadvantage',
    'customeriomail.com',
    'denodo.com',
    'exacttarget.com',
    'freshsales.io',
    'ikigailabs.io',
    'impetus.com',
    'informareachmedia.com',
    'javentechnologies.com',
    'kuusakoski.com',
    'mailgun.net',
    'malwarebytes.com',
    'matrixservice.com',
    'mimecast.com',
    'mktomail.com',
    'msgfocus.com',
    'norstar.net',
    'orjuno.com',
    'pphosted.com',
    'radware.com',
    'rsmatco.com',
    'rubrain.agency',
    'sendgrid.net',
    'sparkpostmail.com',
    'techmate.com',
    'thesourcery.com',
    'trustedmailservers.com',
    'zerowait.com',
    'znsrc.com',
]
|
|
|
|
# Phrases for recurring campus-wide announcements that should be junked.
# init_filters() stores a lowercased copy in normalized['BLOCK_ANNOYING'],
# which filter_message() searches for in the subject, sender address and body.
#
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which previously merged
# 'register to vote' and 'service desk survey request' into one phrase that
# could never match.
BLOCK_ANNOYING = [
    'charitable fund drive',
    'election reminder',
    'faculty senate agenda',
    'food drive',
    'food share',
    'general election',
    'giving tuesday',
    'hardship leave donations needed',
    'register to vote',
    'service desk survey request',
    'special election',
    'voter registration',
]
|
|
|
|
# Senders (full addresses or domain fragments) whose mail is always kept.
# filter_message() substring-tests each entry against the *lowercased* sender
# address, so every entry must be lowercase to be able to match; the Concur
# address was previously capitalised and therefore never matched.
ALLOW = [
    'oregonstate.edu',
    'github.com',
    'osu.atlassian.net',
    'oregonstateuniversity.atlassian.net',
    'duo.com',
    'sns.amazonaws.com',
    'opsgenie.net',
    'notify@teamdynamixapp.com',
    'newsbites@email.sans.org',
    'noreply@box.com',
    'noreply@email.teams.microsoft.com',
    'no-reply@sharepointonline.com',
    'govdelivery.com',
    'linkoregon.org',
    'pdxwit.org',
    'busyconf.com',
    'support@githubsupport.com',
    'microsoft.com',
    'docusign.net',
    'ideal-logic.com',
    'heliocampus.atlassian.net',
    'ctptravelservices.com',
    'travel@concursolutions.com',
    'substack.com',
    'nytdirect@nytimes.com',
]
|
|
|
|
# Module-level state, filled in by init_filters().
# Lowercased copies of the keyword lists, keyed by the list's name.
normalized: dict = dict()
# Folder name -> folder object, as used by move_message().
folder_cache: dict = dict()
# Category name -> category object, as used by add_category().
category_cache: dict = dict()
|
|
|
|
def _spam_confidence_level(headers):
    """Return the X-MS-Exchange-Organization-SCL header value as an int.

    get_header() yields False when the header is absent and a list when it
    occurs more than once; unparseable values are ignored. The highest
    parseable level wins, and 0 is returned when nothing can be parsed.
    """
    raw = get_header('X-MS-Exchange-Organization-SCL', headers)
    candidates = raw if isinstance(raw, list) else [raw]
    levels = []
    for candidate in candidates:
        try:
            levels.append(int(candidate))
        except (TypeError, ValueError):
            continue
    return max(levels, default=0)


def filter_message(self, message):
    """Apply the mailbox rules to one message; the first matching rule wins.

    Rules are evaluated top to bottom. A matching rule logs its decision via
    self._log_result() and then moves the message to a folder, deletes it,
    tags it with a category, or leaves it in place, after which the function
    returns. A message that matches nothing is kept and logged as such.

    The keyword rules read normalized['BLOCK_ANNOYING'] and
    normalized['BLOCK_KEYWORDS'], which init_filters() populates, so
    init_filters() must have run first.

    Args:
        message: mail message object exposing to, sender, subject, body,
            message_headers, move(), delete(), add_category() and
            save_message().

    Returns:
        None in every case.
    """
    # normalize message attributes
    normalized_to = [x.address.lower() for x in message.to]
    normalized_from = message.sender.address.lower()

    # process message headers into the structure get_header() and
    # search_headers() expect: a list of {name: lowercased value} dicts
    headers = [
        {header['name']: header['value'].lower()}
        for header in message.message_headers
    ]

    # filter unactionable IAR emails
    automated_sources = [
        'coresys@lists.oregonstate.edu',
        'iar.ref@oregonstate.edu',
        'iar.systems-team@oregonstate.edu',
        'changes_osu@heliocampus.com',
        'no-reply@vmockmail.com',
        'noreply-beaverhub@oregonstate.edu',
        'noreply@chatter.salesforce.com',
    ]
    if any(source in normalized_from for source in automated_sources):
        unactionable = [x.lower() for x in [
            'DWPRODRAW Verification',
            'Job Monitor',
            ' - OK',
            'ODProd Row Count Comparison',
            'Oracle ODprod Sessions Older Than Today',
            'DSDB Server Audit',
            'UserBase.Users users deactivated due to ORG changes and termination',
            'SSRS Datasets Needing Caching',
            'Audit Update',
            'ScholarUniverse to GRRS load',
            'OACIS Pending Actions Notice',
            'You Have OSUF Disapproved Reimbursements',
            'Your GRRS to Banner scholarship load report',
            'You Have OSUF Reimbursements to Review',
            'You Have Scholarship Payment Plans',
            'You Have Direct Payment Requests to Review',
            'You Have Disapproved Scholarship Payment Plans',
            'OSUF Reimbursements Needing Your Prompt Attention',
            'Redistribution Verification Error',
            'Direct Payment Request',
            'Status Change in your Detail Code Request',
            'loaded into the Index Reimbursement System',
            'JV required for',
            'Changes via Tableau REST API at OSU',
            'DSDBTEST',
            'has been assigned to you or your queue',
            'VMock CSV S3 Upload',
            'Your Daily Digest for Oregon State University',
        ]]
        # NOTE(review): automated mail that matches none of these phrases
        # falls through to the remaining rules - confirm that is intended.
        if is_in_message(unactionable, message):
            self._log_result(message, 'moving to unactionable')
            move_message(message, 'unactionable')
            return

    # filter unactionable InCommon SSL cert emails
    if 'support@cert-manager.com' in normalized_from:
        actionable = [
            'iar',
            'sig',
            'analytics',
            'cwp-access',
            'dsdb',
            'tableau',
        ]
        if is_in_message(actionable, message):
            self._log_result(
                message, 'keeping message for actionable SSL notification')
            return
        self._log_result(message, 'moving to unactionable')
        move_message(message, 'unactionable')
        return

    # filter dependabot
    if 'dependabot[bot]' in str(message.sender):
        self._log_result(message, 'moving to dependabot')
        move_message(message, '99-dependabot')
        return

    # filter ACTWON
    if 'actwon_administration@lists.oregonstate.edu' in normalized_from:
        # NOTE(review): list mail mentioning any of these terms is not filed
        # here and continues through the remaining rules - confirm.
        if not is_in_message(['stacy brock', 'mist', 'integration'], message):
            self._log_result(message, 'moving to ACTWON')
            move_message(message, 'lists/ACTWON')
            return

    # filter ACUG
    if 'isacug@oregonstate.edu' in normalized_to:
        self._log_result(message, 'moving to ACUG')
        move_message(message, 'lists/ACUG')
        return

    # filter HelioCampus helpdesk
    if 'jira@heliocampus.atlassian.net' in normalized_from:
        self._log_result(message, 'moving to 5-hc-helpdesk')
        move_message(message, '5-hc-helpdesk')
        return

    # filter alerts-sig
    alert_senders = [
        'mcc-b11-stor1@oregonstate.edu',
        'mcc-b12-stor1@oregonstate.edu',
        'ousclus@oregonstate.edu',
        'isilon@storage.sig.oregonstate.edu',
        'me4012@sig.oregonstate.edu',
    ]
    if (normalized_from in alert_senders
            or 'alarm.DatastoreDiskUsageAlarm' in message.subject):
        self._log_result(message, 'moving to alerts-sig')
        move_message(message, 'lists/alerts-sig')
        return

    # filter backup-nightly
    # Tested against the lowercased address list, like the ACUG rule, rather
    # than against the raw recipients object.
    if 'backup-nightly@lists.oregonstate.edu' in normalized_to:
        self._log_result(message, 'moving to backup-nightly')
        move_message(message, 'lists/backup-nightly')
        return

    # filter quarantine spam
    if 'quarantine@messaging.microsoft.com' in normalized_from:
        self._log_result(message, 'moving to unactionable')
        move_message(message, 'unactionable')
        return

    # delete Atlassian spam
    if ('confluence@osu.atlassian.net' in normalized_from
            and '[Confluence] Daily Digest' in message.subject):
        self._log_result(message, 'deleting atlassian spam')
        message.delete()
        return

    # delete Salesforce spam
    if ('salesforce.com' in normalized_from
            and 'sandbox' in message.subject.lower()):
        self._log_result(message, 'deleting salesforce spam')
        message.delete()
        return

    # delete Rave junk
    if ('guardian@getrave.com' in normalized_from
            and 'new guardian chat' in message.subject.lower()):
        self._log_result(message, 'deleting rave spam')
        message.delete()
        return

    # delete conference spam
    if 'brocks+conf@onid.oregonstate.edu' in normalized_to:
        self._log_result(message, 'deleting conference spam')
        message.delete()
        return

    # delete other spam
    if is_in_message(normalized['BLOCK_ANNOYING'], message):
        self._log_result(message, 'junking spam containing annoying content')
        move_message(message, 'Junk Email')
        return

    # add 'OSU Inform' category to internal messages sent to DLs
    sent_without_to_or_list = (not get_header('To', headers)
                               and not get_header('List-Id', headers))
    if ('@oregonstate.edu' in normalized_from
            and (sent_without_to_or_list
                 or 'inform-c' in ' '.join(normalized_to))):
        self._log_result(message, "adding category 'OSU Inform'")
        add_category(message, 'OSU Inform')
        return

    # keep messages from allowed emails and domains
    # (explicitly blocked senders are never rescued by the allow list)
    if normalized_from not in BLOCK_EMAIL:
        for good in ALLOW:
            # the sender is lowercased, so compare the entry in lowercase too
            if good.lower() in normalized_from:
                self._log_result(
                    message, f"keeping message from allowed sender {good}")
                return

    # junk messages from blocked senders
    if normalized_from in BLOCK_EMAIL:
        self._log_result(message, 'junking spam from blocked sender')
        move_message(message, 'Junk Email')
        return

    # junk messages with blocked keywords
    if is_in_message(normalized['BLOCK_KEYWORDS'], message):
        self._log_result(message, 'junking spam containing blocked keyword')
        move_message(message, 'Junk Email')
        return

    # junk messages from blocked domains
    if any(domain in normalized_from or search_headers(domain, headers)
           for domain in BLOCK_DOMAINS):
        self._log_result(message, 'junking spam from blocked domain')
        move_message(message, 'Junk Email')
        return

    # junk known spam headers
    # (header values were lowercased above, hence 'apimassmail')
    if (_spam_confidence_level(headers) >= 5
            or get_header('X-Mailgun-List-Address', headers)
            or get_header('X-SFDC-EmailCategory', headers) == 'apimassmail'):
        self._log_result(message, 'junking spam with known header')
        move_message(message, 'Junk Email')
        return

    # KEEP MESSAGE
    self._log_result(message, 'keeping message, passed all filter checks')
|
|
|
|
def is_in_message(list_, message):
    """Search a message for a list of strings.

    Each term is looked up, case-insensitively and as a plain substring, in
    the message subject, the sender address and the message body. A field
    that is None is treated as empty instead of raising.

    Args:
        list_: iterable of search terms.
        message: object exposing subject, body and sender.address.

    Returns:
        True if any term is found in any of the three fields, else False.
    """
    searchable = [
        (message.subject or '').lower(),
        (message.sender.address or '').lower(),
        (message.body or '').lower(),
    ]
    return any(
        term.lower() in text
        for term in list_
        for text in searchable
    )
|
|
|
|
def move_message(message, folder_name):
    """Move a message into the cached folder registered as folder_name.

    Returns False without touching the message when folder_name is not in
    folder_cache; otherwise calls message.move() and returns None.
    """
    if folder_name in folder_cache:
        message.move(folder_cache[folder_name])
        return None
    return False
|
|
|
|
def add_category(message, category_name):
    """Tag a message with the cached Outlook category and save it.

    Returns False without touching the message when category_name is not in
    category_cache; otherwise adds the category, saves the message and
    returns None.
    """
    if category_name in category_cache:
        message.add_category(category_cache[category_name])
        message.save_message()
        return None
    return False
|
|
|
|
def init_filters(self):
    """Prime the module-level caches used by the filter helpers.

    Copies self._folders and self._categories into folder_cache and
    category_cache, and stores lowercased copies of BLOCK_KEYWORDS and
    BLOCK_ANNOYING in normalized.
    """
    # hack to copy a dict from parent object into local object
    folder_cache.update(self._folders.items())
    category_cache.update(self._categories.items())

    # normalize filter keyword lists
    for list_name, phrases in (('BLOCK_KEYWORDS', BLOCK_KEYWORDS),
                               ('BLOCK_ANNOYING', BLOCK_ANNOYING)):
        normalized[list_name] = [phrase.lower() for phrase in phrases]
|
|
|
|
def get_header(header_key, headers):
    """Look up a header by name in a list of {name: value} dicts.

    Header field names are case-insensitive, so 'List-Id' also matches
    'List-ID'. Only the values stored under the matching name are returned.

    Args:
        header_key: header name to look for.
        headers: list of dicts mapping header name to header value.

    Returns:
        False when the header is absent, the single value when it occurs
        once, or a list of values (in order) when it occurs several times.
    """
    wanted = header_key.lower()
    vals = [
        value
        for header in headers
        for name, value in header.items()
        if name.lower() == wanted
    ]
    if not vals:
        return False
    if len(vals) == 1:
        return vals[0]
    return vals
|
|
|
|
def search_headers(search, headers):
    """Return True if search occurs inside any header value, else False.

    headers is a list of dicts; every value of every dict is checked with a
    substring test.
    """
    return any(
        search in value
        for entry in headers
        for value in entry.values()
    )
|