# Python source (479 lines, 14 KiB)
import re
|
|
|
|
# Exact sender addresses that are always junked. filter_message checks this
# list before honoring ALLOW, so an address here is blocked even when its
# domain is allowed. Entries are compared against the lower-cased sender
# address, so keep them lower-case.
BLOCK_EMAIL = [
    "support-noreply@status.duosecurity.com",
    "alerts@alerts.mail.hpe.com",
    "viva-noreply@microsoft.com",
    "info@snowflake.com",
    "noreply-marketplace@zoom.us",
    "equal.opportunity@oregonstate.edu",
    "diversity@oregonstate.edu",
    "evals@oregonstate.edu",
    "sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com",
    "noreply@msexperience.microsoft.com",
]
|
|
|
|
# Phrases that mark a message as junk when found in its subject, sender
# address, or body (matched case-insensitively via is_in_message). Only
# consulted for senders that did not match ALLOW.
BLOCK_KEYWORDS = [
    "advertising service",
    "email campaign",
    "email preferences",
    "end these email updates",
    "hs-sales-engage.com",
    "long term care",
    "marketing",
    "megacast",
    "my subscription",
    "newsletter",
    "no longer would like to be contacted",
    "opt-out",
    "opt out",
    "piano",
    "pmp exam",
    "prefer not to receive",
    "prefer not to see",
    "rather not receive",
    "remove from list",
    "remove from this list",
    "Samsung SDS America",
    "sidekickopen",
    "subscription preferences",
    "survey",
    "take-me-off",
    "this advertisement",
    "unsub_center",
    "unsubscribe",
    "want to receive",
    "webcast",
    "webinar",
    "welder",
    "white paper",
    "whitepaper",
    "wish to be contacted",
    "wish to receive",
    "your notifications",
    "your preferences",
    "your subscription",
]
|
|
|
|
# Domains whose mail is junked. filter_message looks for each entry as a
# substring of the lower-cased sender address and of every lower-cased header
# value, so entries must be lower-case.
BLOCK_DOMAINS = [
    "aafintl.com",
    "atscale.com",
    "astutechsolutions.com",
    "bytespeed.com",
    "checkpoint.com",
    "cmadvantage",
    "customeriomail.com",
    "denodo.com",
    "exacttarget.com",
    "freshsales.io",
    "hrciconnect.com",
    "hso.com",
    "ikigailabs.io",
    "impetus.com",
    "informareachmedia.com",
    "javentechnologies.com",
    "kuusakoski.com",
    "mailgun.net",
    "malwarebytes.com",
    "matrixservice.com",
    "mimecast.com",
    "mktomail.com",
    "msgfocus.com",
    "norstar.net",
    "orjuno.com",
    "pphosted.com",
    "qualitynetworks.com",
    "radware.com",
    "rsmatco.com",
    "rubrain.agency",
    "sendgrid.net",
    "sparkpostmail.com",
    "techmate.com",
    "thesourcery.com",
    "trustedmailservers.com",
    "ubtiinc.com",
    "zerowait.com",
    "znsrc.com",
]
|
|
|
|
# Phrases that send a message to "Junk Email" regardless of sender; this list
# is checked before ALLOW, so it also applies to allowed domains.
BLOCK_ANNOYING = [
    "charitable fund drive",
    "election reminder",
    "faculty senate agenda",
    "food drive",
    "food share",
    "general election",
    "giving tuesday",
    "hardship leave donations needed",
    "register to vote",
    "service desk survey request",
    "special election",
    "voter registration",
]
|
|
|
|
# Sender addresses and domains whose mail is kept without running the
# keyword/domain/header spam checks. Each entry is matched as a substring of
# the lower-cased sender address, so every entry must be lower-case or it can
# never match.
ALLOW = [
    "oregonstate.edu",
    "github.com",
    "osu.atlassian.net",
    "oregonstateuniversity.atlassian.net",
    "sns.amazonaws.com",
    "opsgenie.net",
    "notify@teamdynamixapp.com",
    "newsbites@email.sans.org",
    "noreply@box.com",
    "noreply@email.teams.microsoft.com",
    "no-reply@sharepointonline.com",
    "govdelivery.com",
    "linkoregon.org",
    "pdxwit.org",
    "busyconf.com",
    "support@githubsupport.com",
    "microsoft.com",
    "docusign.net",
    "ideal-logic.com",
    "heliocampus.atlassian.net",
    "ctptravelservices.com",
    # was "Travel@..." -- the capital T could never match a lower-cased sender
    "travel@concursolutions.com",
    "ghost.io",
    "orders@catertrax.com",
    "nytdirect@nytimes.com",
]
|
|
|
|
# Module-level state shared by the filter functions.
# `normalized` is not referenced by any function visible in this file.
normalized = {}
# folder name -> folder object; filled by init_filters, read by move_message
folder_cache = {}
# category name -> category object; filled by init_filters, read by add_category
category_cache = {}
|
|
|
|
|
|
def filter_message(self, message):
    """Classify one message and move, delete, categorize, or keep it.

    Rules are evaluated top to bottom and the first matching rule decides the
    outcome, so the order of the checks below is significant. Every outcome is
    reported through ``self._log_result`` before the action is taken.

    Args:
        self: object providing ``_log_result(message, text)``. Presumably the
            mail client these filters are attached to -- confirm with caller.
        message: mail message exposing ``to`` (iterable of recipients with an
            ``address``), ``sender.address``, ``subject``, ``message_headers``
            (iterable of ``{"name": ..., "value": ...}`` mappings) and
            ``delete()``; the helper functions additionally use ``body``,
            ``move()``, ``add_category()`` and ``save_message()``.

    Returns:
        None. The result is the side effect on the message.
    """
    # normalize message attributes
    normalized_to = [x.address.lower() for x in message.to]
    normalized_from = message.sender.address.lower()
    subject = message.subject or ""

    # process message headers into a sane data structure:
    # a list of single-entry {name: lower-cased value} dicts
    headers = [
        {header["name"]: header["value"].lower()}
        for header in message.message_headers
    ]

    # filter unactionable IAR emails
    automated_sources = [
        "coresys@lists.oregonstate.edu",
        "iar.ref@oregonstate.edu",
        "iar.systems-team@oregonstate.edu",
        "changes_osu@heliocampus.com",
        "no-reply@vmockmail.com",
        "noreply-beaverhub@oregonstate.edu",
        "chatter-beaverhub@oregonstate.edu",
    ]
    if any(source in normalized_from for source in automated_sources):
        # is_in_message lower-cases its search terms, so mixed case is fine
        unactionable = [
            "DWPRODRAW Verification",
            "Job Monitor",
            " - OK",
            "ODProd Row Count Comparison",
            "Oracle ODprod Sessions Older Than Today",
            "DSDB Server Audit",
            "UserBase.Users users deactivated due to ORG changes and termination",
            "SSRS Datasets Needing Caching",
            "Audit Update",
            "ScholarUniverse to GRRS load",
            "OACIS Pending Actions Notice",
            "You Have OSUF Disapproved Reimbursements",
            "Your GRRS to Banner scholarship load report",
            "You Have OSUF Reimbursements to Review",
            "You Have Scholarship Payment Plans",
            "You Have Direct Payment Requests to Review",
            "You Have Disapproved Scholarship Payment Plans",
            "OSUF Reimbursements Needing Your Prompt Attention",
            "Redistribution Verification Error",
            "Direct Payment Request",
            "Status Change in your Detail Code Request",
            "loaded into the Index Reimbursement System",
            "JV required for",
            "Changes via Tableau REST API at OSU",
            "DSDBTEST",
            "VMock CSV S3 Upload",
            "Your Daily Digest for Oregon State University",
        ]
        if is_in_message(unactionable, message):
            self._log_result(message, "moving to unactionable")
            move_message(message, "zzz-unactionable")
            return

    # filter servicenow email notifications
    if "mysupport-replies@oregonstate.edu" in normalized_from:
        keep = ["opened on your behalf", "your request REQ"]
        if is_in_message(keep, message):
            self._log_result(message, "keeping servicenow message")
        else:
            self._log_result(message, "moving to servicenow")
            move_message(message, "zzz-servicenow")
        return

    # filter unactionable InCommon SSL cert emails
    if "support@cert-manager.com" in normalized_from:
        actionable = ["iar", "sig", "analytics", "cwp-access", "dsdb", "tableau"]
        if is_in_message(actionable, message):
            self._log_result(
                message, "keeping message for actionable SSL notification"
            )
        else:
            self._log_result(message, "moving to unactionable")
            move_message(message, "zzz-unactionable")
        return

    # filter Student CRM
    if "noreply-beaverhub@oregonstate.edu" in normalized_from:
        unactionable = [
            "has been assigned to you or your queue",
        ]
        if is_in_message(unactionable, message):
            self._log_result(message, "moving to unactionable")
            move_message(message, "zzz-unactionable")
            return

    # filter dependabot
    if "dependabot[bot]" in str(message.sender) or is_in_message(
        ["Your Dependabot alerts"], message
    ):
        self._log_result(message, "moving to dependabot")
        move_message(message, "zzz-dependabot")
        return

    # filter ACTWON
    if "actwon_administration@lists.oregonstate.edu" in normalized_from:
        self._log_result(message, "moving to ACTWON")
        move_message(message, "lists/ACTWON")
        return

    # filter ACUG
    if "isacug@oregonstate.edu" in normalized_to:
        self._log_result(message, "moving to ACUG")
        move_message(message, "lists/ACUG")
        return

    # filter HelioCampus helpdesk
    if "jira@heliocampus.atlassian.net" in normalized_from:
        self._log_result(message, "moving to zzz-hc-helpdesk")
        move_message(message, "zzz-hc-helpdesk")
        return

    # filter alerts-sig
    alert_senders = (
        "mcc-b11-stor1@oregonstate.edu",
        "mcc-b12-stor1@oregonstate.edu",
        "ousclus@oregonstate.edu",
        "isilon@storage.sig.oregonstate.edu",
        "me4012@sig.oregonstate.edu",
    )
    if (
        normalized_from in alert_senders
        or "alarm.DatastoreDiskUsageAlarm" in subject
    ):
        self._log_result(message, "moving to alerts-sig")
        move_message(message, "lists/alerts-sig")
        return

    # filter backup-nightly
    # Uses normalized_to like every other recipient rule, so the match is
    # case-insensitive and does not depend on how message.to implements `in`.
    if "backup-nightly@lists.oregonstate.edu" in normalized_to:
        self._log_result(message, "moving to backup-nightly")
        move_message(message, "lists/backup-nightly")
        return

    # filter quarantine spam
    if "quarantine@messaging.microsoft.com" in normalized_from:
        self._log_result(message, "moving to unactionable")
        move_message(message, "zzz-unactionable")
        return

    # delete atlassian spam
    if "confluence@osu.atlassian.net" in normalized_from:
        unactionable = [
            "sv1_ds_atlassian",
            "[confluence] daily digest",
        ]
        if is_in_message(unactionable, message):
            self._log_result(message, "deleting atlassian spam")
            message.delete()
            return

    # delete salesforce spam
    if "salesforce.com" in normalized_from:
        unactionable = [
            "sandbox",
            "resetting your Salesforce password",
            "new Salesforce security token",
        ]
        if is_in_message(unactionable, message):
            self._log_result(message, "deleting salesforce spam")
            message.delete()
            return

    # delete rave junk
    if (
        "guardian@getrave.com" in normalized_from
        and "new guardian chat" in subject.lower()
    ):
        self._log_result(message, "deleting rave spam")
        message.delete()
        return

    # delete conference spam
    if (
        "brock+conf@onid.oregonstate.edu" in normalized_to
        or "brock+conf@oregonstate.edu" in normalized_to
    ):
        self._log_result(message, "deleting conference spam")
        message.delete()
        return

    # delete OSU IT Managers list spam
    if "osuitmanagers@oregonstate.edu" in normalized_to:
        self._log_result(message, "deleting osu it managers spam")
        message.delete()
        return

    # delete other spam
    if is_in_message(BLOCK_ANNOYING, message):
        self._log_result(message, "junking spam containing annoying content")
        move_message(message, "Junk Email")
        return

    # add 'HelioCampus' category to messages from HC
    if "@heliocampus" in normalized_from:
        self._log_result(message, "adding category 'HelioCampus'")
        add_category(message, "HelioCampus")
        return

    # keep messages from allowed emails and domains
    sender_is_blocked = normalized_from in BLOCK_EMAIL
    for good in ALLOW:
        # lower-case the entry: the sender is lower-cased, so a mixed-case
        # ALLOW entry would otherwise never match
        if good.lower() in normalized_from and not sender_is_blocked:
            self._log_result(message, f"keeping message from allowed sender {good}")
            return

    # junk messages from blocked senders
    if sender_is_blocked:
        self._log_result(message, "junking spam from blocked sender")
        move_message(message, "Junk Email")
        return

    # junk messages with blocked keywords
    if is_in_message(BLOCK_KEYWORDS, message):
        self._log_result(message, "junking spam containing blocked keyword")
        move_message(message, "Junk Email")
        return

    # junk messages from blocked domains (in the sender or any header value)
    if any(
        domain in normalized_from or search_headers(domain, headers)
        for domain in BLOCK_DOMAINS
    ):
        self._log_result(message, "junking spam from blocked domain")
        move_message(message, "Junk Email")
        return

    # junk known spam headers
    # get_header returns False (missing), a string, or a list (repeated
    # header); take the highest parseable SCL and ignore anything non-numeric
    # instead of letting int() raise.
    raw_scl = get_header("X-MS-Exchange-Organization-SCL", headers)
    scl = 0
    for value in raw_scl if isinstance(raw_scl, list) else [raw_scl]:
        try:
            scl = max(scl, int(value))
        except (TypeError, ValueError):
            continue
    if (
        scl >= 5
        or get_header("X-Mailgun-List-Address", headers)
        or get_header("X-SFDC-EmailCategory", headers) == "apimassmail"
    ):
        self._log_result(message, "junking spam with known header")
        move_message(message, "Junk Email")
        return

    # add 'OSU Inform' category to internal messages sent to DLs
    if "@oregonstate.edu" in normalized_from and (
        (not get_header("To", headers) and not get_header("List-Id", headers))
        or ("inform-c" in " ".join(normalized_to))
    ):
        self._log_result(message, "adding category 'OSU Inform'")
        add_category(message, "OSU Inform")
        return

    # KEEP MESSAGE
    self._log_result(message, "keeping message, passed all filter checks")
|
|
|
|
|
|
def is_in_message(list_, message):
    """Search a message for a list of strings.

    Each term is matched as a literal, case-insensitive substring of the
    message subject, sender address, and body. Terms are deliberately NOT
    treated as regular expressions: entries such as
    ``"[confluence] daily digest"`` or ``"hs-sales-engage.com"`` contain regex
    metacharacters and must match themselves.

    Args:
        list_: iterable of strings to look for.
        message: object exposing ``subject``, ``body`` and ``sender.address``.

    Returns:
        True if any string in the list is found in the message, else False.
    """

    def _normalize(text):
        # tolerate a missing field and drop byte-order marks before matching
        return (text or "").lower().replace("\ufeff", "")

    searched_fields = (
        _normalize(message.subject),
        _normalize(message.sender.address),
        _normalize(message.body),
    )
    return any(
        term.lower() in field for term in list_ for field in searched_fields
    )
|
|
|
|
|
|
def move_message(message, folder_name):
    """Move a message to a folder stored in the folder cache.

    Args:
        message: object exposing ``move(folder)``.
        folder_name: key into the module-level ``folder_cache``.

    Returns:
        True if the folder was found and ``message.move`` was called, False if
        ``folder_name`` is not in the cache (the message is left untouched).
    """
    try:
        target_folder = folder_cache[folder_name]
    except KeyError:
        return False
    message.move(target_folder)
    # report success explicitly; falling off the end returned None, which is
    # as falsy as the failure value
    return True
|
|
|
|
|
|
def add_category(message, category_name):
    """Add an Outlook category to a message and save the message.

    Args:
        message: object exposing ``add_category(category)`` and
            ``save_message()``.
        category_name: key into the module-level ``category_cache``.

    Returns:
        True if the category was found and applied, False if
        ``category_name`` is not in the cache (the message is left untouched).
    """
    try:
        category = category_cache[category_name]
    except KeyError:
        return False
    message.add_category(category)
    message.save_message()
    # report success explicitly; falling off the end returned None, which is
    # as falsy as the failure value
    return True
|
|
|
|
|
|
def init_filters(self):
    """Copy the owner's folders and categories into the module-level caches.

    Args:
        self: object exposing ``_folders`` and ``_categories`` mappings.

    Returns:
        None. ``folder_cache`` and ``category_cache`` are updated in place;
        existing entries are overwritten by key and never removed.
    """
    # hack to copy a dict from parent object into local object; update() is
    # used rather than rebinding so the other functions, which read these
    # module globals, see the new entries
    folder_cache.update(self._folders)
    category_cache.update(self._categories)
|
|
|
|
|
|
def get_header(header_key, headers):
    """Look up a header by name in a list of header dicts.

    Header names are compared case-insensitively, and only the value stored
    under the matching name is returned (not every value of a dict that
    happens to contain the name).

    Args:
        header_key: header name to look for, e.g. ``"List-Id"``.
        headers: iterable of ``{name: value}`` dicts.

    Returns:
        False if the header is absent, the single value if it occurs once, or
        a list of values (in order of appearance) if it occurs several times.
    """
    wanted = header_key.lower()
    matches = [
        value
        for header in headers
        for name, value in header.items()
        if name.lower() == wanted
    ]
    if not matches:
        return False
    if len(matches) == 1:
        return matches[0]
    return matches
|
|
|
|
|
|
def search_headers(search, headers):
    """Report whether any header value contains the given text.

    Args:
        search: substring to look for (matched case-sensitively as given).
        headers: iterable of ``{name: value}`` dicts; only values are searched.

    Returns:
        True if ``search`` occurs in at least one header value, else False.
    """
    return any(
        search in value for header in headers for value in header.values()
    )
|