From d8417b4449de5ec99a60e121c220411dd6bd3e47 Mon Sep 17 00:00:00 2001 From: Jacob Mastel Date: Wed, 17 Sep 2025 09:44:26 -0700 Subject: [PATCH] Switched to black formatting --- filter-rules.py | 551 +++++++++++++++++++++++++----------------------- 1 file changed, 284 insertions(+), 267 deletions(-) diff --git a/filter-rules.py b/filter-rules.py index d2812be..57f0d4c 100644 --- a/filter-rules.py +++ b/filter-rules.py @@ -1,148 +1,149 @@ import re BLOCK_EMAIL = [ - 'support-noreply@status.duosecurity.com', - 'alerts@alerts.mail.hpe.com', - 'viva-noreply@microsoft.com', - 'info@snowflake.com', - 'noreply-marketplace@zoom.us', - 'equal.opportunity@oregonstate.edu', - 'diversity@oregonstate.edu', - 'evals@oregonstate.edu', - 'sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com', - 'noreply@msexperience.microsoft.com', + "support-noreply@status.duosecurity.com", + "alerts@alerts.mail.hpe.com", + "viva-noreply@microsoft.com", + "info@snowflake.com", + "noreply-marketplace@zoom.us", + "equal.opportunity@oregonstate.edu", + "diversity@oregonstate.edu", + "evals@oregonstate.edu", + "sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com", + "noreply@msexperience.microsoft.com", ] BLOCK_KEYWORDS = [ - 'advertising service', - 'email campaign', - 'email preferences', - 'end these email updates', - 'hs-sales-engage.com', - 'long term care', - 'marketing', - 'megacast', - 'my subscription', - 'newsletter', - 'no longer would like to be contacted', - 'opt-out', - 'opt out', - 'piano', - 'pmp exam', - 'prefer not to receive', - 'prefer not to see', - 'rather not receive', - 'remove from list', - 'remove from this list', - 'Samsung SDS America', - 'sidekickopen', - 'subscription preferences', - 'survey', - 'take-me-off', - 'this advertisement', - 'unsub_center', - 'unsubscribe', - 'want to receive', - 'webcast', - 'webinar', - 'welder', - 'white paper', - 'whitepaper', - 'wish to be contacted', - 'wish to receive', - 'your notifications', - 'your preferences', - 'your subscription', + "advertising service", + "email campaign", + "email preferences", + "end these email updates", + "hs-sales-engage.com", + "long term care", + "marketing", + "megacast", + "my subscription", + "newsletter", + "no longer would like to be contacted", + "opt-out", + "opt out", + "piano", + "pmp exam", + "prefer not to receive", + "prefer not to see", + "rather not receive", + "remove from list", + "remove from this list", + "Samsung SDS America", + "sidekickopen", + "subscription preferences", + "survey", + "take-me-off", + "this advertisement", + "unsub_center", + "unsubscribe", + "want to receive", + "webcast", + "webinar", + "welder", + "white paper", + "whitepaper", + "wish to be contacted", + "wish to receive", + "your notifications", + "your preferences", + "your subscription", ] BLOCK_DOMAINS = [ - 'aafintl.com', - 'atscale.com', - 'astutechsolutions.com', - 'bytespeed.com', - 'checkpoint.com', - 'cmadvantage', - 'customeriomail.com', - 'denodo.com', - 'exacttarget.com', - 'freshsales.io', - 'hrciconnect.com', - 'hso.com', - 'ikigailabs.io', - 'impetus.com', - 'informareachmedia.com', - 'javentechnologies.com', - 'kuusakoski.com', - 'mailgun.net', - 'malwarebytes.com', - 'matrixservice.com', - 'mimecast.com', - 'mktomail.com', - 'msgfocus.com', - 'norstar.net', - 'orjuno.com', - 'pphosted.com', - 'qualitynetworks.com', - 'radware.com', - 'rsmatco.com', - 'rubrain.agency', - 'sendgrid.net', - 'sparkpostmail.com', - 'techmate.com', - 'thesourcery.com', - 'trustedmailservers.com', - 'ubtiinc.com', - 'zerowait.com', - 'znsrc.com' + "aafintl.com", + "atscale.com", + "astutechsolutions.com", + "bytespeed.com", + "checkpoint.com", + "cmadvantage", + "customeriomail.com", + "denodo.com", + "exacttarget.com", + "freshsales.io", + "hrciconnect.com", + "hso.com", + "ikigailabs.io", + "impetus.com", + "informareachmedia.com", + "javentechnologies.com", + "kuusakoski.com", + "mailgun.net", + "malwarebytes.com", + "matrixservice.com", + "mimecast.com", + "mktomail.com", + "msgfocus.com", + "norstar.net", + "orjuno.com", + "pphosted.com", + "qualitynetworks.com", + "radware.com", + "rsmatco.com", + "rubrain.agency", + "sendgrid.net", + "sparkpostmail.com", + "techmate.com", + "thesourcery.com", + "trustedmailservers.com", + "ubtiinc.com", + "zerowait.com", + "znsrc.com", ] BLOCK_ANNOYING = [ - 'charitable fund drive', - 'election reminder', - 'faculty senate agenda', - 'food drive', - 'food share', - 'general election', - 'giving tuesday', - 'hardship leave donations needed', - 'register to vote', - 'service desk survey request', - 'special election', - 'voter registration' + "charitable fund drive", + "election reminder", + "faculty senate agenda", + "food drive", + "food share", + "general election", + "giving tuesday", + "hardship leave donations needed", + "register to vote", + "service desk survey request", + "special election", + "voter registration", ] ALLOW = [ - 'oregonstate.edu', - 'github.com', - 'osu.atlassian.net', - 'oregonstateuniversity.atlassian.net', - 'sns.amazonaws.com', - 'opsgenie.net', - 'notify@teamdynamixapp.com', - 'newsbites@email.sans.org', - 'noreply@box.com', - 'noreply@email.teams.microsoft.com', - 'no-reply@sharepointonline.com', - 'govdelivery.com', - 'linkoregon.org', - 'pdxwit.org', - 'busyconf.com', - 'support@githubsupport.com', - 'microsoft.com', - 'docusign.net', - 'ideal-logic.com', - 'heliocampus.atlassian.net', - 'ctptravelservices.com', - 'Travel@concursolutions.com', - 'ghost.io', - 'orders@catertrax.com', - 'nytdirect@nytimes.com' + "oregonstate.edu", + "github.com", + "osu.atlassian.net", + "oregonstateuniversity.atlassian.net", + "sns.amazonaws.com", + "opsgenie.net", + "notify@teamdynamixapp.com", + "newsbites@email.sans.org", + "noreply@box.com", + "noreply@email.teams.microsoft.com", + "no-reply@sharepointonline.com", + "govdelivery.com", + "linkoregon.org", + "pdxwit.org", + "busyconf.com", + "support@githubsupport.com", + "microsoft.com", + "docusign.net", + "ideal-logic.com", + "heliocampus.atlassian.net", + "ctptravelservices.com", + "Travel@concursolutions.com", + "ghost.io", + "orders@catertrax.com", + "nytdirect@nytimes.com", ] normalized = {} folder_cache = {} category_cache = {} + def filter_message(self, message): # normalize message attributes normalized_to = [x.address.lower() for x in message.to] @@ -152,217 +153,223 @@ def filter_message(self, message): headers = [] for header in message.message_headers: h = {} - h[header['name']] = header['value'].lower() + h[header["name"]] = header["value"].lower() headers.append(h) # filter unactionable IAR emails automated_sources = [ - 'coresys@lists.oregonstate.edu', - 'iar.ref@oregonstate.edu', - 'iar.systems-team@oregonstate.edu', - 'changes_osu@heliocampus.com', - 'no-reply@vmockmail.com', - 'noreply-beaverhub@oregonstate.edu', - 'chatter-beaverhub@oregonstate.edu', + "coresys@lists.oregonstate.edu", + "iar.ref@oregonstate.edu", + "iar.systems-team@oregonstate.edu", + "changes_osu@heliocampus.com", + "no-reply@vmockmail.com", + "noreply-beaverhub@oregonstate.edu", + "chatter-beaverhub@oregonstate.edu", ] - if bool([x for x in automated_sources if(x in normalized_from)]): - unactionable = [x.lower() for x in [ - 'DWPRODRAW Verification', - 'Job Monitor', - ' - OK', - 'ODProd Row Count Comparison', - 'Oracle ODprod Sessions Older Than Today', - 'DSDB Server Audit', - 'UserBase.Users users deactivated due to ORG changes and termination', - 'SSRS Datasets Needing Caching', - 'Audit Update', - 'ScholarUniverse to GRRS load', - 'OACIS Pending Actions Notice', - 'You Have OSUF Disapproved Reimbursements', - 'Your GRRS to Banner scholarship load report', - 'You Have OSUF Reimbursements to Review', - 'You Have Scholarship Payment Plans', - 'You Have Direct Payment Requests to Review', - 'You Have Disapproved Scholarship Payment Plans', - 'OSUF Reimbursements Needing Your Prompt Attention', - 'Redistribution Verification Error', - 'Direct Payment Request', - 'Status Change in your Detail Code Request', - 'loaded into the Index Reimbursement System', - 'JV required for', - 'Changes via Tableau REST API at OSU', - 'DSDBTEST', - 'VMock CSV S3 Upload', - 'Your Daily Digest for Oregon State University' - ]] + if bool([x for x in automated_sources if (x in normalized_from)]): + unactionable = [ + x.lower() + for x in [ + "DWPRODRAW Verification", + "Job Monitor", + " - OK", + "ODProd Row Count Comparison", + "Oracle ODprod Sessions Older Than Today", + "DSDB Server Audit", + "UserBase.Users users deactivated due to ORG changes and termination", + "SSRS Datasets Needing Caching", + "Audit Update", + "ScholarUniverse to GRRS load", + "OACIS Pending Actions Notice", + "You Have OSUF Disapproved Reimbursements", + "Your GRRS to Banner scholarship load report", + "You Have OSUF Reimbursements to Review", + "You Have Scholarship Payment Plans", + "You Have Direct Payment Requests to Review", + "You Have Disapproved Scholarship Payment Plans", + "OSUF Reimbursements Needing Your Prompt Attention", + "Redistribution Verification Error", + "Direct Payment Request", + "Status Change in your Detail Code Request", + "loaded into the Index Reimbursement System", + "JV required for", + "Changes via Tableau REST API at OSU", + "DSDBTEST", + "VMock CSV S3 Upload", + "Your Daily Digest for Oregon State University", + ] + ] if is_in_message(unactionable, message): - self._log_result(message, 'moving to unactionable') - move_message(message, 'zzz-unactionable') + self._log_result(message, "moving to unactionable") + move_message(message, "zzz-unactionable") return # filter servicenow email notifications - if 'mysupport-replies@oregonstate.edu' in normalized_from: - keep = [ - 'opened on your behalf', - 'your request REQ' - ] + if "mysupport-replies@oregonstate.edu" in normalized_from: + keep = ["opened on your behalf", "your request REQ"] if is_in_message(keep, message): self._log_result(message, "keeping servicenow message") return else: - self._log_result(message, 'moving to servicenow') - move_message(message, 'zzz-servicenow') + self._log_result(message, "moving to servicenow") + move_message(message, "zzz-servicenow") return # filter unactionable InCommon SSL cert emails - if 'support@cert-manager.com' in normalized_from: - actionable = [ - 'iar', - 'sig', - 'analytics', - 'cwp-access', - 'dsdb', - 'tableau' - ] + if "support@cert-manager.com" in normalized_from: + actionable = ["iar", "sig", "analytics", "cwp-access", "dsdb", "tableau"] if is_in_message(actionable, message): - self._log_result(message, - f"keeping message for actionable SSL notification") + self._log_result( + message, f"keeping message for actionable SSL notification" + ) return else: - self._log_result(message, 'moving to unactionable') - move_message(message, 'zzz-unactionable') + self._log_result(message, "moving to unactionable") + move_message(message, "zzz-unactionable") return # filter Student CRM - if 'noreply-beaverhub@oregonstate.edu' in normalized_from: + if "noreply-beaverhub@oregonstate.edu" in normalized_from: unactionable = [ - 'has been assigned to you or your queue', + "has been assigned to you or your queue", ] if is_in_message(unactionable, message): - self._log_result(message, 'moving to unactionable') - move_message(message, 'zzz-unactionable') + self._log_result(message, "moving to unactionable") + move_message(message, "zzz-unactionable") return # filter dependabot - if ('dependabot[bot]' in str(message.sender) - or is_in_message(['Your Dependabot alerts'], message)): - self._log_result(message, 'moving to dependabot') - move_message(message, 'zzz-dependabot') + if "dependabot[bot]" in str(message.sender) or is_in_message( + ["Your Dependabot alerts"], message + ): + self._log_result(message, "moving to dependabot") + move_message(message, "zzz-dependabot") return # filter ACTWON - if 'actwon_administration@lists.oregonstate.edu' in normalized_from: - self._log_result(message, 'moving to ACTWON') - move_message(message, 'lists/ACTWON') + if "actwon_administration@lists.oregonstate.edu" in normalized_from: + self._log_result(message, "moving to ACTWON") + move_message(message, "lists/ACTWON") return # filter ACUG - if 'isacug@oregonstate.edu' in normalized_to: - self._log_result(message, 'moving to ACUG') - move_message(message, 'lists/ACUG') + if "isacug@oregonstate.edu" in normalized_to: + self._log_result(message, "moving to ACUG") + move_message(message, "lists/ACUG") return # filter HelioCampus helpdesk - if 'jira@heliocampus.atlassian.net' in normalized_from: - self._log_result(message, 'moving to zzz-hc-helpdesk') - move_message(message, 'zzz-hc-helpdesk') + if "jira@heliocampus.atlassian.net" in normalized_from: + self._log_result(message, "moving to zzz-hc-helpdesk") + move_message(message, "zzz-hc-helpdesk") return # filter alerts-sig - if (normalized_from in ['mcc-b11-stor1@oregonstate.edu', - 'mcc-b12-stor1@oregonstate.edu', - 'ousclus@oregonstate.edu', - 'isilon@storage.sig.oregonstate.edu', - 'me4012@sig.oregonstate.edu'] - or 'alarm.DatastoreDiskUsageAlarm' in message.subject): - self._log_result(message, 'moving to alerts-sig') - move_message(message, 'lists/alerts-sig') + if ( + normalized_from + in [ + "mcc-b11-stor1@oregonstate.edu", + "mcc-b12-stor1@oregonstate.edu", + "ousclus@oregonstate.edu", + "isilon@storage.sig.oregonstate.edu", + "me4012@sig.oregonstate.edu", + ] + or "alarm.DatastoreDiskUsageAlarm" in message.subject + ): + self._log_result(message, "moving to alerts-sig") + move_message(message, "lists/alerts-sig") return # filter backup-nightly - if 'backup-nightly@lists.oregonstate.edu' in message.to: - self._log_result(message, 'moving to backup-nightly') - move_message(message, 'lists/backup-nightly') + if "backup-nightly@lists.oregonstate.edu" in message.to: + self._log_result(message, "moving to backup-nightly") + move_message(message, "lists/backup-nightly") return # filter quarantine spam - if 'quarantine@messaging.microsoft.com' in normalized_from: - self._log_result(message, 'moving to unactionable') - move_message(message, 'zzz-unactionable') + if "quarantine@messaging.microsoft.com" in normalized_from: + self._log_result(message, "moving to unactionable") + move_message(message, "zzz-unactionable") return # delete atlassian spam - if 'confluence@osu.atlassian.net' in normalized_from: + if "confluence@osu.atlassian.net" in normalized_from: unactionable = [ - 'sv1_ds_atlassian', - '[confluence] daily digest', + "sv1_ds_atlassian", + "[confluence] daily digest", ] if is_in_message(unactionable, message): - self._log_result(message, 'deleting atlassian spam') + self._log_result(message, "deleting atlassian spam") message.delete() return # delete salesforce spam - if 'salesforce.com' in normalized_from: - unactionable = [x.lower() for x in [ - 'sandbox', - 'resetting your Salesforce password', - 'new Salesforce security token', - ]] + if "salesforce.com" in normalized_from: + unactionable = [ + x.lower() + for x in [ + "sandbox", + "resetting your Salesforce password", + "new Salesforce security token", + ] + ] if is_in_message(unactionable, message): - self._log_result(message, 'deleting salesforce spam') + self._log_result(message, "deleting salesforce spam") message.delete() return # delete rave junk - if ('guardian@getrave.com' in normalized_from - and 'new guardian chat' in message.subject.lower()): - self._log_result(message, 'deleting rave spam') + if ( + "guardian@getrave.com" in normalized_from + and "new guardian chat" in message.subject.lower() + ): + self._log_result(message, "deleting rave spam") message.delete() return # delete conference spam - if ('brock+conf@onid.oregonstate.edu' in normalized_to - or 'brock+conf@oregonstate.edu' in normalized_to): - self._log_result(message, 'deleting conference spam') + if ( + "brock+conf@onid.oregonstate.edu" in normalized_to + or "brock+conf@oregonstate.edu" in normalized_to + ): + self._log_result(message, "deleting conference spam") message.delete() return # delete OSU IT Managers list spam - if 'osuitmanagers@oregonstate.edu' in normalized_to: - self._log_result(message, 'deleting osu it managers spam') + if "osuitmanagers@oregonstate.edu" in normalized_to: + self._log_result(message, "deleting osu it managers spam") message.delete() return # delete other spam if is_in_message(BLOCK_ANNOYING, message): - self._log_result(message, 'junking spam containing annoying content') - move_message(message, 'Junk Email') + self._log_result(message, "junking spam containing annoying content") + move_message(message, "Junk Email") return # add 'HelioCampus' category to messages from HC - if '@heliocampus' in normalized_from: + if "@heliocampus" in normalized_from: self._log_result(message, "adding category 'HelioCampus'") - add_category(message, 'HelioCampus') + add_category(message, "HelioCampus") return # keep messages from allowed emails and domains for good in ALLOW: if good in normalized_from and normalized_from not in BLOCK_EMAIL: - self._log_result(message, - f"keeping message from allowed sender {good}") + self._log_result(message, f"keeping message from allowed sender {good}") return # junk messages from blocked senders if normalized_from in BLOCK_EMAIL: - self._log_result(message, 'junking spam from blocked sender') - move_message(message, 'Junk Email') + self._log_result(message, "junking spam from blocked sender") + move_message(message, "Junk Email") return # junk messages with blocked keywords if is_in_message(BLOCK_KEYWORDS, message): - self._log_result(message, 'junking spam containing blocked keyword') - move_message(message, 'Junk Email') + self._log_result(message, "junking spam containing blocked keyword") + move_message(message, "Junk Email") return # junk messages from blocked domains @@ -375,29 +382,32 @@ def filter_message(self, message): is_spam = True break if is_spam: - self._log_result(message, 'junking spam from blocked domain') - move_message(message, 'Junk Email') + self._log_result(message, "junking spam from blocked domain") + move_message(message, "Junk Email") return # junk known spam headers - if (int(get_header('X-MS-Exchange-Organization-SCL', headers)) >= 5 - or get_header('X-Mailgun-List-Address', headers) - or get_header('X-SFDC-EmailCategory', headers) == 'apimassmail'): - self._log_result(message, 'junking spam with known header') - move_message(message, 'Junk Email') + if ( + int(get_header("X-MS-Exchange-Organization-SCL", headers)) >= 5 + or get_header("X-Mailgun-List-Address", headers) + or get_header("X-SFDC-EmailCategory", headers) == "apimassmail" + ): + self._log_result(message, "junking spam with known header") + move_message(message, "Junk Email") return # add 'OSU Inform' category to internal messages sent to DLs - if ('@oregonstate.edu' in normalized_from - and ((not get_header('To', headers) - and not get_header('List-Id', headers)) - or ('inform-c' in ' '.join(normalized_to)))): + if "@oregonstate.edu" in normalized_from and ( + (not get_header("To", headers) and not get_header("List-Id", headers)) + or ("inform-c" in " ".join(normalized_to)) + ): self._log_result(message, "adding category 'OSU Inform'") - add_category(message, 'OSU Inform') + add_category(message, "OSU Inform") return # KEEP MESSAGE - self._log_result(message, 'keeping message, passed all filter checks') + self._log_result(message, "keeping message, passed all filter checks") + def is_in_message(list_, message): """search a message for a list of strings @@ -408,38 +418,44 @@ def is_in_message(list_, message): # normalize inputs search_terms = [x.lower() for x in list_] - message_body = message.body.lower().replace('\ufeff', '') - message_subject = message.subject.lower().replace('\ufeff', '') - message_from = message.sender.address.lower().replace('\ufeff', '') + message_body = message.body.lower().replace("\ufeff", "") + message_subject = message.subject.lower().replace("\ufeff", "") + message_from = message.sender.address.lower().replace("\ufeff", "") for term in search_terms: - if (re.search(term, message_subject) - or re.search(term, message_from) - or re.search(term, message_body)): + if ( + re.search(term, message_subject) + or re.search(term, message_from) + or re.search(term, message_body) + ): is_found = True break return is_found + def move_message(message, folder_name): - """ move a message to a folder stored in the folder cache """ + """move a message to a folder stored in the folder cache""" if folder_name not in folder_cache: return False message.move(folder_cache[folder_name]) + def add_category(message, category_name): - """ add an Outlook category to a message """ + """add an Outlook category to a message""" if category_name not in category_cache: return False message.add_category(category_cache[category_name]) message.save_message() + def init_filters(self): # hack to copy a dict from parent object into local object - for k,v in self._folders.items(): + for k, v in self._folders.items(): folder_cache[k] = v - for k,v in self._categories.items(): + for k, v in self._categories.items(): category_cache[k] = v + def get_header(header_key, headers): vals = [] for header in headers: @@ -453,6 +469,7 @@ def get_header(header_key, headers): else: return False + def search_headers(search, headers): for header in headers: for val in header.values():