# NOTE(review): `re` is no longer referenced in this module now that
# is_in_message() matches literally; the import is kept in case other code
# relies on it being importable from here -- confirm before removing.
import re

# exact sender addresses that are always junked (even on an allowed domain)
BLOCK_EMAIL = [
    'support-noreply@status.duosecurity.com',
    'alerts@alerts.mail.hpe.com',
    'viva-noreply@microsoft.com',
    'info@snowflake.com',
    'noreply-marketplace@zoom.us',
    'equal.opportunity@oregonstate.edu',
    'diversity@oregonstate.edu',
    'evals@oregonstate.edu',
    'sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com',
]

# phrases that junk a message when found in its subject, sender or body
BLOCK_KEYWORDS = [
    'advertising service',
    'email campaign',
    'email preferences',
    'end these email updates',
    'hs-sales-engage.com',
    'long term care',
    'marketing',
    'megacast',
    'my subscription',
    'newsletter',
    'no longer would like to be contacted',
    'opt-out',
    'opt out',
    'piano',
    'pmp exam',
    'prefer not to receive',
    'prefer not to see',
    'rather not receive',
    'remove from list',
    'remove from this list',
    'Samsung SDS America',
    'sidekickopen',
    'subscription preferences',
    'survey',
    'take-me-off',
    'this advertisement',
    'unsub_center',
    'unsubscribe',
    'want to receive',
    'webcast',
    'webinar',
    'welder',
    'white paper',
    'whitepaper',
    'wish to be contacted',
    'wish to receive',
    'your notifications',
    'your preferences',
    'your subscription',
]

# domains that junk a message when found in the sender address or any header
BLOCK_DOMAINS = [
    'aafintl.com',
    'atscale.com',
    'astutechsolutions.com',
    'bytespeed.com',
    'checkpoint.com',
    'cmadvantage',
    'customeriomail.com',
    'denodo.com',
    'exacttarget.com',
    'freshsales.io',
    'hrciconnect.com',
    'hso.com',
    'ikigailabs.io',
    'impetus.com',
    'informareachmedia.com',
    'javentechnologies.com',
    'kuusakoski.com',
    'mailgun.net',
    'malwarebytes.com',
    'matrixservice.com',
    'mimecast.com',
    'mktomail.com',
    'msgfocus.com',
    'norstar.net',
    'orjuno.com',
    'pphosted.com',
    'qualitynetworks.com',
    'radware.com',
    'rsmatco.com',
    'rubrain.agency',
    'sendgrid.net',
    'sparkpostmail.com',
    'techmate.com',
    'thesourcery.com',
    'trustedmailservers.com',
    'ubtiinc.com',
    'zerowait.com',
    'znsrc.com'
]

# phrases that junk a message regardless of who sent it (checked before ALLOW)
BLOCK_ANNOYING = [
    'charitable fund drive',
    'election reminder',
    'faculty senate agenda',
    'food drive',
    'food share',
    'general election',
    'giving tuesday',
    'hardship leave donations needed',
    'register to vote',
    'service desk survey request',
    'special election',
    'voter registration'
]

# sender addresses / domains that are kept without keyword or domain checks
ALLOW = [
    'oregonstate.edu',
    'github.com',
    'osu.atlassian.net',
    'oregonstateuniversity.atlassian.net',
    'sns.amazonaws.com',
    'opsgenie.net',
    'notify@teamdynamixapp.com',
    'newsbites@email.sans.org',
    'noreply@box.com',
    'noreply@email.teams.microsoft.com',
    'no-reply@sharepointonline.com',
    'govdelivery.com',
    'linkoregon.org',
    'pdxwit.org',
    'busyconf.com',
    'support@githubsupport.com',
    'microsoft.com',
    'docusign.net',
    'ideal-logic.com',
    'heliocampus.atlassian.net',
    'ctptravelservices.com',
    'Travel@concursolutions.com',
    'ghost.io',
    'nytdirect@nytimes.com'
]

normalized = {}
# name -> object lookups, filled by init_filters()
folder_cache = {}
category_cache = {}


def filter_message(self, message):
    """Run a message through the ordered filter rules; the first match wins.

    Each rule logs its decision through ``self._log_result`` and then either
    moves the message to a folder, deletes it, tags it with a category, or
    leaves it untouched. Nothing is returned.

    message -- object exposing ``to`` (iterable of objects with ``address``),
               ``sender.address``, ``subject``, ``body``,
               ``message_headers`` (iterable of ``{'name', 'value'}``
               mappings) plus ``move``/``delete``/``add_category``/
               ``save_message``
    """
    # normalize message attributes
    normalized_to = [x.address.lower() for x in message.to]
    normalized_from = message.sender.address.lower()

    # process message headers into a sane data structure:
    # a list of one-item dicts {original-case name: lower-cased value}
    headers = [
        {header['name']: header['value'].lower()}
        for header in message.message_headers
    ]

    # filter unactionable IAR emails
    automated_sources = [
        'coresys@lists.oregonstate.edu',
        'iar.ref@oregonstate.edu',
        'iar.systems-team@oregonstate.edu',
        'changes_osu@heliocampus.com',
        'no-reply@vmockmail.com',
        'noreply-beaverhub@oregonstate.edu',
        'chatter-beaverhub@oregonstate.edu',
    ]
    if any(x in normalized_from for x in automated_sources):
        # is_in_message() lower-cases the terms itself
        unactionable = [
            'DWPRODRAW Verification',
            'Job Monitor',
            ' - OK',
            'ODProd Row Count Comparison',
            'Oracle ODprod Sessions Older Than Today',
            'DSDB Server Audit',
            'UserBase.Users users deactivated due to ORG changes and termination',
            'SSRS Datasets Needing Caching',
            'Audit Update',
            'ScholarUniverse to GRRS load',
            'OACIS Pending Actions Notice',
            'You Have OSUF Disapproved Reimbursements',
            'Your GRRS to Banner scholarship load report',
            'You Have OSUF Reimbursements to Review',
            'You Have Scholarship Payment Plans',
            'You Have Direct Payment Requests to Review',
            'You Have Disapproved Scholarship Payment Plans',
            'OSUF Reimbursements Needing Your Prompt Attention',
            'Redistribution Verification Error',
            'Direct Payment Request',
            'Status Change in your Detail Code Request',
            'loaded into the Index Reimbursement System',
            'JV required for',
            'Changes via Tableau REST API at OSU',
            'DSDBTEST',
            'VMock CSV S3 Upload',
            'Your Daily Digest for Oregon State University'
        ]
        if is_in_message(unactionable, message):
            self._log_result(message, 'moving to unactionable')
            move_message(message, 'zzz-unactionable')
            return

    # filter unactionable InCommon SSL cert emails
    if 'support@cert-manager.com' in normalized_from:
        actionable = [
            'iar',
            'sig',
            'analytics',
            'cwp-access',
            'dsdb',
            'tableau'
        ]
        if is_in_message(actionable, message):
            self._log_result(
                message, 'keeping message for actionable SSL notification')
            return
        self._log_result(message, 'moving to unactionable')
        move_message(message, 'zzz-unactionable')
        return

    # filter Student CRM
    if 'noreply-beaverhub@oregonstate.edu' in normalized_from:
        unactionable = [
            'has been assigned to you or your queue',
        ]
        if is_in_message(unactionable, message):
            self._log_result(message, 'moving to unactionable')
            move_message(message, 'zzz-unactionable')
            return

    # filter dependabot
    if ('dependabot[bot]' in str(message.sender)
            or is_in_message(['Your Dependabot alerts'], message)):
        self._log_result(message, 'moving to dependabot')
        move_message(message, 'zzz-dependabot')
        return

    # filter ACTWON
    if 'actwon_administration@lists.oregonstate.edu' in normalized_from:
        self._log_result(message, 'moving to ACTWON')
        move_message(message, 'lists/ACTWON')
        return

    # filter ACUG
    if 'isacug@oregonstate.edu' in normalized_to:
        self._log_result(message, 'moving to ACUG')
        move_message(message, 'lists/ACUG')
        return

    # filter HelioCampus helpdesk
    if 'jira@heliocampus.atlassian.net' in normalized_from:
        self._log_result(message, 'moving to zzz-hc-helpdesk')
        move_message(message, 'zzz-hc-helpdesk')
        return

    # filter alerts-sig
    if (normalized_from in ['mcc-b11-stor1@oregonstate.edu',
                            'mcc-b12-stor1@oregonstate.edu',
                            'ousclus@oregonstate.edu',
                            'isilon@storage.sig.oregonstate.edu',
                            'me4012@sig.oregonstate.edu']
            or 'alarm.DatastoreDiskUsageAlarm' in message.subject):
        self._log_result(message, 'moving to alerts-sig')
        move_message(message, 'lists/alerts-sig')
        return

    # filter backup-nightly
    # (uses the lower-cased recipient list like the other recipient rules, so
    # the match does not depend on the address casing or on message.to
    # supporting string membership tests)
    if 'backup-nightly@lists.oregonstate.edu' in normalized_to:
        self._log_result(message, 'moving to backup-nightly')
        move_message(message, 'lists/backup-nightly')
        return

    # filter quarantine spam
    if 'quarantine@messaging.microsoft.com' in normalized_from:
        self._log_result(message, 'moving to unactionable')
        move_message(message, 'zzz-unactionable')
        return

    # delete Atlassian spam
    if ('confluence@osu.atlassian.net' in normalized_from
            and '[Confluence] Daily Digest' in message.subject):
        self._log_result(message, 'deleting atlassian spam')
        message.delete()
        return

    # delete Salesforce spam
    if 'salesforce.com' in normalized_from:
        unactionable = [
            'sandbox',
            'resetting your Salesforce password',
            'new Salesforce security token',
        ]
        if is_in_message(unactionable, message):
            self._log_result(message, 'deleting salesforce spam')
            message.delete()
            return

    # delete Rave junk
    if ('guardian@getrave.com' in normalized_from
            and 'new guardian chat' in message.subject.lower()):
        self._log_result(message, 'deleting rave spam')
        message.delete()
        return

    # delete conference spam
    if ('brock+conf@onid.oregonstate.edu' in normalized_to
            or 'brock+conf@oregonstate.edu' in normalized_to):
        self._log_result(message, 'deleting conference spam')
        message.delete()
        return

    # delete OSU IT Managers list spam
    if 'osuitmanagers@oregonstate.edu' in normalized_to:
        self._log_result(message, 'deleting osu it managers spam')
        message.delete()
        return

    # delete other spam
    if is_in_message(BLOCK_ANNOYING, message):
        self._log_result(message, 'junking spam containing annoying content')
        move_message(message, 'Junk Email')
        return

    # add 'HelioCampus' category to messages from HC
    if '@heliocampus' in normalized_from:
        self._log_result(message, "adding category 'HelioCampus'")
        add_category(message, 'HelioCampus')
        return

    sender_is_blocked = normalized_from in BLOCK_EMAIL

    # keep messages from allowed emails and domains
    if not sender_is_blocked:
        for good in ALLOW:
            # the sender was lower-cased above, so the ALLOW entry has to be
            # lower-cased too or mixed-case entries can never match
            if good.lower() in normalized_from:
                self._log_result(
                    message, f"keeping message from allowed sender {good}")
                return

    # junk messages from blocked senders
    if sender_is_blocked:
        self._log_result(message, 'junking spam from blocked sender')
        move_message(message, 'Junk Email')
        return

    # junk messages with blocked keywords
    if is_in_message(BLOCK_KEYWORDS, message):
        self._log_result(message, 'junking spam containing blocked keyword')
        move_message(message, 'Junk Email')
        return

    # junk messages from blocked domains
    if any(domain in normalized_from or search_headers(domain, headers)
           for domain in BLOCK_DOMAINS):
        self._log_result(message, 'junking spam from blocked domain')
        move_message(message, 'Junk Email')
        return

    # junk known spam headers
    if (_spam_confidence_level(headers) >= 5
            or get_header('X-Mailgun-List-Address', headers)
            or get_header('X-SFDC-EmailCategory', headers) == 'apimassmail'):
        self._log_result(message, 'junking spam with known header')
        move_message(message, 'Junk Email')
        return

    # add 'OSU Inform' category to internal messages sent to DLs
    if ('@oregonstate.edu' in normalized_from
            and ((not get_header('To', headers)
                  and not get_header('List-Id', headers))
                 or ('inform-c' in ' '.join(normalized_to)))):
        self._log_result(message, "adding category 'OSU Inform'")
        add_category(message, 'OSU Inform')
        return

    # KEEP MESSAGE
    self._log_result(message, 'keeping message, passed all filter checks')


def _spam_confidence_level(headers):
    """Return the X-MS-Exchange-Organization-SCL header as an int.

    A missing header counts as 0. get_header() returns a list when the header
    occurs more than once; in that case the highest parsable value is used.
    Values that are not integers are ignored instead of raising.
    """
    raw = get_header('X-MS-Exchange-Organization-SCL', headers)
    candidates = raw if isinstance(raw, list) else [raw]
    levels = []
    for candidate in candidates:
        try:
            # int(False) == 0, which covers the "header missing" case
            levels.append(int(candidate))
        except (TypeError, ValueError):
            continue
    return max(levels, default=0)


def is_in_message(list_, message):
    """search a message for a list of strings

    Returns True if any string in the list is found (case-insensitively, as a
    literal substring) in the message subject, sender address or body.
    """
    # normalize inputs; '\ufeff' (byte order mark) is stripped before matching
    search_terms = [x.lower() for x in list_]
    haystacks = (
        message.subject.lower().replace('\ufeff', ''),
        message.sender.address.lower().replace('\ufeff', ''),
        message.body.lower().replace('\ufeff', ''),
    )

    # literal containment: terms are plain phrases, so characters such as
    # '.', '[' or '+' must not be interpreted as regex syntax
    return any(
        term in haystack
        for term in search_terms
        for haystack in haystacks
    )


def move_message(message, folder_name):
    """ move a message to a folder stored in the folder cache

    Returns False (and leaves the message alone) when the folder name is not
    in the cache; otherwise returns None after moving.
    """
    if folder_name not in folder_cache:
        return False

    message.move(folder_cache[folder_name])


def add_category(message, category_name):
    """ add an Outlook category to a message

    Returns False (without touching the message) when the category name is
    not in the cache; otherwise adds the category, saves the message and
    returns None.
    """
    if category_name not in category_cache:
        return False

    message.add_category(category_cache[category_name])
    message.save_message()


def init_filters(self):
    """ copy the parent's folder and category lookups into the module caches """
    # hack to copy a dict from parent object into local object
    folder_cache.update(self._folders.items())
    category_cache.update(self._categories.items())


def get_header(header_key, headers):
    """ look up a header by its exact (case-sensitive) name

    headers -- list of dicts as built in filter_message()

    Returns False when no dict has the key, the single value when exactly one
    value was collected, and a list of values when there were several.
    """
    vals = [
        val
        for header in headers
        if header_key in header
        for val in header.values()
    ]

    if not vals:
        return False
    if len(vals) == 1:
        return vals[0]
    return vals


def search_headers(search, headers):
    """ return True if `search` is a substring of any header value """
    return any(
        search in val
        for header in headers
        for val in header.values()
    )