# Sender addresses (lowercase) whose mail is always junked, even when the
# address would otherwise match an ALLOW entry.
BLOCK_EMAIL = [
    'chirhart@amazon.com',
    'ron.krogel@citrix.com',
    'replieswelcome@duo.com',
    'webinars@duo.com',
    'diversity@oregonstate.edu',
    'jramiro@pagerduty.com',
    'info@snowflake.com',
    'lisa@duo.com',
    'cortana@microsoft.com',
]

# Phrases that mark a message as spam when they appear in the subject or
# body. They are lowercased by normalize_lists() before being compared.
BLOCK_KEYWORDS = [
    'charitable fund drive',
    'election reminder',
    'email preferences',
    'end these email updates',
    'food drive',
    'food share',
    'general election',
    'giving tuesday',
    'hardship leave donations needed',
    'manage your preferences',
    'modify your preferences',
    'opt-out',
    'opt out',
    'prefer not to receive',
    'prefer not to see',
    'register to vote',
    'Samsung SDS America',
    'sidekickopen',
    'special election',
    'subscription preferences',
    'survey',
    'unsubscribe',
    'voter registration',
    'want to receive',
    'webinar',
    'whitepaper',
    'wish to receive',
]

# Domains that are junked when found in the sender address or in any header.
BLOCK_DOMAINS = []

# Addresses and domains whose mail is kept without keyword/header checks.
ALLOW = [
    'oregonstate.edu',
    'github.com',
    'duo.com',
    'sns.amazonaws.com',
    'opsgenie.net',
    'notify@teamdynamixapp.com',
    'newsbites@email.sans.org',
    'noreply@box.com',
    'noreply@email.teams.microsoft.com',
    'no-reply@sharepointonline.com',
    'slalom.com',
    'govdelivery.com',
    'linkoregon.org',
    'megan@pdxwit.org',
    'busyconf.com',
    'support@githubsupport.com',
    'microsoft.com',
]

# Senders whose mail is filed into the 'alerts-sig' folder.
_ALERTS_SIG_SENDERS = (
    'mcc-b11-stor1@oregonstate.edu',
    'mcc-b12-stor1@oregonstate.edu',
    'ousclus@oregonstate.edu',
    'isilon@storage.sig.oregonstate.edu',
)

# Messages whose X-MS-Exchange-Organization-SCL value reaches this are junked.
_SCL_JUNK_THRESHOLD = 5


def filter_message(self, message):
    """Apply the filter rules to one message; the first matching rule wins.

    Depending on the rule that matches, the message is moved to a folder
    from ``self._folders``, deleted, or left where it is. Every outcome is
    reported through ``self._log_result``.

    ``self._normalized['BLOCK_KEYWORDS']`` must already be populated (see
    ``normalize_lists``).

    Args:
        self: object providing ``_log_result``, ``_folders`` and
            ``_normalized``.
        message: object providing ``to``, ``sender``, ``subject``, ``body``,
            ``message_headers``, ``move()`` and ``delete()``.
    """
    # normalize message attributes
    normalized_to = [x.address.lower() for x in message.to]
    normalized_from = message.sender.address.lower()
    subject = message.subject or ''
    normalized_subject = subject.lower()

    # filter alerts-sig (the alarm name is matched case-sensitively)
    if (normalized_from in _ALERTS_SIG_SENDERS
            or 'alarm.DatastoreDiskUsageAlarm' in subject):
        self._log_result(message, 'moving to alerts-sig')
        message.move(self._folders['alerts-sig'])
        return

    # filter conference spam
    if 'brocks+conf@onid.oregonstate.edu' in normalized_to:
        self._log_result(message, 'deleting conference spam')
        message.delete()
        return

    # filter backup-nightly; compare against the lowercased address strings
    # built above, since message.to holds recipient objects
    if 'backup-nightly@lists.oregonstate.edu' in normalized_to:
        self._log_result(message, 'moving to backup-nightly')
        message.move(self._folders['backup-nightly'])
        return

    # delete HP alert spam
    if normalized_from == 'alerts@alerts.mail.hpe.com':
        self._log_result(message, 'deleting HP alert spam')
        message.delete()
        return

    sender_blocked = normalized_from in BLOCK_EMAIL

    # keep messages from allowed emails and domains
    # NOTE(review): this is a substring match, so 'duo.com' also matches
    # 'someone@notduo.com'; confirm whether a stricter domain match is wanted.
    if not sender_blocked:
        for good in ALLOW:
            if good in normalized_from:
                self._log_result(
                    message, f"keeping message from allowed sender {good}")
                return

    # junk messages from blocked senders
    if sender_blocked:
        self._log_result(message, 'junking spam from blocked sender')
        message.move(self._folders['Junk Email'])
        return

    # junk messages with blocked keywords in subject or message body
    message_body = (message.body or '').lower()
    if any(phrase in normalized_subject or phrase in message_body
           for phrase in self._normalized['BLOCK_KEYWORDS']):
        self._log_result(message, 'junking spam containing blocked keyword')
        message.move(self._folders['Junk Email'])
        return

    # process message headers into a list of {name: lowercased value} dicts
    headers = [{header['name']: header['value'].lower()}
               for header in message.message_headers]

    # junk messages from blocked domains
    for domain in BLOCK_DOMAINS:
        # header values are lowercased, so the domain has to be as well
        domain = domain.lower()
        if domain in normalized_from or search_headers(domain, headers):
            self._log_result(message, 'junking spam from blocked domain')
            message.move(self._folders['Junk Email'])
            return

    # junk known spam headers
    if _has_spam_header(headers):
        self._log_result(message, 'junking spam with known header')
        message.move(self._folders['Junk Email'])
        return

    # KEEP MESSAGE
    self._log_result(message, 'keeping message, passed all filter checks')


def normalize_lists(self):
    """Store a lowercased copy of BLOCK_KEYWORDS in ``self._normalized``."""
    self._normalized['BLOCK_KEYWORDS'] = [x.lower() for x in BLOCK_KEYWORDS]


def get_header(header_key, headers):
    """Look up a header by exact name in a list of header dicts.

    Args:
        header_key: header name to look for (case-sensitive dict key).
        headers: list of dicts mapping header name to value.

    Returns:
        ``False`` when no dict contains ``header_key``, the single value
        when exactly one value was collected, otherwise a list of values.
    """
    vals = [val
            for header in headers
            if header_key in header
            for val in header.values()]
    if len(vals) > 1:
        return vals
    if len(vals) == 1:
        return vals[0]
    return False


def search_headers(search, headers):
    """Return True if ``search`` is a substring of any header value.

    Args:
        search: text to look for.
        headers: list of dicts mapping header name to value.
    """
    return any(search in val
               for header in headers
               for val in header.values())


def _header_values(header_key, headers):
    """Return the values get_header() finds, always as a list."""
    found = get_header(header_key, headers)
    if found is False:
        return []
    return found if isinstance(found, list) else [found]


def _has_spam_header(headers):
    """Return True if the headers carry a spam flag or a high SCL score.

    ``headers`` is expected to hold lowercased values, as built by
    ``filter_message``.
    """
    flags = _header_values('X-Spam-Flag', headers)
    if any(flag.strip() == 'yes' for flag in flags):
        return True

    for raw in _header_values('X-MS-Exchange-Organization-SCL', headers):
        try:
            score = int(raw)
        except ValueError:
            # ignore a malformed score instead of aborting the filter run
            continue
        if score >= _SCL_JUNK_THRESHOLD:
            return True
    return False