diff --git a/filter-rules.py b/filter-rules.py index 66ef6c6..b831675 100644 --- a/filter-rules.py +++ b/filter-rules.py @@ -11,29 +11,19 @@ BLOCK_EMAIL = [ ] BLOCK_KEYWORDS = [ - 'charitable fund drive', - 'election reminder', 'email preferences', 'end these email updates', - 'food drive', - 'food share', - 'general election', - 'giving tuesday', - 'hardship leave donations needed', 'manage your preferences', 'modify your preferences', 'opt-out', 'opt out', 'prefer not to receive', 'prefer not to see', - 'register to vote' 'Samsung SDS America', 'sidekickopen', - 'special election', 'subscription preferences', 'survey', 'unsubscribe', - 'voter registration', 'want to receive', 'webinar', 'whitepaper', @@ -42,7 +32,21 @@ BLOCK_KEYWORDS = [ BLOCK_DOMAINS = [ 'customeriomail.com', - 'mailgun.net' + 'mailgun.net', + 'norstar.net' +] + +BLOCK_ANNOYING = [ + 'charitable fund drive', + 'election reminder', + 'food drive', + 'food share', + 'general election', + 'giving tuesday', + 'hardship leave donations needed', + 'register to vote' + 'special election', + 'voter registration' ] ALLOW = [ @@ -65,11 +69,25 @@ ALLOW = [ 'microsoft.com' ] +normalized = {} +folder_cache = {} + def filter_message(self, message): # normalize message attributes normalized_to = [x.address.lower() for x in message.to] normalized_from = message.sender.address.lower() - normalized_subject = message.subject.lower() + + # filter unactionable coresys emails + if 'coresys@lists.oregonstate.edu' in normalized_from: + unactionable = [x.lower() for x in [ + 'DWPRODRAW Verification', + 'BFPDB Reimbursement Job Monitor - OK', + 'DSDB Important Job Monitor - OK' + ]] + if is_in_message(unactionable, message): + self._log_result(message, 'moving to unactionable') + move_message(message, 'unactionable') + return # filter alerts-sig if (normalized_from in ['mcc-b11-stor1@oregonstate.edu', @@ -78,7 +96,7 @@ def filter_message(self, message): 'isilon@storage.sig.oregonstate.edu'] or 'alarm.DatastoreDiskUsageAlarm' in message.subject): self._log_result(message, 'moving to alerts-sig') - message.move(self._folders['alerts-sig']) + move_message(message, 'alerts-sig') return # filter conference spam @@ -90,7 +108,7 @@ def filter_message(self, message): # filter backup-nightly if 'backup-nightly@lists.oregonstate.edu' in message.to: self._log_result(message, 'moving to backup-nightly') - message.move(self._folders['backup-nightly']) + move_message(message, 'backup-nightly') return # delete HP alert spam @@ -99,6 +117,12 @@ def filter_message(self, message): message.delete() return + # delete other spam + if is_in_message(normalized['BLOCK_ANNOYING'], message): + self._log_result(message, 'junking spam containing annoying content') + move_message(message, 'Junk Email') + return + # keep messages from allowed emails and domains for good in ALLOW: if good in normalized_from and normalized_from not in BLOCK_EMAIL: @@ -109,22 +133,14 @@ def filter_message(self, message): # junk messages from blocked senders if normalized_from in BLOCK_EMAIL: self._log_result(message, 'junking spam from blocked sender') - message.move(self._folders['Junk Email']) + move_message(message, 'Junk Email') return - # junk messages with blocked keywords in message body - is_spam = False - message_body = message.body.lower() - for phrase in self._normalized['BLOCK_KEYWORDS']: - if phrase in normalized_subject: - is_spam = True - break - if phrase in message_body: - is_spam = True - break - if is_spam: + # junk messages with blocked keywords + if is_in_message(normalized['BLOCK_KEYWORDS'], message): self._log_result(message, 'junking spam containing blocked keyword') - message.move(self._folders['Junk Email']) + move_message(message, 'Junk Email') + return # process message headers into a sane data structure @@ -144,21 +160,49 @@ def filter_message(self, message): break if is_spam: self._log_result(message, 'junking spam from blocked domain') - message.move(self._folders['Junk Email']) + move_message(message, 'Junk Email') return # junk known spam headers if (get_header('X-Spam-Flag' == 'YES', headers) or int(get_header('X-MS-Exchange-Organization-SCL', headers)) >= 5): self._log_result(message, 'junking spam with known header') - message.move(self._folders['Junk Email']) + move_message(message, 'Junk Email') return # KEEP MESSAGE self._log_result(message, 'keeping message, passed all filter checks') -def normalize_lists(self): - self._normalized['BLOCK_KEYWORDS'] = [x.lower() for x in BLOCK_KEYWORDS] +def is_in_message(list_, message): + """search a message for a list of strings + + Returns True if any string in the list is found in the message + """ + is_found = False + message_body = message.body.lower() + message_subject = message.subject.lower() + + for term in list_: + if term in message_subject: + is_found = True + break + if term in message_body: + is_found = True + break + return is_found + +def move_message(message, folder): + if folder not in folder_cache: + return False + message.move(folder_cache[folder]) + +def init_filters(self): + # hack to copy a dict from parent object into local object + for k,v in self._folders.items(): + folder_cache[k] = v + + normalized['BLOCK_KEYWORDS'] = [x.lower() for x in BLOCK_KEYWORDS] + normalized['BLOCK_ANNOYING'] = [x.lower() for x in BLOCK_ANNOYING] def get_header(header_key, headers): vals = [] @@ -174,9 +218,8 @@ def get_header(header_key, headers): return False def search_headers(search, headers): - vals = [] - is_found = False for header in headers: for val in header.values(): if search in val: return True + return False