Compare commits
10 Commits
6e942d329f
...
629942d447
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
629942d447 | ||
|
|
ce3ec9b425 | ||
|
|
58978a718f | ||
|
|
163d97120e | ||
|
|
5dbb0cb8af | ||
|
|
28134aa957 | ||
|
|
fa2cb03b4d | ||
|
|
5b9a5fcf9d | ||
|
|
839c3ff389 | ||
|
|
defa1de136 |
171
filter-rules.py
171
filter-rules.py
@@ -1,34 +1,34 @@
|
||||
import re
|
||||
|
||||
BLOCK_EMAIL = [
|
||||
'chirhart@amazon.com',
|
||||
'ron.krogel@citrix.com',
|
||||
'lisa@duo.com',
|
||||
'replieswelcome@duo.com',
|
||||
'webinars@duo.com',
|
||||
'support-noreply@status.duosecurity.com',
|
||||
'alerts@alerts.mail.hpe.com',
|
||||
'viva-noreply@microsoft.com',
|
||||
'jramiro@pagerduty.com',
|
||||
'info@snowflake.com',
|
||||
'noreply-marketplace@zoom.us',
|
||||
'equal.opportunity@oregonstate.edu',
|
||||
'diversity@oregonstate.edu',
|
||||
'evals@oregonstate.edu',
|
||||
'sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com',
|
||||
'noreply@msexperience.microsoft.com',
|
||||
]
|
||||
|
||||
BLOCK_KEYWORDS = [
|
||||
'advertising services',
|
||||
'closeml.com',
|
||||
'advertising service',
|
||||
'email campaign',
|
||||
'email preferences',
|
||||
'end these email updates',
|
||||
'manage your preferences',
|
||||
'hs-sales-engage.com',
|
||||
'long term care',
|
||||
'marketing',
|
||||
'megacast',
|
||||
'modify your preferences',
|
||||
'my subscription',
|
||||
'newsletter',
|
||||
'no longer would like to be contacted',
|
||||
'opt-out',
|
||||
'opt out',
|
||||
'piano',
|
||||
'pmp exam',
|
||||
'prefer not to receive',
|
||||
'prefer not to see',
|
||||
'rather not receive',
|
||||
@@ -45,10 +45,14 @@ BLOCK_KEYWORDS = [
|
||||
'want to receive',
|
||||
'webcast',
|
||||
'webinar',
|
||||
'welder',
|
||||
'white paper',
|
||||
'whitepaper',
|
||||
'wish to be contacted',
|
||||
'wish to receive',
|
||||
'your notifications',
|
||||
'your preferences',
|
||||
'your subscription',
|
||||
]
|
||||
|
||||
BLOCK_DOMAINS = [
|
||||
@@ -62,6 +66,8 @@ BLOCK_DOMAINS = [
|
||||
'denodo.com',
|
||||
'exacttarget.com',
|
||||
'freshsales.io',
|
||||
'hrciconnect.com',
|
||||
'hso.com',
|
||||
'ikigailabs.io',
|
||||
'impetus.com',
|
||||
'informareachmedia.com',
|
||||
@@ -76,6 +82,7 @@ BLOCK_DOMAINS = [
|
||||
'norstar.net',
|
||||
'orjuno.com',
|
||||
'pphosted.com',
|
||||
'qualitynetworks.com',
|
||||
'radware.com',
|
||||
'rsmatco.com',
|
||||
'rubrain.agency',
|
||||
@@ -84,6 +91,7 @@ BLOCK_DOMAINS = [
|
||||
'techmate.com',
|
||||
'thesourcery.com',
|
||||
'trustedmailservers.com',
|
||||
'ubtiinc.com',
|
||||
'zerowait.com',
|
||||
'znsrc.com'
|
||||
]
|
||||
@@ -97,7 +105,7 @@ BLOCK_ANNOYING = [
|
||||
'general election',
|
||||
'giving tuesday',
|
||||
'hardship leave donations needed',
|
||||
'register to vote'
|
||||
'register to vote',
|
||||
'service desk survey request',
|
||||
'special election',
|
||||
'voter registration'
|
||||
@@ -108,7 +116,6 @@ ALLOW = [
|
||||
'github.com',
|
||||
'osu.atlassian.net',
|
||||
'oregonstateuniversity.atlassian.net',
|
||||
'duo.com',
|
||||
'sns.amazonaws.com',
|
||||
'opsgenie.net',
|
||||
'notify@teamdynamixapp.com',
|
||||
@@ -127,7 +134,8 @@ ALLOW = [
|
||||
'heliocampus.atlassian.net',
|
||||
'ctptravelservices.com',
|
||||
'Travel@concursolutions.com',
|
||||
'substack.com',
|
||||
'ghost.io',
|
||||
'orders@catertrax.com',
|
||||
'nytdirect@nytimes.com'
|
||||
]
|
||||
|
||||
@@ -155,7 +163,7 @@ def filter_message(self, message):
|
||||
'changes_osu@heliocampus.com',
|
||||
'no-reply@vmockmail.com',
|
||||
'noreply-beaverhub@oregonstate.edu',
|
||||
'noreply@chatter.salesforce.com'
|
||||
'chatter-beaverhub@oregonstate.edu',
|
||||
]
|
||||
if bool([x for x in automated_sources if(x in normalized_from)]):
|
||||
unactionable = [x.lower() for x in [
|
||||
@@ -184,13 +192,26 @@ def filter_message(self, message):
|
||||
'JV required for',
|
||||
'Changes via Tableau REST API at OSU',
|
||||
'DSDBTEST',
|
||||
'has been assigned to you or your queue',
|
||||
'VMock CSV S3 Upload',
|
||||
'Your Daily Digest for Oregon State University'
|
||||
]]
|
||||
if is_in_message(unactionable, message):
|
||||
self._log_result(message, 'moving to unactionable')
|
||||
move_message(message, 'unactionable')
|
||||
move_message(message, 'zzz-unactionable')
|
||||
return
|
||||
|
||||
# filter servicenow email notifications
|
||||
if 'mysupport-replies@oregonstate.edu' in normalized_from:
|
||||
keep = [
|
||||
'opened on your behalf',
|
||||
'your request REQ'
|
||||
]
|
||||
if is_in_message(keep, message):
|
||||
self._log_result(message, "keeping servicenow message")
|
||||
return
|
||||
else:
|
||||
self._log_result(message, 'moving to servicenow')
|
||||
move_message(message, 'zzz-servicenow')
|
||||
return
|
||||
|
||||
# filter unactionable InCommon SSL cert emails
|
||||
@@ -209,21 +230,31 @@ def filter_message(self, message):
|
||||
return
|
||||
else:
|
||||
self._log_result(message, 'moving to unactionable')
|
||||
move_message(message, 'unactionable')
|
||||
move_message(message, 'zzz-unactionable')
|
||||
return
|
||||
|
||||
# filter Student CRM
|
||||
if 'noreply-beaverhub@oregonstate.edu' in normalized_from:
|
||||
unactionable = [
|
||||
'has been assigned to you or your queue',
|
||||
]
|
||||
if is_in_message(unactionable, message):
|
||||
self._log_result(message, 'moving to unactionable')
|
||||
move_message(message, 'zzz-unactionable')
|
||||
return
|
||||
|
||||
# filter dependabot
|
||||
if 'dependabot[bot]' in str(message.sender):
|
||||
if ('dependabot[bot]' in str(message.sender)
|
||||
or is_in_message(['Your Dependabot alerts'], message)):
|
||||
self._log_result(message, 'moving to dependabot')
|
||||
move_message(message, '99-dependabot')
|
||||
move_message(message, 'zzz-dependabot')
|
||||
return
|
||||
|
||||
# filter ACTWON
|
||||
if 'actwon_administration@lists.oregonstate.edu' in normalized_from:
|
||||
if not is_in_message(['stacy brock', 'mist', 'integration'], message):
|
||||
self._log_result(message, 'moving to ACTWON')
|
||||
move_message(message, 'lists/ACTWON')
|
||||
return
|
||||
self._log_result(message, 'moving to ACTWON')
|
||||
move_message(message, 'lists/ACTWON')
|
||||
return
|
||||
|
||||
# filter ACUG
|
||||
if 'isacug@oregonstate.edu' in normalized_to:
|
||||
@@ -233,8 +264,8 @@ def filter_message(self, message):
|
||||
|
||||
# filter HelioCampus helpdesk
|
||||
if 'jira@heliocampus.atlassian.net' in normalized_from:
|
||||
self._log_result(message, 'moving to 5-hc-helpdesk')
|
||||
move_message(message, '5-hc-helpdesk')
|
||||
self._log_result(message, 'moving to zzz-hc-helpdesk')
|
||||
move_message(message, 'zzz-hc-helpdesk')
|
||||
return
|
||||
|
||||
# filter alerts-sig
|
||||
@@ -257,24 +288,33 @@ def filter_message(self, message):
|
||||
# filter quarantine spam
|
||||
if 'quarantine@messaging.microsoft.com' in normalized_from:
|
||||
self._log_result(message, 'moving to unactionable')
|
||||
move_message(message, 'unactionable')
|
||||
move_message(message, 'zzz-unactionable')
|
||||
return
|
||||
|
||||
# delete Atlassian spam
|
||||
if ('confluence@osu.atlassian.net' in normalized_from
|
||||
and '[Confluence] Daily Digest' in message.subject):
|
||||
self._log_result(message, 'deleting atlassian spam')
|
||||
message.delete()
|
||||
return
|
||||
# delete atlassian spam
|
||||
if 'confluence@osu.atlassian.net' in normalized_from:
|
||||
unactionable = [
|
||||
'sv1_ds_atlassian',
|
||||
'[confluence] daily digest',
|
||||
]
|
||||
if is_in_message(unactionable, message):
|
||||
self._log_result(message, 'deleting atlassian spam')
|
||||
message.delete()
|
||||
return
|
||||
|
||||
# delete Salesforce spam
|
||||
if ('salesforce.com' in normalized_from
|
||||
and 'sandbox' in message.subject.lower()):
|
||||
self._log_result(message, 'deleting salseforce spam')
|
||||
message.delete()
|
||||
return
|
||||
# delete salesforce spam
|
||||
if 'salesforce.com' in normalized_from:
|
||||
unactionable = [x.lower() for x in [
|
||||
'sandbox',
|
||||
'resetting your Salesforce password',
|
||||
'new Salesforce security token',
|
||||
]]
|
||||
if is_in_message(unactionable, message):
|
||||
self._log_result(message, 'deleting salesforce spam')
|
||||
message.delete()
|
||||
return
|
||||
|
||||
# delete Rave junk
|
||||
# delete rave junk
|
||||
if ('guardian@getrave.com' in normalized_from
|
||||
and 'new guardian chat' in message.subject.lower()):
|
||||
self._log_result(message, 'deleting rave spam')
|
||||
@@ -282,24 +322,28 @@ def filter_message(self, message):
|
||||
return
|
||||
|
||||
# delete conference spam
|
||||
if 'brocks+conf@onid.oregonstate.edu' in normalized_to:
|
||||
if ('brock+conf@onid.oregonstate.edu' in normalized_to
|
||||
or 'brock+conf@oregonstate.edu' in normalized_to):
|
||||
self._log_result(message, 'deleting conference spam')
|
||||
message.delete()
|
||||
return
|
||||
|
||||
# delete OSU IT Managers list spam
|
||||
if 'osuitmanagers@oregonstate.edu' in normalized_to:
|
||||
self._log_result(message, 'deleting osu it managers spam')
|
||||
message.delete()
|
||||
return
|
||||
|
||||
# delete other spam
|
||||
if is_in_message(normalized['BLOCK_ANNOYING'], message):
|
||||
if is_in_message(BLOCK_ANNOYING, message):
|
||||
self._log_result(message, 'junking spam containing annoying content')
|
||||
move_message(message, 'Junk Email')
|
||||
return
|
||||
|
||||
# add 'OSU Inform' category to internal messages sent to DLs
|
||||
if ('@oregonstate.edu' in normalized_from
|
||||
and ((not get_header('To', headers)
|
||||
and not get_header('List-Id', headers))
|
||||
or ('inform-c' in ' '.join(normalized_to)))):
|
||||
self._log_result(message, "adding category 'OSU Inform'")
|
||||
add_category(message, 'OSU Inform')
|
||||
# add 'HelioCampus' category to messages from HC
|
||||
if '@heliocampus' in normalized_from:
|
||||
self._log_result(message, "adding category 'HelioCampus'")
|
||||
add_category(message, 'HelioCampus')
|
||||
return
|
||||
|
||||
# keep messages from allowed emails and domains
|
||||
@@ -316,7 +360,7 @@ def filter_message(self, message):
|
||||
return
|
||||
|
||||
# junk messages with blocked keywords
|
||||
if is_in_message(normalized['BLOCK_KEYWORDS'], message):
|
||||
if is_in_message(BLOCK_KEYWORDS, message):
|
||||
self._log_result(message, 'junking spam containing blocked keyword')
|
||||
move_message(message, 'Junk Email')
|
||||
return
|
||||
@@ -343,6 +387,15 @@ def filter_message(self, message):
|
||||
move_message(message, 'Junk Email')
|
||||
return
|
||||
|
||||
# add 'OSU Inform' category to internal messages sent to DLs
|
||||
if ('@oregonstate.edu' in normalized_from
|
||||
and ((not get_header('To', headers)
|
||||
and not get_header('List-Id', headers))
|
||||
or ('inform-c' in ' '.join(normalized_to)))):
|
||||
self._log_result(message, "adding category 'OSU Inform'")
|
||||
add_category(message, 'OSU Inform')
|
||||
return
|
||||
|
||||
# KEEP MESSAGE
|
||||
self._log_result(message, 'keeping message, passed all filter checks')
|
||||
|
||||
@@ -352,13 +405,17 @@ def is_in_message(list_, message):
|
||||
Returns True if any string in the list is found in the message
|
||||
"""
|
||||
is_found = False
|
||||
message_body = message.body.lower()
|
||||
message_subject = message.subject.lower()
|
||||
message_from = message.sender.address.lower()
|
||||
|
||||
for term in list_:
|
||||
if (term in message_subject or term in message_from
|
||||
or term in message_body):
|
||||
# normalize inputs
|
||||
search_terms = [x.lower() for x in list_]
|
||||
message_body = message.body.lower().replace('\ufeff', '')
|
||||
message_subject = message.subject.lower().replace('\ufeff', '')
|
||||
message_from = message.sender.address.lower().replace('\ufeff', '')
|
||||
|
||||
for term in search_terms:
|
||||
if (re.search(term, message_subject)
|
||||
or re.search(term, message_from)
|
||||
or re.search(term, message_body)):
|
||||
is_found = True
|
||||
break
|
||||
return is_found
|
||||
@@ -383,10 +440,6 @@ def init_filters(self):
|
||||
for k,v in self._categories.items():
|
||||
category_cache[k] = v
|
||||
|
||||
# normalize filter keyword lists
|
||||
normalized['BLOCK_KEYWORDS'] = [x.lower() for x in BLOCK_KEYWORDS]
|
||||
normalized['BLOCK_ANNOYING'] = [x.lower() for x in BLOCK_ANNOYING]
|
||||
|
||||
def get_header(header_key, headers):
|
||||
vals = []
|
||||
for header in headers:
|
||||
|
||||
@@ -139,12 +139,17 @@ class O365MailFilter(object):
|
||||
def _log_result(self, message, result):
|
||||
log(f"{self._repr_message(message)} RESULT: {result}")
|
||||
|
||||
def filter(self):
|
||||
def filter(self, is_first_run=False):
|
||||
log(f" {pendulum.now()} Getting mailbox...", logging.DEBUG)
|
||||
mailbox = self._account.mailbox()
|
||||
log(f" {pendulum.now()} Getting folder...", logging.DEBUG)
|
||||
inbox = mailbox.inbox_folder()
|
||||
|
||||
self._load_folders(mailbox)
|
||||
self._load_categories()
|
||||
if is_first_run:
|
||||
log(f" {pendulum.now()} Loading folders and categories...", logging.DEBUG)
|
||||
self._load_folders(mailbox)
|
||||
self._load_categories()
|
||||
log(f" {pendulum.now()} Loading filter rules...", logging.DEBUG)
|
||||
self._load_filters()
|
||||
|
||||
# set limit to max allowed by O365, which is 999 messages
|
||||
@@ -156,9 +161,11 @@ class O365MailFilter(object):
|
||||
'to_recipients', 'from', 'subject', 'body',
|
||||
'internet_message_headers'
|
||||
)
|
||||
log(f" {pendulum.now()} Getting messages from inbox...", logging.DEBUG)
|
||||
messages = inbox.get_messages(query=query, limit=limit, batch=25)
|
||||
|
||||
for message in messages:
|
||||
log(f" {pendulum.now()} {message}", logging.DEBUG)
|
||||
if message.object_id in self._filtered_cache['ids']:
|
||||
# we've already filtered this message, so skip it
|
||||
continue
|
||||
@@ -167,13 +174,16 @@ class O365MailFilter(object):
|
||||
|
||||
def run(self):
|
||||
""" run filter as a loop """
|
||||
is_first_run = True
|
||||
while not self._is_canceled:
|
||||
# clear the filtered message cache if it's older than 4 hours
|
||||
if (self._filtered_cache['last_reset'] is None or
|
||||
self._filtered_cache['last_reset'] < pendulum.now().subtract(hours=4)):
|
||||
self._clear_cache()
|
||||
|
||||
self.filter()
|
||||
log(f"Filtering the sludge @ {pendulum.now()}...", logging.DEBUG)
|
||||
self.filter(is_first_run)
|
||||
is_first_run = False
|
||||
time.sleep(self._config['CHECK_INTERVAL'])
|
||||
|
||||
log('Done.')
|
||||
|
||||
@@ -1,18 +1,19 @@
|
||||
beautifulsoup4==4.12.2
|
||||
certifi==2023.11.17
|
||||
charset-normalizer==3.3.2
|
||||
idna==3.6
|
||||
O365==2.0.31
|
||||
backoff==2.2.1
|
||||
beautifulsoup4==4.13.1
|
||||
certifi==2025.1.31
|
||||
charset-normalizer==3.4.1
|
||||
idna==3.10
|
||||
O365==2.0.38
|
||||
oauthlib==3.2.2
|
||||
pendulum==2.1.2
|
||||
python-dateutil==2.8.2
|
||||
pytz-deprecation-shim==0.1.0.post0
|
||||
pytzdata==2020.1
|
||||
requests==2.31.0
|
||||
requests-oauthlib==1.3.1
|
||||
six==1.16.0
|
||||
soupsieve==2.5
|
||||
pendulum==3.0.0
|
||||
python-dateutil==2.9.0.post0
|
||||
requests==2.32.3
|
||||
requests-oauthlib==2.0.0
|
||||
six==1.17.0
|
||||
soupsieve==2.6
|
||||
stringcase==1.2.0
|
||||
tzdata==2023.3
|
||||
tzlocal==4.3.1
|
||||
urllib3==2.1.0
|
||||
time-machine==2.16.0
|
||||
typing_extensions==4.12.2
|
||||
tzdata==2025.1
|
||||
tzlocal==5.2
|
||||
urllib3==2.3.0
|
||||
|
||||
Reference in New Issue
Block a user