Compare commits

..

10 Commits

Author SHA1 Message Date
Stacy Brock
629942d447 Update package requirements 2025-02-03 11:24:26 -08:00
Stacy Brock
ce3ec9b425 Update package requirements 2024-12-17 06:07:34 -08:00
Stacy Brock
58978a718f Update filters 2024-11-20 14:21:50 -08:00
Stacy Brock
163d97120e Update package requirements 2024-08-30 09:16:27 -07:00
Stacy Brock
5dbb0cb8af Normalize all inputs to is_in_message() function 2024-07-29 09:19:57 -07:00
Stacy Brock
28134aa957 Update filters 2024-07-19 12:50:32 -07:00
Stacy Brock
fa2cb03b4d Update package requirements 2024-07-11 08:45:49 -07:00
Stacy Brock
5b9a5fcf9d Update filters 2024-07-10 09:27:09 -07:00
Stacy Brock
839c3ff389 Update filters 2024-06-26 11:13:08 -07:00
Stacy Brock
defa1de136 Speed up processing by loading folders and categories on 1st run
This commit will also
- Update package requirements
- Update filters
2024-02-15 15:15:00 -08:00
3 changed files with 143 additions and 79 deletions

View File

@@ -1,34 +1,34 @@
import re
BLOCK_EMAIL = [
'chirhart@amazon.com',
'ron.krogel@citrix.com',
'lisa@duo.com',
'replieswelcome@duo.com',
'webinars@duo.com',
'support-noreply@status.duosecurity.com',
'alerts@alerts.mail.hpe.com',
'viva-noreply@microsoft.com',
'jramiro@pagerduty.com',
'info@snowflake.com',
'noreply-marketplace@zoom.us',
'equal.opportunity@oregonstate.edu',
'diversity@oregonstate.edu',
'evals@oregonstate.edu',
'sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com',
'noreply@msexperience.microsoft.com',
]
BLOCK_KEYWORDS = [
'advertising services',
'closeml.com',
'advertising service',
'email campaign',
'email preferences',
'end these email updates',
'manage your preferences',
'hs-sales-engage.com',
'long term care',
'marketing',
'megacast',
'modify your preferences',
'my subscription',
'newsletter',
'no longer would like to be contacted',
'opt-out',
'opt out',
'piano',
'pmp exam',
'prefer not to receive',
'prefer not to see',
'rather not receive',
@@ -45,10 +45,14 @@ BLOCK_KEYWORDS = [
'want to receive',
'webcast',
'webinar',
'welder',
'white paper',
'whitepaper',
'wish to be contacted',
'wish to receive',
'your notifications',
'your preferences',
'your subscription',
]
BLOCK_DOMAINS = [
@@ -62,6 +66,8 @@ BLOCK_DOMAINS = [
'denodo.com',
'exacttarget.com',
'freshsales.io',
'hrciconnect.com',
'hso.com',
'ikigailabs.io',
'impetus.com',
'informareachmedia.com',
@@ -76,6 +82,7 @@ BLOCK_DOMAINS = [
'norstar.net',
'orjuno.com',
'pphosted.com',
'qualitynetworks.com',
'radware.com',
'rsmatco.com',
'rubrain.agency',
@@ -84,6 +91,7 @@ BLOCK_DOMAINS = [
'techmate.com',
'thesourcery.com',
'trustedmailservers.com',
'ubtiinc.com',
'zerowait.com',
'znsrc.com'
]
@@ -97,7 +105,7 @@ BLOCK_ANNOYING = [
'general election',
'giving tuesday',
'hardship leave donations needed',
'register to vote'
'register to vote',
'service desk survey request',
'special election',
'voter registration'
@@ -108,7 +116,6 @@ ALLOW = [
'github.com',
'osu.atlassian.net',
'oregonstateuniversity.atlassian.net',
'duo.com',
'sns.amazonaws.com',
'opsgenie.net',
'notify@teamdynamixapp.com',
@@ -127,7 +134,8 @@ ALLOW = [
'heliocampus.atlassian.net',
'ctptravelservices.com',
'Travel@concursolutions.com',
'substack.com',
'ghost.io',
'orders@catertrax.com',
'nytdirect@nytimes.com'
]
@@ -155,7 +163,7 @@ def filter_message(self, message):
'changes_osu@heliocampus.com',
'no-reply@vmockmail.com',
'noreply-beaverhub@oregonstate.edu',
'noreply@chatter.salesforce.com'
'chatter-beaverhub@oregonstate.edu',
]
if bool([x for x in automated_sources if(x in normalized_from)]):
unactionable = [x.lower() for x in [
@@ -184,13 +192,26 @@ def filter_message(self, message):
'JV required for',
'Changes via Tableau REST API at OSU',
'DSDBTEST',
'has been assigned to you or your queue',
'VMock CSV S3 Upload',
'Your Daily Digest for Oregon State University'
]]
if is_in_message(unactionable, message):
self._log_result(message, 'moving to unactionable')
move_message(message, 'unactionable')
move_message(message, 'zzz-unactionable')
return
# filter servicenow email notifications
if 'mysupport-replies@oregonstate.edu' in normalized_from:
keep = [
'opened on your behalf',
'your request REQ'
]
if is_in_message(keep, message):
self._log_result(message, "keeping servicenow message")
return
else:
self._log_result(message, 'moving to servicenow')
move_message(message, 'zzz-servicenow')
return
# filter unactionable InCommon SSL cert emails
@@ -209,21 +230,31 @@ def filter_message(self, message):
return
else:
self._log_result(message, 'moving to unactionable')
move_message(message, 'unactionable')
move_message(message, 'zzz-unactionable')
return
# filter Student CRM
if 'noreply-beaverhub@oregonstate.edu' in normalized_from:
unactionable = [
'has been assigned to you or your queue',
]
if is_in_message(unactionable, message):
self._log_result(message, 'moving to unactionable')
move_message(message, 'zzz-unactionable')
return
# filter dependabot
if 'dependabot[bot]' in str(message.sender):
if ('dependabot[bot]' in str(message.sender)
or is_in_message(['Your Dependabot alerts'], message)):
self._log_result(message, 'moving to dependabot')
move_message(message, '99-dependabot')
move_message(message, 'zzz-dependabot')
return
# filter ACTWON
if 'actwon_administration@lists.oregonstate.edu' in normalized_from:
if not is_in_message(['stacy brock', 'mist', 'integration'], message):
self._log_result(message, 'moving to ACTWON')
move_message(message, 'lists/ACTWON')
return
self._log_result(message, 'moving to ACTWON')
move_message(message, 'lists/ACTWON')
return
# filter ACUG
if 'isacug@oregonstate.edu' in normalized_to:
@@ -233,8 +264,8 @@ def filter_message(self, message):
# filter HelioCampus helpdesk
if 'jira@heliocampus.atlassian.net' in normalized_from:
self._log_result(message, 'moving to 5-hc-helpdesk')
move_message(message, '5-hc-helpdesk')
self._log_result(message, 'moving to zzz-hc-helpdesk')
move_message(message, 'zzz-hc-helpdesk')
return
# filter alerts-sig
@@ -257,24 +288,33 @@ def filter_message(self, message):
# filter quarantine spam
if 'quarantine@messaging.microsoft.com' in normalized_from:
self._log_result(message, 'moving to unactionable')
move_message(message, 'unactionable')
move_message(message, 'zzz-unactionable')
return
# delete Atlassian spam
if ('confluence@osu.atlassian.net' in normalized_from
and '[Confluence] Daily Digest' in message.subject):
self._log_result(message, 'deleting atlassian spam')
message.delete()
return
# delete atlassian spam
if 'confluence@osu.atlassian.net' in normalized_from:
unactionable = [
'sv1_ds_atlassian',
'[confluence] daily digest',
]
if is_in_message(unactionable, message):
self._log_result(message, 'deleting atlassian spam')
message.delete()
return
# delete Salesforce spam
if ('salesforce.com' in normalized_from
and 'sandbox' in message.subject.lower()):
self._log_result(message, 'deleting salseforce spam')
message.delete()
return
# delete salesforce spam
if 'salesforce.com' in normalized_from:
unactionable = [x.lower() for x in [
'sandbox',
'resetting your Salesforce password',
'new Salesforce security token',
]]
if is_in_message(unactionable, message):
self._log_result(message, 'deleting salesforce spam')
message.delete()
return
# delete Rave junk
# delete rave junk
if ('guardian@getrave.com' in normalized_from
and 'new guardian chat' in message.subject.lower()):
self._log_result(message, 'deleting rave spam')
@@ -282,24 +322,28 @@ def filter_message(self, message):
return
# delete conference spam
if 'brocks+conf@onid.oregonstate.edu' in normalized_to:
if ('brock+conf@onid.oregonstate.edu' in normalized_to
or 'brock+conf@oregonstate.edu' in normalized_to):
self._log_result(message, 'deleting conference spam')
message.delete()
return
# delete OSU IT Managers list spam
if 'osuitmanagers@oregonstate.edu' in normalized_to:
self._log_result(message, 'deleting osu it managers spam')
message.delete()
return
# delete other spam
if is_in_message(normalized['BLOCK_ANNOYING'], message):
if is_in_message(BLOCK_ANNOYING, message):
self._log_result(message, 'junking spam containing annoying content')
move_message(message, 'Junk Email')
return
# add 'OSU Inform' category to internal messages sent to DLs
if ('@oregonstate.edu' in normalized_from
and ((not get_header('To', headers)
and not get_header('List-Id', headers))
or ('inform-c' in ' '.join(normalized_to)))):
self._log_result(message, "adding category 'OSU Inform'")
add_category(message, 'OSU Inform')
# add 'HelioCampus' category to messages from HC
if '@heliocampus' in normalized_from:
self._log_result(message, "adding category 'HelioCampus'")
add_category(message, 'HelioCampus')
return
# keep messages from allowed emails and domains
@@ -316,7 +360,7 @@ def filter_message(self, message):
return
# junk messages with blocked keywords
if is_in_message(normalized['BLOCK_KEYWORDS'], message):
if is_in_message(BLOCK_KEYWORDS, message):
self._log_result(message, 'junking spam containing blocked keyword')
move_message(message, 'Junk Email')
return
@@ -343,6 +387,15 @@ def filter_message(self, message):
move_message(message, 'Junk Email')
return
# add 'OSU Inform' category to internal messages sent to DLs
if ('@oregonstate.edu' in normalized_from
and ((not get_header('To', headers)
and not get_header('List-Id', headers))
or ('inform-c' in ' '.join(normalized_to)))):
self._log_result(message, "adding category 'OSU Inform'")
add_category(message, 'OSU Inform')
return
# KEEP MESSAGE
self._log_result(message, 'keeping message, passed all filter checks')
@@ -352,13 +405,17 @@ def is_in_message(list_, message):
Returns True if any string in the list is found in the message
"""
is_found = False
message_body = message.body.lower()
message_subject = message.subject.lower()
message_from = message.sender.address.lower()
for term in list_:
if (term in message_subject or term in message_from
or term in message_body):
# normalize inputs
search_terms = [x.lower() for x in list_]
message_body = message.body.lower().replace('\ufeff', '')
message_subject = message.subject.lower().replace('\ufeff', '')
message_from = message.sender.address.lower().replace('\ufeff', '')
for term in search_terms:
if (re.search(term, message_subject)
or re.search(term, message_from)
or re.search(term, message_body)):
is_found = True
break
return is_found
@@ -383,10 +440,6 @@ def init_filters(self):
for k,v in self._categories.items():
category_cache[k] = v
# normalize filter keyword lists
normalized['BLOCK_KEYWORDS'] = [x.lower() for x in BLOCK_KEYWORDS]
normalized['BLOCK_ANNOYING'] = [x.lower() for x in BLOCK_ANNOYING]
def get_header(header_key, headers):
vals = []
for header in headers:

View File

@@ -139,12 +139,17 @@ class O365MailFilter(object):
def _log_result(self, message, result):
log(f"{self._repr_message(message)} RESULT: {result}")
def filter(self):
def filter(self, is_first_run=False):
log(f" {pendulum.now()} Getting mailbox...", logging.DEBUG)
mailbox = self._account.mailbox()
log(f" {pendulum.now()} Getting folder...", logging.DEBUG)
inbox = mailbox.inbox_folder()
self._load_folders(mailbox)
self._load_categories()
if is_first_run:
log(f" {pendulum.now()} Loading folders and categories...", logging.DEBUG)
self._load_folders(mailbox)
self._load_categories()
log(f" {pendulum.now()} Loading filter rules...", logging.DEBUG)
self._load_filters()
# set limit to max allowed by O365, which is 999 messages
@@ -156,9 +161,11 @@ class O365MailFilter(object):
'to_recipients', 'from', 'subject', 'body',
'internet_message_headers'
)
log(f" {pendulum.now()} Getting messages from inbox...", logging.DEBUG)
messages = inbox.get_messages(query=query, limit=limit, batch=25)
for message in messages:
log(f" {pendulum.now()} {message}", logging.DEBUG)
if message.object_id in self._filtered_cache['ids']:
# we've already filtered this message, so skip it
continue
@@ -167,13 +174,16 @@ class O365MailFilter(object):
def run(self):
""" run filter as a loop """
is_first_run = True
while not self._is_canceled:
# clear the filtered message cache if it's older than 4 hours
if (self._filtered_cache['last_reset'] is None or
self._filtered_cache['last_reset'] < pendulum.now().subtract(hours=4)):
self._clear_cache()
self.filter()
log(f"Filtering the sludge @ {pendulum.now()}...", logging.DEBUG)
self.filter(is_first_run)
is_first_run = False
time.sleep(self._config['CHECK_INTERVAL'])
log('Done.')

View File

@@ -1,18 +1,19 @@
beautifulsoup4==4.12.2
certifi==2023.11.17
charset-normalizer==3.3.2
idna==3.6
O365==2.0.31
backoff==2.2.1
beautifulsoup4==4.13.1
certifi==2025.1.31
charset-normalizer==3.4.1
idna==3.10
O365==2.0.38
oauthlib==3.2.2
pendulum==2.1.2
python-dateutil==2.8.2
pytz-deprecation-shim==0.1.0.post0
pytzdata==2020.1
requests==2.31.0
requests-oauthlib==1.3.1
six==1.16.0
soupsieve==2.5
pendulum==3.0.0
python-dateutil==2.9.0.post0
requests==2.32.3
requests-oauthlib==2.0.0
six==1.17.0
soupsieve==2.6
stringcase==1.2.0
tzdata==2023.3
tzlocal==4.3.1
urllib3==2.1.0
time-machine==2.16.0
typing_extensions==4.12.2
tzdata==2025.1
tzlocal==5.2
urllib3==2.3.0