Speed up processing by loading folders and categories only on the first run

This commit also:
- Updates package requirements
- Updates filters
This commit is contained in:
Stacy Brock
2024-02-15 15:15:00 -08:00
parent 6e942d329f
commit defa1de136
3 changed files with 37 additions and 21 deletions

View File

@@ -62,6 +62,7 @@ BLOCK_DOMAINS = [
'denodo.com',
'exacttarget.com',
'freshsales.io',
'hrciconnect.com',
'ikigailabs.io',
'impetus.com',
'informareachmedia.com',
@@ -127,7 +128,7 @@ ALLOW = [
'heliocampus.atlassian.net',
'ctptravelservices.com',
'Travel@concursolutions.com',
'substack.com',
'ghost.io',
'nytdirect@nytimes.com'
]
@@ -293,13 +294,10 @@ def filter_message(self, message):
move_message(message, 'Junk Email')
return
# add 'OSU Inform' category to internal messages sent to DLs
if ('@oregonstate.edu' in normalized_from
and ((not get_header('To', headers)
and not get_header('List-Id', headers))
or ('inform-c' in ' '.join(normalized_to)))):
self._log_result(message, "adding category 'OSU Inform'")
add_category(message, 'OSU Inform')
# add 'HelioCampus' category to messages from HC
if '@heliocampus' in normalized_from:
self._log_result(message, "adding category 'HelioCampus'")
add_category(message, 'HelioCampus')
return
# keep messages from allowed emails and domains
@@ -343,6 +341,15 @@ def filter_message(self, message):
move_message(message, 'Junk Email')
return
# add 'OSU Inform' category to internal messages sent to DLs
if ('@oregonstate.edu' in normalized_from
and ((not get_header('To', headers)
and not get_header('List-Id', headers))
or ('inform-c' in ' '.join(normalized_to)))):
self._log_result(message, "adding category 'OSU Inform'")
add_category(message, 'OSU Inform')
return
# KEEP MESSAGE
self._log_result(message, 'keeping message, passed all filter checks')

View File

@@ -139,12 +139,17 @@ class O365MailFilter(object):
def _log_result(self, message, result):
log(f"{self._repr_message(message)} RESULT: {result}")
def filter(self):
def filter(self, is_first_run=False):
log(f" {pendulum.now()} Getting mailbox...", logging.DEBUG)
mailbox = self._account.mailbox()
log(f" {pendulum.now()} Getting folder...", logging.DEBUG)
inbox = mailbox.inbox_folder()
self._load_folders(mailbox)
self._load_categories()
if is_first_run:
log(f" {pendulum.now()} Loading folders and categories...", logging.DEBUG)
self._load_folders(mailbox)
self._load_categories()
log(f" {pendulum.now()} Loading filter rules...", logging.DEBUG)
self._load_filters()
# set limit to max allowed by O365, which is 999 messages
@@ -156,9 +161,11 @@ class O365MailFilter(object):
'to_recipients', 'from', 'subject', 'body',
'internet_message_headers'
)
log(f" {pendulum.now()} Getting messages from inbox...", logging.DEBUG)
messages = inbox.get_messages(query=query, limit=limit, batch=25)
for message in messages:
log(f" {pendulum.now()} {message}", logging.DEBUG)
if message.object_id in self._filtered_cache['ids']:
# we've already filtered this message, so skip it
continue
@@ -167,13 +174,16 @@ class O365MailFilter(object):
def run(self):
""" run filter as a loop """
is_first_run = True
while not self._is_canceled:
# clear the filtered message cache if it's older than 4 hours
if (self._filtered_cache['last_reset'] is None or
self._filtered_cache['last_reset'] < pendulum.now().subtract(hours=4)):
self._clear_cache()
self.filter()
log(f"Filtering the sludge @ {pendulum.now()}...", logging.DEBUG)
self.filter(is_first_run)
is_first_run = False
time.sleep(self._config['CHECK_INTERVAL'])
log('Done.')

View File

@@ -1,18 +1,17 @@
beautifulsoup4==4.12.2
certifi==2023.11.17
beautifulsoup4==4.12.3
certifi==2024.2.2
charset-normalizer==3.3.2
idna==3.6
O365==2.0.31
O365==2.0.33
oauthlib==3.2.2
pendulum==2.1.2
pendulum==3.0.0
python-dateutil==2.8.2
pytz-deprecation-shim==0.1.0.post0
pytzdata==2020.1
requests==2.31.0
requests-oauthlib==1.3.1
six==1.16.0
soupsieve==2.5
stringcase==1.2.0
tzdata==2023.3
tzlocal==4.3.1
urllib3==2.1.0
time-machine==2.13.0
tzdata==2024.1
tzlocal==5.2
urllib3==2.2.0