Compare commits

...

2 Commits

Author SHA1 Message Date
Jacob Mastel
d8417b4449 Switched to black formatting 2025-09-17 09:44:26 -07:00
Jacob Mastel
6cf6ecc78e Added black formatting 2025-09-17 09:39:57 -07:00
2 changed files with 344 additions and 325 deletions

View File

@@ -1,148 +1,149 @@
import re
# Exact sender addresses that are always junked. filter_message compares the
# normalized sender against this list both to veto an ALLOW match and to move
# the message to "Junk Email".
BLOCK_EMAIL = [
    "support-noreply@status.duosecurity.com",
    "alerts@alerts.mail.hpe.com",
    "viva-noreply@microsoft.com",
    "info@snowflake.com",
    "noreply-marketplace@zoom.us",
    "equal.opportunity@oregonstate.edu",
    "diversity@oregonstate.edu",
    "evals@oregonstate.edu",
    "sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com",
    "noreply@msexperience.microsoft.com",
]
# Phrases that mark a message as bulk/marketing mail. filter_message passes
# this list to is_in_message(), which lower-cases each term and looks for it in
# the subject, sender address and body; a hit moves the message to "Junk Email".
BLOCK_KEYWORDS = [
    "advertising service",
    "email campaign",
    "email preferences",
    "end these email updates",
    "hs-sales-engage.com",
    "long term care",
    "marketing",
    "megacast",
    "my subscription",
    "newsletter",
    "no longer would like to be contacted",
    "opt-out",
    "opt out",
    "piano",
    "pmp exam",
    "prefer not to receive",
    "prefer not to see",
    "rather not receive",
    "remove from list",
    "remove from this list",
    "Samsung SDS America",
    "sidekickopen",
    "subscription preferences",
    "survey",
    "take-me-off",
    "this advertisement",
    "unsub_center",
    "unsubscribe",
    "want to receive",
    "webcast",
    "webinar",
    "welder",
    "white paper",
    "whitepaper",
    "wish to be contacted",
    "wish to receive",
    "your notifications",
    "your preferences",
    "your subscription",
]
# Sender domains treated as spam sources.
# NOTE(review): the code that consumes this list sits in a part of
# filter_message that is not visible here; presumably each entry is matched
# against the message headers or sender - confirm before changing entries such
# as "cmadvantage", which has no TLD.
BLOCK_DOMAINS = [
    "aafintl.com",
    "atscale.com",
    "astutechsolutions.com",
    "bytespeed.com",
    "checkpoint.com",
    "cmadvantage",
    "customeriomail.com",
    "denodo.com",
    "exacttarget.com",
    "freshsales.io",
    "hrciconnect.com",
    "hso.com",
    "ikigailabs.io",
    "impetus.com",
    "informareachmedia.com",
    "javentechnologies.com",
    "kuusakoski.com",
    "mailgun.net",
    "malwarebytes.com",
    "matrixservice.com",
    "mimecast.com",
    "mktomail.com",
    "msgfocus.com",
    "norstar.net",
    "orjuno.com",
    "pphosted.com",
    "qualitynetworks.com",
    "radware.com",
    "rsmatco.com",
    "rubrain.agency",
    "sendgrid.net",
    "sparkpostmail.com",
    "techmate.com",
    "thesourcery.com",
    "trustedmailservers.com",
    "ubtiinc.com",
    "zerowait.com",
    "znsrc.com",
]
# Phrases for recurring campus-wide announcements. filter_message checks these
# with is_in_message() before the ALLOW list is consulted, so a hit is moved to
# "Junk Email" even when the sender would otherwise be allowed.
BLOCK_ANNOYING = [
    "charitable fund drive",
    "election reminder",
    "faculty senate agenda",
    "food drive",
    "food share",
    "general election",
    "giving tuesday",
    "hardship leave donations needed",
    "register to vote",
    "service desk survey request",
    "special election",
    "voter registration",
]
# Senders and domains whose mail is kept. filter_message tests each entry with
# `good in normalized_from`, i.e. as a substring of the normalized sender, so an
# entry may be a full address or just a domain.
# Entries are kept lower-case: the sender is presumably lower-cased the same way
# normalized_to is (TODO confirm), in which case a mixed-case entry such as the
# former "Travel@concursolutions.com" could never match.
ALLOW = [
    "oregonstate.edu",
    "github.com",
    "osu.atlassian.net",
    "oregonstateuniversity.atlassian.net",
    "sns.amazonaws.com",
    "opsgenie.net",
    "notify@teamdynamixapp.com",
    "newsbites@email.sans.org",
    "noreply@box.com",
    "noreply@email.teams.microsoft.com",
    "no-reply@sharepointonline.com",
    "govdelivery.com",
    "linkoregon.org",
    "pdxwit.org",
    "busyconf.com",
    "support@githubsupport.com",
    "microsoft.com",
    "docusign.net",
    "ideal-logic.com",
    "heliocampus.atlassian.net",
    "ctptravelservices.com",
    "travel@concursolutions.com",
    "ghost.io",
    "orders@catertrax.com",
    "nytdirect@nytimes.com",
]
# NOTE(review): `normalized` is not referenced anywhere in the visible code -
# confirm whether it is still needed.
normalized = {}
# folder name -> folder object; filled by init_filters() and read by move_message()
folder_cache = {}
# category name -> category object; filled by init_filters() and read by add_category()
category_cache = {}
def filter_message(self, message):
# normalize message attributes
normalized_to = [x.address.lower() for x in message.to]
@@ -152,217 +153,223 @@ def filter_message(self, message):
headers = []
for header in message.message_headers:
h = {}
h[header['name']] = header['value'].lower()
h[header["name"]] = header["value"].lower()
headers.append(h)
# filter unactionable IAR emails
automated_sources = [
'coresys@lists.oregonstate.edu',
'iar.ref@oregonstate.edu',
'iar.systems-team@oregonstate.edu',
'changes_osu@heliocampus.com',
'no-reply@vmockmail.com',
'noreply-beaverhub@oregonstate.edu',
'chatter-beaverhub@oregonstate.edu',
"coresys@lists.oregonstate.edu",
"iar.ref@oregonstate.edu",
"iar.systems-team@oregonstate.edu",
"changes_osu@heliocampus.com",
"no-reply@vmockmail.com",
"noreply-beaverhub@oregonstate.edu",
"chatter-beaverhub@oregonstate.edu",
]
if bool([x for x in automated_sources if(x in normalized_from)]):
unactionable = [x.lower() for x in [
'DWPRODRAW Verification',
'Job Monitor',
' - OK',
'ODProd Row Count Comparison',
'Oracle ODprod Sessions Older Than Today',
'DSDB Server Audit',
'UserBase.Users users deactivated due to ORG changes and termination',
'SSRS Datasets Needing Caching',
'Audit Update',
'ScholarUniverse to GRRS load',
'OACIS Pending Actions Notice',
'You Have OSUF Disapproved Reimbursements',
'Your GRRS to Banner scholarship load report',
'You Have OSUF Reimbursements to Review',
'You Have Scholarship Payment Plans',
'You Have Direct Payment Requests to Review',
'You Have Disapproved Scholarship Payment Plans',
'OSUF Reimbursements Needing Your Prompt Attention',
'Redistribution Verification Error',
'Direct Payment Request',
'Status Change in your Detail Code Request',
'loaded into the Index Reimbursement System',
'JV required for',
'Changes via Tableau REST API at OSU',
'DSDBTEST',
'VMock CSV S3 Upload',
'Your Daily Digest for Oregon State University'
]]
if bool([x for x in automated_sources if (x in normalized_from)]):
unactionable = [
x.lower()
for x in [
"DWPRODRAW Verification",
"Job Monitor",
" - OK",
"ODProd Row Count Comparison",
"Oracle ODprod Sessions Older Than Today",
"DSDB Server Audit",
"UserBase.Users users deactivated due to ORG changes and termination",
"SSRS Datasets Needing Caching",
"Audit Update",
"ScholarUniverse to GRRS load",
"OACIS Pending Actions Notice",
"You Have OSUF Disapproved Reimbursements",
"Your GRRS to Banner scholarship load report",
"You Have OSUF Reimbursements to Review",
"You Have Scholarship Payment Plans",
"You Have Direct Payment Requests to Review",
"You Have Disapproved Scholarship Payment Plans",
"OSUF Reimbursements Needing Your Prompt Attention",
"Redistribution Verification Error",
"Direct Payment Request",
"Status Change in your Detail Code Request",
"loaded into the Index Reimbursement System",
"JV required for",
"Changes via Tableau REST API at OSU",
"DSDBTEST",
"VMock CSV S3 Upload",
"Your Daily Digest for Oregon State University",
]
]
if is_in_message(unactionable, message):
self._log_result(message, 'moving to unactionable')
move_message(message, 'zzz-unactionable')
self._log_result(message, "moving to unactionable")
move_message(message, "zzz-unactionable")
return
# filter servicenow email notifications
if 'mysupport-replies@oregonstate.edu' in normalized_from:
keep = [
'opened on your behalf',
'your request REQ'
]
if "mysupport-replies@oregonstate.edu" in normalized_from:
keep = ["opened on your behalf", "your request REQ"]
if is_in_message(keep, message):
self._log_result(message, "keeping servicenow message")
return
else:
self._log_result(message, 'moving to servicenow')
move_message(message, 'zzz-servicenow')
self._log_result(message, "moving to servicenow")
move_message(message, "zzz-servicenow")
return
# filter unactionable InCommon SSL cert emails
if 'support@cert-manager.com' in normalized_from:
actionable = [
'iar',
'sig',
'analytics',
'cwp-access',
'dsdb',
'tableau'
]
if "support@cert-manager.com" in normalized_from:
actionable = ["iar", "sig", "analytics", "cwp-access", "dsdb", "tableau"]
if is_in_message(actionable, message):
self._log_result(message,
f"keeping message for actionable SSL notification")
self._log_result(
message, f"keeping message for actionable SSL notification"
)
return
else:
self._log_result(message, 'moving to unactionable')
move_message(message, 'zzz-unactionable')
self._log_result(message, "moving to unactionable")
move_message(message, "zzz-unactionable")
return
# filter Student CRM
if 'noreply-beaverhub@oregonstate.edu' in normalized_from:
if "noreply-beaverhub@oregonstate.edu" in normalized_from:
unactionable = [
'has been assigned to you or your queue',
"has been assigned to you or your queue",
]
if is_in_message(unactionable, message):
self._log_result(message, 'moving to unactionable')
move_message(message, 'zzz-unactionable')
self._log_result(message, "moving to unactionable")
move_message(message, "zzz-unactionable")
return
# filter dependabot
if ('dependabot[bot]' in str(message.sender)
or is_in_message(['Your Dependabot alerts'], message)):
self._log_result(message, 'moving to dependabot')
move_message(message, 'zzz-dependabot')
if "dependabot[bot]" in str(message.sender) or is_in_message(
["Your Dependabot alerts"], message
):
self._log_result(message, "moving to dependabot")
move_message(message, "zzz-dependabot")
return
# filter ACTWON
if 'actwon_administration@lists.oregonstate.edu' in normalized_from:
self._log_result(message, 'moving to ACTWON')
move_message(message, 'lists/ACTWON')
if "actwon_administration@lists.oregonstate.edu" in normalized_from:
self._log_result(message, "moving to ACTWON")
move_message(message, "lists/ACTWON")
return
# filter ACUG
if 'isacug@oregonstate.edu' in normalized_to:
self._log_result(message, 'moving to ACUG')
move_message(message, 'lists/ACUG')
if "isacug@oregonstate.edu" in normalized_to:
self._log_result(message, "moving to ACUG")
move_message(message, "lists/ACUG")
return
# filter HelioCampus helpdesk
if 'jira@heliocampus.atlassian.net' in normalized_from:
self._log_result(message, 'moving to zzz-hc-helpdesk')
move_message(message, 'zzz-hc-helpdesk')
if "jira@heliocampus.atlassian.net" in normalized_from:
self._log_result(message, "moving to zzz-hc-helpdesk")
move_message(message, "zzz-hc-helpdesk")
return
# filter alerts-sig
if (normalized_from in ['mcc-b11-stor1@oregonstate.edu',
'mcc-b12-stor1@oregonstate.edu',
'ousclus@oregonstate.edu',
'isilon@storage.sig.oregonstate.edu',
'me4012@sig.oregonstate.edu']
or 'alarm.DatastoreDiskUsageAlarm' in message.subject):
self._log_result(message, 'moving to alerts-sig')
move_message(message, 'lists/alerts-sig')
if (
normalized_from
in [
"mcc-b11-stor1@oregonstate.edu",
"mcc-b12-stor1@oregonstate.edu",
"ousclus@oregonstate.edu",
"isilon@storage.sig.oregonstate.edu",
"me4012@sig.oregonstate.edu",
]
or "alarm.DatastoreDiskUsageAlarm" in message.subject
):
self._log_result(message, "moving to alerts-sig")
move_message(message, "lists/alerts-sig")
return
# filter backup-nightly
if 'backup-nightly@lists.oregonstate.edu' in message.to:
self._log_result(message, 'moving to backup-nightly')
move_message(message, 'lists/backup-nightly')
if "backup-nightly@lists.oregonstate.edu" in message.to:
self._log_result(message, "moving to backup-nightly")
move_message(message, "lists/backup-nightly")
return
# filter quarantine spam
if 'quarantine@messaging.microsoft.com' in normalized_from:
self._log_result(message, 'moving to unactionable')
move_message(message, 'zzz-unactionable')
if "quarantine@messaging.microsoft.com" in normalized_from:
self._log_result(message, "moving to unactionable")
move_message(message, "zzz-unactionable")
return
# delete atlassian spam
if 'confluence@osu.atlassian.net' in normalized_from:
if "confluence@osu.atlassian.net" in normalized_from:
unactionable = [
'sv1_ds_atlassian',
'[confluence] daily digest',
"sv1_ds_atlassian",
"[confluence] daily digest",
]
if is_in_message(unactionable, message):
self._log_result(message, 'deleting atlassian spam')
self._log_result(message, "deleting atlassian spam")
message.delete()
return
# delete salesforce spam
if 'salesforce.com' in normalized_from:
unactionable = [x.lower() for x in [
'sandbox',
'resetting your Salesforce password',
'new Salesforce security token',
]]
if "salesforce.com" in normalized_from:
unactionable = [
x.lower()
for x in [
"sandbox",
"resetting your Salesforce password",
"new Salesforce security token",
]
]
if is_in_message(unactionable, message):
self._log_result(message, 'deleting salesforce spam')
self._log_result(message, "deleting salesforce spam")
message.delete()
return
# delete rave junk
if ('guardian@getrave.com' in normalized_from
and 'new guardian chat' in message.subject.lower()):
self._log_result(message, 'deleting rave spam')
if (
"guardian@getrave.com" in normalized_from
and "new guardian chat" in message.subject.lower()
):
self._log_result(message, "deleting rave spam")
message.delete()
return
# delete conference spam
if ('brock+conf@onid.oregonstate.edu' in normalized_to
or 'brock+conf@oregonstate.edu' in normalized_to):
self._log_result(message, 'deleting conference spam')
if (
"brock+conf@onid.oregonstate.edu" in normalized_to
or "brock+conf@oregonstate.edu" in normalized_to
):
self._log_result(message, "deleting conference spam")
message.delete()
return
# delete OSU IT Managers list spam
if 'osuitmanagers@oregonstate.edu' in normalized_to:
self._log_result(message, 'deleting osu it managers spam')
if "osuitmanagers@oregonstate.edu" in normalized_to:
self._log_result(message, "deleting osu it managers spam")
message.delete()
return
# delete other spam
if is_in_message(BLOCK_ANNOYING, message):
self._log_result(message, 'junking spam containing annoying content')
move_message(message, 'Junk Email')
self._log_result(message, "junking spam containing annoying content")
move_message(message, "Junk Email")
return
# add 'HelioCampus' category to messages from HC
if '@heliocampus' in normalized_from:
if "@heliocampus" in normalized_from:
self._log_result(message, "adding category 'HelioCampus'")
add_category(message, 'HelioCampus')
add_category(message, "HelioCampus")
return
# keep messages from allowed emails and domains
for good in ALLOW:
if good in normalized_from and normalized_from not in BLOCK_EMAIL:
self._log_result(message,
f"keeping message from allowed sender {good}")
self._log_result(message, f"keeping message from allowed sender {good}")
return
# junk messages from blocked senders
if normalized_from in BLOCK_EMAIL:
self._log_result(message, 'junking spam from blocked sender')
move_message(message, 'Junk Email')
self._log_result(message, "junking spam from blocked sender")
move_message(message, "Junk Email")
return
# junk messages with blocked keywords
if is_in_message(BLOCK_KEYWORDS, message):
self._log_result(message, 'junking spam containing blocked keyword')
move_message(message, 'Junk Email')
self._log_result(message, "junking spam containing blocked keyword")
move_message(message, "Junk Email")
return
# junk messages from blocked domains
@@ -375,29 +382,32 @@ def filter_message(self, message):
is_spam = True
break
if is_spam:
self._log_result(message, 'junking spam from blocked domain')
move_message(message, 'Junk Email')
self._log_result(message, "junking spam from blocked domain")
move_message(message, "Junk Email")
return
# junk known spam headers
if (int(get_header('X-MS-Exchange-Organization-SCL', headers)) >= 5
or get_header('X-Mailgun-List-Address', headers)
or get_header('X-SFDC-EmailCategory', headers) == 'apimassmail'):
self._log_result(message, 'junking spam with known header')
move_message(message, 'Junk Email')
if (
int(get_header("X-MS-Exchange-Organization-SCL", headers)) >= 5
or get_header("X-Mailgun-List-Address", headers)
or get_header("X-SFDC-EmailCategory", headers) == "apimassmail"
):
self._log_result(message, "junking spam with known header")
move_message(message, "Junk Email")
return
# add 'OSU Inform' category to internal messages sent to DLs
if ('@oregonstate.edu' in normalized_from
and ((not get_header('To', headers)
and not get_header('List-Id', headers))
or ('inform-c' in ' '.join(normalized_to)))):
if "@oregonstate.edu" in normalized_from and (
(not get_header("To", headers) and not get_header("List-Id", headers))
or ("inform-c" in " ".join(normalized_to))
):
self._log_result(message, "adding category 'OSU Inform'")
add_category(message, 'OSU Inform')
add_category(message, "OSU Inform")
return
# KEEP MESSAGE
self._log_result(message, 'keeping message, passed all filter checks')
self._log_result(message, "keeping message, passed all filter checks")
def is_in_message(list_, message):
    """search a message for a list of strings

    Each term is matched case-insensitively as a literal substring of the
    message subject, sender address and body (byte-order marks are stripped
    first).

    Terms used to be handed to ``re.search`` unescaped, so a term such as
    "[confluence] daily digest" was read as a character class and never
    matched the literal subject, while "." matched any character.

    :param list_: iterable of search strings
    :param message: object exposing ``subject``, ``body`` and ``sender.address``
    :return: True if any term occurs in any of the three fields, else False
    """

    def _normalize(text):
        # a missing field is treated as empty instead of raising
        return (text or "").lower().replace("\ufeff", "")

    haystacks = (
        _normalize(message.subject),
        _normalize(message.sender.address),
        _normalize(message.body),
    )
    for term in list_:
        needle = term.lower()
        if any(needle in haystack for haystack in haystacks):
            return True
    return False
def move_message(message, folder_name):
    """move a message to a folder stored in the folder cache

    :param message: message object exposing ``move()``
    :param folder_name: key into the module-level ``folder_cache``
    :return: False when ``folder_name`` is not cached (the message is left
        where it is); otherwise None after ``message.move()`` has been called
    """
    if folder_name not in folder_cache:
        return False
    message.move(folder_cache[folder_name])
def add_category(message, category_name):
    """add an Outlook category to a message

    :param message: message object exposing ``add_category()`` and
        ``save_message()``
    :param category_name: key into the module-level ``category_cache``
    :return: False when ``category_name`` is not cached (the message is left
        untouched); otherwise None after the category is added and the
        message saved
    """
    if category_name not in category_cache:
        return False
    message.add_category(category_cache[category_name])
    message.save_message()
def init_filters(self):
    """copy the caller's folder and category maps into the module-level caches

    :param self: the object that loads this module; it must expose
        ``_folders`` and ``_categories`` dicts
    """
    # hack to copy a dict from parent object into local object; existing
    # entries with the same key are overwritten, others are left in place
    folder_cache.update(self._folders)
    category_cache.update(self._categories)
def get_header(header_key, headers):
vals = []
for header in headers:
@@ -453,6 +469,7 @@ def get_header(header_key, headers):
else:
return False
def search_headers(search, headers):
for header in headers:
for val in header.values():

View File

@@ -13,38 +13,40 @@ SCRIPTPATH = os.path.dirname(os.path.abspath(__file__))
# parse config file
config = {}
configfile = configparser.ConfigParser()
configfile.read(os.path.join(SCRIPTPATH, "mail-filter.conf"))
config["FILTERS_FILE"] = configfile.get("main", "Filters")
config["IS_DEBUG"] = configfile.getboolean("main", "EnableDebugging")
config["CHECK_INTERVAL"] = configfile.getint("main", "MailCheckInterval")
config["LOG_DIR"] = configfile.get("logging", "LogDir")
config["TIMEZONE"] = configfile.get("logging", "Timezone")
# credentials come from the environment, not the config file; os.getenv
# yields None for any variable that is not set
config["APP_CLIENT_ID"] = os.getenv("APP_CLIENT_ID")
config["APP_SECRET_KEY"] = os.getenv("APP_SECRET_KEY")
config["APP_TENANT_ID"] = os.getenv("APP_TENANT_ID")
# convert timestamp to local time
def local_time(record, datefmt=None):
    """format a log record's creation time in the configured timezone

    Installed as ``formatter.formatTime``, hence the ``datefmt`` parameter;
    it is accepted but ignored and the output format is fixed.

    :param record: log record exposing ``created`` (epoch timestamp)
    :param datefmt: unused
    :return: timestamp string formatted as ``%Y-%m-%d %H:%M:%S %z``
    """
    zone = pendulum.timezone(config["TIMEZONE"])
    return pendulum.from_timestamp(record.created, tz=zone).strftime(
        "%Y-%m-%d %H:%M:%S %z"
    )
# set up logger
logger = logging.getLogger("o365mf")
if config["IS_DEBUG"]:
    logger.setLevel(logging.DEBUG)
else:
    logger.setLevel(logging.INFO)

formatter = logging.Formatter("%(asctime)s %(module)s [%(levelname)s] %(message)s")
# render %(asctime)s through local_time() so timestamps use config["TIMEZONE"]
formatter.formatTime = local_time

# one log file per day, rotated at midnight, keeping five old files
log_filename = f"{config['LOG_DIR']}/mail-filter.log"
handler = logging.handlers.TimedRotatingFileHandler(
    log_filename, when="midnight", backupCount=5
)
handler.setFormatter(formatter)
logger.addHandler(handler)
# helper function for logging
def log(msg, level=logging.INFO):
print(msg)
@@ -53,9 +55,9 @@ def log(msg, level=logging.INFO):
class O365MailFilter(object):
    # scopes passed to Account.authenticate() by _authenticate()
    _scopes = [
        "basic",
        "https://graph.microsoft.com/Mail.ReadWrite",
        "https://graph.microsoft.com/MailboxSettings.Read",
    ]
def __init__(self, config):
@@ -63,32 +65,30 @@ class O365MailFilter(object):
self._is_canceled = False
self._folders = {}
self._categories = {}
self._filtered_cache = {
'last_reset': None,
'ids': set()
}
self._filtered_cache = {"last_reset": None, "ids": set()}
# auth with O365
self._authenticate()
def _authenticate(self):
    """create the O365 account object and authenticate it if necessary

    Builds ``self._account`` from the client id, secret key and tenant id in
    ``self._config``, with tokens stored in ``.cache/token.txt``. When the
    account does not report itself as authenticated, ``authenticate()`` is
    called with ``self._scopes``.
    """
    token_backend = FileSystemTokenBackend(
        token_path=".cache", token_filename="token.txt"
    )
    self._account = Account(
        (self._config["APP_CLIENT_ID"], self._config["APP_SECRET_KEY"]),
        tenant_id=self._config["APP_TENANT_ID"],
        token_backend=token_backend,
    )

    if not self._account.is_authenticated:
        self._account.authenticate(scopes=self._scopes)

    # NOTE(review): the result of authenticate() is not checked, so this is
    # logged even if authentication did not succeed - confirm that is intended
    log("Authentication successful")
def _load_filters(self):
""" load filter code from a file on disk """
loader = SourceFileLoader('filters', self._config['FILTERS_FILE'])
"""load filter code from a file on disk"""
loader = SourceFileLoader("filters", self._config["FILTERS_FILE"])
module = loader.load_module()
module.init_filters(self)
# make 'filter_message()' implemented in the file available for use
@@ -96,14 +96,14 @@ class O365MailFilter(object):
self._filter_message = module.filter_message
def _load_folders(self, mailbox, folders=None, folder_path=None):
""" recursively cache folder IDs for this mailbox """
"""recursively cache folder IDs for this mailbox"""
if folders is None:
folders = mailbox.get_folders()
self._folders = {}
folder_path = ''
folder_path = ""
for folder in folders:
if folder_path == '':
if folder_path == "":
current_folder_path = f"{folder.name}"
else:
current_folder_path = f"{folder_path}/{folder.name}"
@@ -113,26 +113,22 @@ class O365MailFilter(object):
else:
# add child folders to the cache, because get_folders() doesn't
# descend into sub-folders by default
self._load_folders(mailbox, folder.get_folders(),
current_folder_path)
self._load_folders(mailbox, folder.get_folders(), current_folder_path)
def _load_categories(self):
""" cache Outlook categories for this account """
"""cache Outlook categories for this account"""
oc = self._account.outlook_categories()
categories = oc.get_categories()
for category in categories:
self._categories[category.name] = category
def _clear_cache(self):
    """clear the filtered message cache

    Replaces ``self._filtered_cache`` with an empty id set and stamps
    ``last_reset`` with the current time.
    """
    log("Clearing filtered message cache...", logging.DEBUG)
    self._filtered_cache = {"last_reset": pendulum.now(), "ids": set()}
def _repr_message(self, message):
""" returns a str representation of a message suitable for logging """
"""returns a str representation of a message suitable for logging"""
# to = ','.join([r.address for r in message.to])
return f"[FROM: {message.sender.address} SUBJ: {message.subject}]"
@@ -157,50 +153,56 @@ class O365MailFilter(object):
# the O365 library will not paginate results correctly
limit = self._account.protocol.max_top_value
query = inbox.new_query()
query = query.on_attribute('isRead').equals(False).select(
'to_recipients', 'from', 'subject', 'body',
'internet_message_headers'
query = (
query.on_attribute("isRead")
.equals(False)
.select(
"to_recipients", "from", "subject", "body", "internet_message_headers"
)
)
log(f" {pendulum.now()} Getting messages from inbox...", logging.DEBUG)
messages = inbox.get_messages(query=query, limit=limit, batch=25)
for message in messages:
log(f" {pendulum.now()} {message}", logging.DEBUG)
if message.object_id in self._filtered_cache['ids']:
if message.object_id in self._filtered_cache["ids"]:
# we've already filtered this message, so skip it
continue
self._filter_message(self, message)
self._filtered_cache['ids'].add(message.object_id)
self._filtered_cache["ids"].add(message.object_id)
def run(self):
    """run filter as a loop

    Repeats until ``self._is_canceled`` is set: resets the filtered-message
    cache when it has never been reset or is older than 4 hours, runs one
    filter pass, then sleeps for the configured check interval.
    """
    is_first_run = True
    while not self._is_canceled:
        # clear the filtered message cache if it's older than 4 hours
        last_reset = self._filtered_cache["last_reset"]
        if last_reset is None or last_reset < pendulum.now().subtract(hours=4):
            self._clear_cache()

        log(f"Filtering the sludge @ {pendulum.now()}...", logging.DEBUG)
        self.filter(is_first_run)
        is_first_run = False
        # the cancel flag is only re-checked after this sleep returns
        time.sleep(self._config["CHECK_INTERVAL"])

    log("Done.")
def exit(self):
    """ask the run() loop to stop by setting the cancel flag it polls"""
    self._is_canceled = True
# module-level filter instance; the exit() signal handler below refers to it
log("Initializing O365 mail filter...")
o365mf = O365MailFilter(config)
def exit(signum, frame):
    """signal handler for a clean exit

    Logs the signal number and asks the module-level ``o365mf`` instance to
    stop.

    :param signum: number of the signal that was caught
    :param frame: current stack frame (unused)
    """
    log(f"Caught signal {signum}, exiting...")
    o365mf.exit()
if __name__ == '__main__':
if __name__ == "__main__":
# register signal handlers
signal.signal(signal.SIGTERM, exit)
signal.signal(signal.SIGHUP, exit)