Compare commits
3 Commits
629942d447
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
663fbd1f39 | ||
|
|
d8417b4449 | ||
|
|
6cf6ecc78e |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
||||
.env
|
||||
mail-filter.conf
|
||||
__pycache__
|
||||
|
||||
551
filter-rules.py
551
filter-rules.py
@@ -1,148 +1,149 @@
|
||||
import re
|
||||
|
||||
BLOCK_EMAIL = [
|
||||
'support-noreply@status.duosecurity.com',
|
||||
'alerts@alerts.mail.hpe.com',
|
||||
'viva-noreply@microsoft.com',
|
||||
'info@snowflake.com',
|
||||
'noreply-marketplace@zoom.us',
|
||||
'equal.opportunity@oregonstate.edu',
|
||||
'diversity@oregonstate.edu',
|
||||
'evals@oregonstate.edu',
|
||||
'sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com',
|
||||
'noreply@msexperience.microsoft.com',
|
||||
"support-noreply@status.duosecurity.com",
|
||||
"alerts@alerts.mail.hpe.com",
|
||||
"viva-noreply@microsoft.com",
|
||||
"info@snowflake.com",
|
||||
"noreply-marketplace@zoom.us",
|
||||
"equal.opportunity@oregonstate.edu",
|
||||
"diversity@oregonstate.edu",
|
||||
"evals@oregonstate.edu",
|
||||
"sv_uit_voicemail_cxe@oregonstateuniversity.onmicrosoft.com",
|
||||
"noreply@msexperience.microsoft.com",
|
||||
]
|
||||
|
||||
BLOCK_KEYWORDS = [
|
||||
'advertising service',
|
||||
'email campaign',
|
||||
'email preferences',
|
||||
'end these email updates',
|
||||
'hs-sales-engage.com',
|
||||
'long term care',
|
||||
'marketing',
|
||||
'megacast',
|
||||
'my subscription',
|
||||
'newsletter',
|
||||
'no longer would like to be contacted',
|
||||
'opt-out',
|
||||
'opt out',
|
||||
'piano',
|
||||
'pmp exam',
|
||||
'prefer not to receive',
|
||||
'prefer not to see',
|
||||
'rather not receive',
|
||||
'remove from list',
|
||||
'remove from this list',
|
||||
'Samsung SDS America',
|
||||
'sidekickopen',
|
||||
'subscription preferences',
|
||||
'survey',
|
||||
'take-me-off',
|
||||
'this advertisement',
|
||||
'unsub_center',
|
||||
'unsubscribe',
|
||||
'want to receive',
|
||||
'webcast',
|
||||
'webinar',
|
||||
'welder',
|
||||
'white paper',
|
||||
'whitepaper',
|
||||
'wish to be contacted',
|
||||
'wish to receive',
|
||||
'your notifications',
|
||||
'your preferences',
|
||||
'your subscription',
|
||||
"advertising service",
|
||||
"email campaign",
|
||||
"email preferences",
|
||||
"end these email updates",
|
||||
"hs-sales-engage.com",
|
||||
"long term care",
|
||||
"marketing",
|
||||
"megacast",
|
||||
"my subscription",
|
||||
"newsletter",
|
||||
"no longer would like to be contacted",
|
||||
"opt-out",
|
||||
"opt out",
|
||||
"piano",
|
||||
"pmp exam",
|
||||
"prefer not to receive",
|
||||
"prefer not to see",
|
||||
"rather not receive",
|
||||
"remove from list",
|
||||
"remove from this list",
|
||||
"Samsung SDS America",
|
||||
"sidekickopen",
|
||||
"subscription preferences",
|
||||
"survey",
|
||||
"take-me-off",
|
||||
"this advertisement",
|
||||
"unsub_center",
|
||||
"unsubscribe",
|
||||
"want to receive",
|
||||
"webcast",
|
||||
"webinar",
|
||||
"welder",
|
||||
"white paper",
|
||||
"whitepaper",
|
||||
"wish to be contacted",
|
||||
"wish to receive",
|
||||
"your notifications",
|
||||
"your preferences",
|
||||
"your subscription",
|
||||
]
|
||||
|
||||
BLOCK_DOMAINS = [
|
||||
'aafintl.com',
|
||||
'atscale.com',
|
||||
'astutechsolutions.com',
|
||||
'bytespeed.com',
|
||||
'checkpoint.com',
|
||||
'cmadvantage',
|
||||
'customeriomail.com',
|
||||
'denodo.com',
|
||||
'exacttarget.com',
|
||||
'freshsales.io',
|
||||
'hrciconnect.com',
|
||||
'hso.com',
|
||||
'ikigailabs.io',
|
||||
'impetus.com',
|
||||
'informareachmedia.com',
|
||||
'javentechnologies.com',
|
||||
'kuusakoski.com',
|
||||
'mailgun.net',
|
||||
'malwarebytes.com',
|
||||
'matrixservice.com',
|
||||
'mimecast.com',
|
||||
'mktomail.com',
|
||||
'msgfocus.com',
|
||||
'norstar.net',
|
||||
'orjuno.com',
|
||||
'pphosted.com',
|
||||
'qualitynetworks.com',
|
||||
'radware.com',
|
||||
'rsmatco.com',
|
||||
'rubrain.agency',
|
||||
'sendgrid.net',
|
||||
'sparkpostmail.com',
|
||||
'techmate.com',
|
||||
'thesourcery.com',
|
||||
'trustedmailservers.com',
|
||||
'ubtiinc.com',
|
||||
'zerowait.com',
|
||||
'znsrc.com'
|
||||
"aafintl.com",
|
||||
"atscale.com",
|
||||
"astutechsolutions.com",
|
||||
"bytespeed.com",
|
||||
"checkpoint.com",
|
||||
"cmadvantage",
|
||||
"customeriomail.com",
|
||||
"denodo.com",
|
||||
"exacttarget.com",
|
||||
"freshsales.io",
|
||||
"hrciconnect.com",
|
||||
"hso.com",
|
||||
"ikigailabs.io",
|
||||
"impetus.com",
|
||||
"informareachmedia.com",
|
||||
"javentechnologies.com",
|
||||
"kuusakoski.com",
|
||||
"mailgun.net",
|
||||
"malwarebytes.com",
|
||||
"matrixservice.com",
|
||||
"mimecast.com",
|
||||
"mktomail.com",
|
||||
"msgfocus.com",
|
||||
"norstar.net",
|
||||
"orjuno.com",
|
||||
"pphosted.com",
|
||||
"qualitynetworks.com",
|
||||
"radware.com",
|
||||
"rsmatco.com",
|
||||
"rubrain.agency",
|
||||
"sendgrid.net",
|
||||
"sparkpostmail.com",
|
||||
"techmate.com",
|
||||
"thesourcery.com",
|
||||
"trustedmailservers.com",
|
||||
"ubtiinc.com",
|
||||
"zerowait.com",
|
||||
"znsrc.com",
|
||||
]
|
||||
|
||||
BLOCK_ANNOYING = [
|
||||
'charitable fund drive',
|
||||
'election reminder',
|
||||
'faculty senate agenda',
|
||||
'food drive',
|
||||
'food share',
|
||||
'general election',
|
||||
'giving tuesday',
|
||||
'hardship leave donations needed',
|
||||
'register to vote',
|
||||
'service desk survey request',
|
||||
'special election',
|
||||
'voter registration'
|
||||
"charitable fund drive",
|
||||
"election reminder",
|
||||
"faculty senate agenda",
|
||||
"food drive",
|
||||
"food share",
|
||||
"general election",
|
||||
"giving tuesday",
|
||||
"hardship leave donations needed",
|
||||
"register to vote",
|
||||
"service desk survey request",
|
||||
"special election",
|
||||
"voter registration",
|
||||
]
|
||||
|
||||
# Senders whose mail is always kept. filter_message tests every entry with
# `good in normalized_from`, so an entry may be a complete address or only a
# domain fragment. A sender that is also listed in BLOCK_EMAIL is not kept by
# this list (filter_message checks `normalized_from not in BLOCK_EMAIL`).
#
# Entries must be lower-case: the `in` test is case-sensitive and
# normalized_from is presumably the lower-cased sender address (its
# assignment is outside the visible part of the file -- TODO confirm).
ALLOW = [
    "oregonstate.edu",
    "github.com",
    "osu.atlassian.net",
    "oregonstateuniversity.atlassian.net",
    "sns.amazonaws.com",
    "opsgenie.net",
    "notify@teamdynamixapp.com",
    "newsbites@email.sans.org",
    "noreply@box.com",
    "noreply@email.teams.microsoft.com",
    "no-reply@sharepointonline.com",
    "govdelivery.com",
    "linkoregon.org",
    "pdxwit.org",
    "busyconf.com",
    "support@githubsupport.com",
    "microsoft.com",
    "docusign.net",
    "ideal-logic.com",
    "heliocampus.atlassian.net",
    "ctptravelservices.com",
    # previously "Travel@concursolutions.com"; the capital T could never
    # match a lower-cased sender address
    "travel@concursolutions.com",
    "ghost.io",
    "orders@catertrax.com",
    "nytdirect@nytimes.com",
]
|
||||
|
||||
normalized = {}
|
||||
folder_cache = {}
|
||||
category_cache = {}
|
||||
|
||||
|
||||
def filter_message(self, message):
|
||||
# normalize message attributes
|
||||
normalized_to = [x.address.lower() for x in message.to]
|
||||
@@ -152,217 +153,223 @@ def filter_message(self, message):
|
||||
headers = []
|
||||
for header in message.message_headers:
|
||||
h = {}
|
||||
h[header['name']] = header['value'].lower()
|
||||
h[header["name"]] = header["value"].lower()
|
||||
headers.append(h)
|
||||
|
||||
# filter unactionable IAR emails
|
||||
automated_sources = [
|
||||
'coresys@lists.oregonstate.edu',
|
||||
'iar.ref@oregonstate.edu',
|
||||
'iar.systems-team@oregonstate.edu',
|
||||
'changes_osu@heliocampus.com',
|
||||
'no-reply@vmockmail.com',
|
||||
'noreply-beaverhub@oregonstate.edu',
|
||||
'chatter-beaverhub@oregonstate.edu',
|
||||
"coresys@lists.oregonstate.edu",
|
||||
"iar.ref@oregonstate.edu",
|
||||
"iar.systems-team@oregonstate.edu",
|
||||
"changes_osu@heliocampus.com",
|
||||
"no-reply@vmockmail.com",
|
||||
"noreply-beaverhub@oregonstate.edu",
|
||||
"chatter-beaverhub@oregonstate.edu",
|
||||
]
|
||||
if bool([x for x in automated_sources if(x in normalized_from)]):
|
||||
unactionable = [x.lower() for x in [
|
||||
'DWPRODRAW Verification',
|
||||
'Job Monitor',
|
||||
' - OK',
|
||||
'ODProd Row Count Comparison',
|
||||
'Oracle ODprod Sessions Older Than Today',
|
||||
'DSDB Server Audit',
|
||||
'UserBase.Users users deactivated due to ORG changes and termination',
|
||||
'SSRS Datasets Needing Caching',
|
||||
'Audit Update',
|
||||
'ScholarUniverse to GRRS load',
|
||||
'OACIS Pending Actions Notice',
|
||||
'You Have OSUF Disapproved Reimbursements',
|
||||
'Your GRRS to Banner scholarship load report',
|
||||
'You Have OSUF Reimbursements to Review',
|
||||
'You Have Scholarship Payment Plans',
|
||||
'You Have Direct Payment Requests to Review',
|
||||
'You Have Disapproved Scholarship Payment Plans',
|
||||
'OSUF Reimbursements Needing Your Prompt Attention',
|
||||
'Redistribution Verification Error',
|
||||
'Direct Payment Request',
|
||||
'Status Change in your Detail Code Request',
|
||||
'loaded into the Index Reimbursement System',
|
||||
'JV required for',
|
||||
'Changes via Tableau REST API at OSU',
|
||||
'DSDBTEST',
|
||||
'VMock CSV S3 Upload',
|
||||
'Your Daily Digest for Oregon State University'
|
||||
]]
|
||||
if bool([x for x in automated_sources if (x in normalized_from)]):
|
||||
unactionable = [
|
||||
x.lower()
|
||||
for x in [
|
||||
"DWPRODRAW Verification",
|
||||
"Job Monitor",
|
||||
" - OK",
|
||||
"ODProd Row Count Comparison",
|
||||
"Oracle ODprod Sessions Older Than Today",
|
||||
"DSDB Server Audit",
|
||||
"UserBase.Users users deactivated due to ORG changes and termination",
|
||||
"SSRS Datasets Needing Caching",
|
||||
"Audit Update",
|
||||
"ScholarUniverse to GRRS load",
|
||||
"OACIS Pending Actions Notice",
|
||||
"You Have OSUF Disapproved Reimbursements",
|
||||
"Your GRRS to Banner scholarship load report",
|
||||
"You Have OSUF Reimbursements to Review",
|
||||
"You Have Scholarship Payment Plans",
|
||||
"You Have Direct Payment Requests to Review",
|
||||
"You Have Disapproved Scholarship Payment Plans",
|
||||
"OSUF Reimbursements Needing Your Prompt Attention",
|
||||
"Redistribution Verification Error",
|
||||
"Direct Payment Request",
|
||||
"Status Change in your Detail Code Request",
|
||||
"loaded into the Index Reimbursement System",
|
||||
"JV required for",
|
||||
"Changes via Tableau REST API at OSU",
|
||||
"DSDBTEST",
|
||||
"VMock CSV S3 Upload",
|
||||
"Your Daily Digest for Oregon State University",
|
||||
]
|
||||
]
|
||||
if is_in_message(unactionable, message):
|
||||
self._log_result(message, 'moving to unactionable')
|
||||
move_message(message, 'zzz-unactionable')
|
||||
self._log_result(message, "moving to unactionable")
|
||||
move_message(message, "zzz-unactionable")
|
||||
return
|
||||
|
||||
# filter servicenow email notifications
|
||||
if 'mysupport-replies@oregonstate.edu' in normalized_from:
|
||||
keep = [
|
||||
'opened on your behalf',
|
||||
'your request REQ'
|
||||
]
|
||||
if "mysupport-replies@oregonstate.edu" in normalized_from:
|
||||
keep = ["opened on your behalf", "your request REQ"]
|
||||
if is_in_message(keep, message):
|
||||
self._log_result(message, "keeping servicenow message")
|
||||
return
|
||||
else:
|
||||
self._log_result(message, 'moving to servicenow')
|
||||
move_message(message, 'zzz-servicenow')
|
||||
self._log_result(message, "moving to servicenow")
|
||||
move_message(message, "zzz-servicenow")
|
||||
return
|
||||
|
||||
# filter unactionable InCommon SSL cert emails
|
||||
if 'support@cert-manager.com' in normalized_from:
|
||||
actionable = [
|
||||
'iar',
|
||||
'sig',
|
||||
'analytics',
|
||||
'cwp-access',
|
||||
'dsdb',
|
||||
'tableau'
|
||||
]
|
||||
if "support@cert-manager.com" in normalized_from:
|
||||
actionable = ["iar", "sig", "analytics", "cwp-access", "dsdb", "tableau"]
|
||||
if is_in_message(actionable, message):
|
||||
self._log_result(message,
|
||||
f"keeping message for actionable SSL notification")
|
||||
self._log_result(
|
||||
message, f"keeping message for actionable SSL notification"
|
||||
)
|
||||
return
|
||||
else:
|
||||
self._log_result(message, 'moving to unactionable')
|
||||
move_message(message, 'zzz-unactionable')
|
||||
self._log_result(message, "moving to unactionable")
|
||||
move_message(message, "zzz-unactionable")
|
||||
return
|
||||
|
||||
# filter Student CRM
|
||||
if 'noreply-beaverhub@oregonstate.edu' in normalized_from:
|
||||
if "noreply-beaverhub@oregonstate.edu" in normalized_from:
|
||||
unactionable = [
|
||||
'has been assigned to you or your queue',
|
||||
"has been assigned to you or your queue",
|
||||
]
|
||||
if is_in_message(unactionable, message):
|
||||
self._log_result(message, 'moving to unactionable')
|
||||
move_message(message, 'zzz-unactionable')
|
||||
self._log_result(message, "moving to unactionable")
|
||||
move_message(message, "zzz-unactionable")
|
||||
return
|
||||
|
||||
# filter dependabot
|
||||
if ('dependabot[bot]' in str(message.sender)
|
||||
or is_in_message(['Your Dependabot alerts'], message)):
|
||||
self._log_result(message, 'moving to dependabot')
|
||||
move_message(message, 'zzz-dependabot')
|
||||
if "dependabot[bot]" in str(message.sender) or is_in_message(
|
||||
["Your Dependabot alerts"], message
|
||||
):
|
||||
self._log_result(message, "moving to dependabot")
|
||||
move_message(message, "zzz-dependabot")
|
||||
return
|
||||
|
||||
# filter ACTWON
|
||||
if 'actwon_administration@lists.oregonstate.edu' in normalized_from:
|
||||
self._log_result(message, 'moving to ACTWON')
|
||||
move_message(message, 'lists/ACTWON')
|
||||
if "actwon_administration@lists.oregonstate.edu" in normalized_from:
|
||||
self._log_result(message, "moving to ACTWON")
|
||||
move_message(message, "lists/ACTWON")
|
||||
return
|
||||
|
||||
# filter ACUG
|
||||
if 'isacug@oregonstate.edu' in normalized_to:
|
||||
self._log_result(message, 'moving to ACUG')
|
||||
move_message(message, 'lists/ACUG')
|
||||
if "isacug@oregonstate.edu" in normalized_to:
|
||||
self._log_result(message, "moving to ACUG")
|
||||
move_message(message, "lists/ACUG")
|
||||
return
|
||||
|
||||
# filter HelioCampus helpdesk
|
||||
if 'jira@heliocampus.atlassian.net' in normalized_from:
|
||||
self._log_result(message, 'moving to zzz-hc-helpdesk')
|
||||
move_message(message, 'zzz-hc-helpdesk')
|
||||
if "jira@heliocampus.atlassian.net" in normalized_from:
|
||||
self._log_result(message, "moving to zzz-hc-helpdesk")
|
||||
move_message(message, "zzz-hc-helpdesk")
|
||||
return
|
||||
|
||||
# filter alerts-sig
|
||||
if (normalized_from in ['mcc-b11-stor1@oregonstate.edu',
|
||||
'mcc-b12-stor1@oregonstate.edu',
|
||||
'ousclus@oregonstate.edu',
|
||||
'isilon@storage.sig.oregonstate.edu',
|
||||
'me4012@sig.oregonstate.edu']
|
||||
or 'alarm.DatastoreDiskUsageAlarm' in message.subject):
|
||||
self._log_result(message, 'moving to alerts-sig')
|
||||
move_message(message, 'lists/alerts-sig')
|
||||
if (
|
||||
normalized_from
|
||||
in [
|
||||
"mcc-b11-stor1@oregonstate.edu",
|
||||
"mcc-b12-stor1@oregonstate.edu",
|
||||
"ousclus@oregonstate.edu",
|
||||
"isilon@storage.sig.oregonstate.edu",
|
||||
"me4012@sig.oregonstate.edu",
|
||||
]
|
||||
or "alarm.DatastoreDiskUsageAlarm" in message.subject
|
||||
):
|
||||
self._log_result(message, "moving to alerts-sig")
|
||||
move_message(message, "lists/alerts-sig")
|
||||
return
|
||||
|
||||
# filter backup-nightly
|
||||
if 'backup-nightly@lists.oregonstate.edu' in message.to:
|
||||
self._log_result(message, 'moving to backup-nightly')
|
||||
move_message(message, 'lists/backup-nightly')
|
||||
if "backup-nightly@lists.oregonstate.edu" in message.to:
|
||||
self._log_result(message, "moving to backup-nightly")
|
||||
move_message(message, "lists/backup-nightly")
|
||||
return
|
||||
|
||||
# filter quarantine spam
|
||||
if 'quarantine@messaging.microsoft.com' in normalized_from:
|
||||
self._log_result(message, 'moving to unactionable')
|
||||
move_message(message, 'zzz-unactionable')
|
||||
if "quarantine@messaging.microsoft.com" in normalized_from:
|
||||
self._log_result(message, "moving to unactionable")
|
||||
move_message(message, "zzz-unactionable")
|
||||
return
|
||||
|
||||
# delete atlassian spam
|
||||
if 'confluence@osu.atlassian.net' in normalized_from:
|
||||
if "confluence@osu.atlassian.net" in normalized_from:
|
||||
unactionable = [
|
||||
'sv1_ds_atlassian',
|
||||
'[confluence] daily digest',
|
||||
"sv1_ds_atlassian",
|
||||
"[confluence] daily digest",
|
||||
]
|
||||
if is_in_message(unactionable, message):
|
||||
self._log_result(message, 'deleting atlassian spam')
|
||||
self._log_result(message, "deleting atlassian spam")
|
||||
message.delete()
|
||||
return
|
||||
|
||||
# delete salesforce spam
|
||||
if 'salesforce.com' in normalized_from:
|
||||
unactionable = [x.lower() for x in [
|
||||
'sandbox',
|
||||
'resetting your Salesforce password',
|
||||
'new Salesforce security token',
|
||||
]]
|
||||
if "salesforce.com" in normalized_from:
|
||||
unactionable = [
|
||||
x.lower()
|
||||
for x in [
|
||||
"sandbox",
|
||||
"resetting your Salesforce password",
|
||||
"new Salesforce security token",
|
||||
]
|
||||
]
|
||||
if is_in_message(unactionable, message):
|
||||
self._log_result(message, 'deleting salesforce spam')
|
||||
self._log_result(message, "deleting salesforce spam")
|
||||
message.delete()
|
||||
return
|
||||
|
||||
# delete rave junk
|
||||
if ('guardian@getrave.com' in normalized_from
|
||||
and 'new guardian chat' in message.subject.lower()):
|
||||
self._log_result(message, 'deleting rave spam')
|
||||
if (
|
||||
"guardian@getrave.com" in normalized_from
|
||||
and "new guardian chat" in message.subject.lower()
|
||||
):
|
||||
self._log_result(message, "deleting rave spam")
|
||||
message.delete()
|
||||
return
|
||||
|
||||
# delete conference spam
|
||||
if ('brock+conf@onid.oregonstate.edu' in normalized_to
|
||||
or 'brock+conf@oregonstate.edu' in normalized_to):
|
||||
self._log_result(message, 'deleting conference spam')
|
||||
if (
|
||||
"brock+conf@onid.oregonstate.edu" in normalized_to
|
||||
or "brock+conf@oregonstate.edu" in normalized_to
|
||||
):
|
||||
self._log_result(message, "deleting conference spam")
|
||||
message.delete()
|
||||
return
|
||||
|
||||
# delete OSU IT Managers list spam
|
||||
if 'osuitmanagers@oregonstate.edu' in normalized_to:
|
||||
self._log_result(message, 'deleting osu it managers spam')
|
||||
if "osuitmanagers@oregonstate.edu" in normalized_to:
|
||||
self._log_result(message, "deleting osu it managers spam")
|
||||
message.delete()
|
||||
return
|
||||
|
||||
# delete other spam
|
||||
if is_in_message(BLOCK_ANNOYING, message):
|
||||
self._log_result(message, 'junking spam containing annoying content')
|
||||
move_message(message, 'Junk Email')
|
||||
self._log_result(message, "junking spam containing annoying content")
|
||||
move_message(message, "Junk Email")
|
||||
return
|
||||
|
||||
# add 'HelioCampus' category to messages from HC
|
||||
if '@heliocampus' in normalized_from:
|
||||
if "@heliocampus" in normalized_from:
|
||||
self._log_result(message, "adding category 'HelioCampus'")
|
||||
add_category(message, 'HelioCampus')
|
||||
add_category(message, "HelioCampus")
|
||||
return
|
||||
|
||||
# keep messages from allowed emails and domains
|
||||
for good in ALLOW:
|
||||
if good in normalized_from and normalized_from not in BLOCK_EMAIL:
|
||||
self._log_result(message,
|
||||
f"keeping message from allowed sender {good}")
|
||||
self._log_result(message, f"keeping message from allowed sender {good}")
|
||||
return
|
||||
|
||||
# junk messages from blocked senders
|
||||
if normalized_from in BLOCK_EMAIL:
|
||||
self._log_result(message, 'junking spam from blocked sender')
|
||||
move_message(message, 'Junk Email')
|
||||
self._log_result(message, "junking spam from blocked sender")
|
||||
move_message(message, "Junk Email")
|
||||
return
|
||||
|
||||
# junk messages with blocked keywords
|
||||
if is_in_message(BLOCK_KEYWORDS, message):
|
||||
self._log_result(message, 'junking spam containing blocked keyword')
|
||||
move_message(message, 'Junk Email')
|
||||
self._log_result(message, "junking spam containing blocked keyword")
|
||||
move_message(message, "Junk Email")
|
||||
return
|
||||
|
||||
# junk messages from blocked domains
|
||||
@@ -375,29 +382,32 @@ def filter_message(self, message):
|
||||
is_spam = True
|
||||
break
|
||||
if is_spam:
|
||||
self._log_result(message, 'junking spam from blocked domain')
|
||||
move_message(message, 'Junk Email')
|
||||
self._log_result(message, "junking spam from blocked domain")
|
||||
move_message(message, "Junk Email")
|
||||
return
|
||||
|
||||
# junk known spam headers
|
||||
if (int(get_header('X-MS-Exchange-Organization-SCL', headers)) >= 5
|
||||
or get_header('X-Mailgun-List-Address', headers)
|
||||
or get_header('X-SFDC-EmailCategory', headers) == 'apimassmail'):
|
||||
self._log_result(message, 'junking spam with known header')
|
||||
move_message(message, 'Junk Email')
|
||||
if (
|
||||
int(get_header("X-MS-Exchange-Organization-SCL", headers)) >= 5
|
||||
or get_header("X-Mailgun-List-Address", headers)
|
||||
or get_header("X-SFDC-EmailCategory", headers) == "apimassmail"
|
||||
):
|
||||
self._log_result(message, "junking spam with known header")
|
||||
move_message(message, "Junk Email")
|
||||
return
|
||||
|
||||
# add 'OSU Inform' category to internal messages sent to DLs
|
||||
if ('@oregonstate.edu' in normalized_from
|
||||
and ((not get_header('To', headers)
|
||||
and not get_header('List-Id', headers))
|
||||
or ('inform-c' in ' '.join(normalized_to)))):
|
||||
if "@oregonstate.edu" in normalized_from and (
|
||||
(not get_header("To", headers) and not get_header("List-Id", headers))
|
||||
or ("inform-c" in " ".join(normalized_to))
|
||||
):
|
||||
self._log_result(message, "adding category 'OSU Inform'")
|
||||
add_category(message, 'OSU Inform')
|
||||
add_category(message, "OSU Inform")
|
||||
return
|
||||
|
||||
# KEEP MESSAGE
|
||||
self._log_result(message, 'keeping message, passed all filter checks')
|
||||
self._log_result(message, "keeping message, passed all filter checks")
|
||||
|
||||
|
||||
def is_in_message(list_, message):
|
||||
"""search a message for a list of strings
|
||||
@@ -408,38 +418,44 @@ def is_in_message(list_, message):
|
||||
|
||||
# normalize inputs
|
||||
search_terms = [x.lower() for x in list_]
|
||||
message_body = message.body.lower().replace('\ufeff', '')
|
||||
message_subject = message.subject.lower().replace('\ufeff', '')
|
||||
message_from = message.sender.address.lower().replace('\ufeff', '')
|
||||
message_body = message.body.lower().replace("\ufeff", "")
|
||||
message_subject = message.subject.lower().replace("\ufeff", "")
|
||||
message_from = message.sender.address.lower().replace("\ufeff", "")
|
||||
|
||||
for term in search_terms:
|
||||
if (re.search(term, message_subject)
|
||||
or re.search(term, message_from)
|
||||
or re.search(term, message_body)):
|
||||
if (
|
||||
re.search(term, message_subject)
|
||||
or re.search(term, message_from)
|
||||
or re.search(term, message_body)
|
||||
):
|
||||
is_found = True
|
||||
break
|
||||
return is_found
|
||||
|
||||
|
||||
def move_message(message, folder_name):
    """move a message to the folder cached under ``folder_name``

    Returns ``False`` and leaves the message untouched when ``folder_name``
    has no entry in ``folder_cache``; otherwise calls ``message.move`` with
    the cached folder and returns ``None``.
    """
    try:
        destination = folder_cache[folder_name]
    except KeyError:
        # unknown folder: nothing is moved
        return False
    message.move(destination)
|
||||
|
||||
|
||||
def add_category(message, category_name):
    """tag a message with the Outlook category cached under ``category_name``

    Returns ``False`` without modifying the message when ``category_name``
    has no entry in ``category_cache``; otherwise adds the cached category,
    saves the message and returns ``None``.
    """
    try:
        category = category_cache[category_name]
    except KeyError:
        # unknown category: leave the message as it is
        return False
    message.add_category(category)
    message.save_message()
|
||||
|
||||
|
||||
def init_filters(self):
    """copy the caller's folder and category maps into the module-level caches

    ``self`` is the object handed in by the loader (it must expose
    ``_folders`` and ``_categories``). Existing cache entries are kept and
    entries with the same key are overwritten.
    """
    # hack: the module-level helpers cannot reach the parent object, so its
    # dicts are mirrored into folder_cache / category_cache
    folder_cache.update(self._folders.items())
    category_cache.update(self._categories.items())
|
||||
|
||||
|
||||
def get_header(header_key, headers):
|
||||
vals = []
|
||||
for header in headers:
|
||||
@@ -453,6 +469,7 @@ def get_header(header_key, headers):
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def search_headers(search, headers):
|
||||
for header in headers:
|
||||
for val in header.values():
|
||||
|
||||
118
mail-filter.py
118
mail-filter.py
@@ -13,38 +13,40 @@ SCRIPTPATH = os.path.dirname(os.path.abspath(__file__))
|
||||
# parse config file
|
||||
config = {}
|
||||
configfile = configparser.ConfigParser()
|
||||
configfile.read(SCRIPTPATH + '/mail-filter.conf')
|
||||
config['FILTERS_FILE'] = configfile.get('main', 'Filters')
|
||||
config['IS_DEBUG'] = configfile.getboolean('main', 'EnableDebugging')
|
||||
config['CHECK_INTERVAL'] = int(configfile.get('main', 'MailCheckInterval'))
|
||||
config['LOG_DIR'] = configfile.get('logging', 'LogDir')
|
||||
config['TIMEZONE'] = configfile.get('logging', 'Timezone')
|
||||
config['APP_CLIENT_ID'] = os.getenv('APP_CLIENT_ID')
|
||||
config['APP_SECRET_KEY'] = os.getenv('APP_SECRET_KEY')
|
||||
config['APP_TENANT_ID'] = os.getenv('APP_TENANT_ID')
|
||||
configfile.read(SCRIPTPATH + "/mail-filter.conf")
|
||||
config["FILTERS_FILE"] = configfile.get("main", "Filters")
|
||||
config["IS_DEBUG"] = configfile.getboolean("main", "EnableDebugging")
|
||||
config["CHECK_INTERVAL"] = int(configfile.get("main", "MailCheckInterval"))
|
||||
config["LOG_DIR"] = configfile.get("logging", "LogDir")
|
||||
config["TIMEZONE"] = configfile.get("logging", "Timezone")
|
||||
config["APP_CLIENT_ID"] = os.getenv("APP_CLIENT_ID")
|
||||
config["APP_SECRET_KEY"] = os.getenv("APP_SECRET_KEY")
|
||||
config["APP_TENANT_ID"] = os.getenv("APP_TENANT_ID")
|
||||
|
||||
|
||||
# convert timestamp to local time
|
||||
def local_time(record, datefmt=None):
    """format a log record's creation time in the configured timezone

    Installed as the formatter's ``formatTime``. ``datefmt`` is accepted for
    signature compatibility but ignored; the output format is fixed.
    """
    zone = pendulum.timezone(config["TIMEZONE"])
    created = pendulum.from_timestamp(record.created, tz=zone)
    return created.strftime("%Y-%m-%d %H:%M:%S %z")
|
||||
|
||||
|
||||
# set up logger
|
||||
logger = logging.getLogger('o365mf')
|
||||
if config['IS_DEBUG']:
|
||||
logger = logging.getLogger("o365mf")
|
||||
if config["IS_DEBUG"]:
|
||||
logger.setLevel(logging.DEBUG)
|
||||
else:
|
||||
logger.setLevel(logging.INFO)
|
||||
formatter = logging.Formatter(
|
||||
'%(asctime)s %(module)s [%(levelname)s] %(message)s')
|
||||
formatter = logging.Formatter("%(asctime)s %(module)s [%(levelname)s] %(message)s")
|
||||
formatter.formatTime = local_time
|
||||
log_filename = f"{config['LOG_DIR']}/mail-filter.log"
|
||||
handler = logging.handlers.TimedRotatingFileHandler(
|
||||
log_filename, when='midnight', backupCount=5)
|
||||
log_filename, when="midnight", backupCount=5
|
||||
)
|
||||
handler.setFormatter(formatter)
|
||||
logger.addHandler(handler)
|
||||
|
||||
|
||||
# helper function for logging
|
||||
def log(msg, level=logging.INFO):
|
||||
print(msg)
|
||||
@@ -53,9 +55,9 @@ def log(msg, level=logging.INFO):
|
||||
|
||||
class O365MailFilter(object):
|
||||
_scopes = [
|
||||
'basic',
|
||||
'https://graph.microsoft.com/Mail.ReadWrite',
|
||||
'https://graph.microsoft.com/MailboxSettings.Read'
|
||||
"basic",
|
||||
"https://graph.microsoft.com/Mail.ReadWrite",
|
||||
"https://graph.microsoft.com/MailboxSettings.Read",
|
||||
]
|
||||
|
||||
def __init__(self, config):
|
||||
@@ -63,32 +65,30 @@ class O365MailFilter(object):
|
||||
self._is_canceled = False
|
||||
self._folders = {}
|
||||
self._categories = {}
|
||||
self._filtered_cache = {
|
||||
'last_reset': None,
|
||||
'ids': set()
|
||||
}
|
||||
self._filtered_cache = {"last_reset": None, "ids": set()}
|
||||
|
||||
# auth with O365
|
||||
self._authenticate()
|
||||
|
||||
def _authenticate(self):
    """create the O365 account object and authenticate it if necessary

    Builds ``self._account`` from the client id, secret key and tenant id in
    ``self._config``, with tokens stored in ``.cache/token.txt`` (the path is
    given as a relative one). When the account is not already authenticated,
    an authentication flow is started with ``self._scopes``.

    The outcome is logged: success is reported only when the account was
    already authenticated or the flow returned a truthy result; otherwise an
    error is logged instead of a misleading success message.
    """
    token_backend = FileSystemTokenBackend(
        token_path=".cache", token_filename="token.txt"
    )

    self._account = Account(
        (self._config["APP_CLIENT_ID"], self._config["APP_SECRET_KEY"]),
        tenant_id=self._config["APP_TENANT_ID"],
        token_backend=token_backend,
    )

    # only start the authentication flow when no valid token is available
    authenticated = bool(self._account.is_authenticated)
    if not authenticated:
        # NOTE(review): relies on authenticate() returning a truthy value on
        # success -- confirm against the installed O365 version
        authenticated = bool(self._account.authenticate(scopes=self._scopes))

    if authenticated:
        log("Authentication successful")
    else:
        log("Authentication failed", logging.ERROR)
|
||||
|
||||
def _load_filters(self):
|
||||
""" load filter code from a file on disk """
|
||||
loader = SourceFileLoader('filters', self._config['FILTERS_FILE'])
|
||||
"""load filter code from a file on disk"""
|
||||
loader = SourceFileLoader("filters", self._config["FILTERS_FILE"])
|
||||
module = loader.load_module()
|
||||
module.init_filters(self)
|
||||
# make 'filter_message()' implemented in the file available for use
|
||||
@@ -96,14 +96,14 @@ class O365MailFilter(object):
|
||||
self._filter_message = module.filter_message
|
||||
|
||||
def _load_folders(self, mailbox, folders=None, folder_path=None):
|
||||
""" recursively cache folder IDs for this mailbox """
|
||||
"""recursively cache folder IDs for this mailbox"""
|
||||
if folders is None:
|
||||
folders = mailbox.get_folders()
|
||||
self._folders = {}
|
||||
folder_path = ''
|
||||
folder_path = ""
|
||||
|
||||
for folder in folders:
|
||||
if folder_path == '':
|
||||
if folder_path == "":
|
||||
current_folder_path = f"{folder.name}"
|
||||
else:
|
||||
current_folder_path = f"{folder_path}/{folder.name}"
|
||||
@@ -113,26 +113,22 @@ class O365MailFilter(object):
|
||||
else:
|
||||
# add child folders to the cache, because get_folders() doesn't
|
||||
# descend into sub-folders by default
|
||||
self._load_folders(mailbox, folder.get_folders(),
|
||||
current_folder_path)
|
||||
self._load_folders(mailbox, folder.get_folders(), current_folder_path)
|
||||
|
||||
def _load_categories(self):
|
||||
""" cache Outlook categories for this account """
|
||||
"""cache Outlook categories for this account"""
|
||||
oc = self._account.outlook_categories()
|
||||
categories = oc.get_categories()
|
||||
for category in categories:
|
||||
self._categories[category.name] = category
|
||||
|
||||
def _clear_cache(self):
    """reset the filtered-message cache and stamp it with the current time"""
    log("Clearing filtered message cache...", logging.DEBUG)
    # a fresh dict: no remembered message ids, reset time set to now
    fresh_cache = {"last_reset": pendulum.now(), "ids": set()}
    self._filtered_cache = fresh_cache
|
||||
|
||||
def _repr_message(self, message):
|
||||
""" returns a str representation of a message suitable for logging """
|
||||
"""returns a str representation of a message suitable for logging"""
|
||||
# to = ','.join([r.address for r in message.to])
|
||||
return f"[FROM: {message.sender.address} SUBJ: {message.subject}]"
|
||||
|
||||
@@ -157,50 +153,56 @@ class O365MailFilter(object):
|
||||
# the O365 library will not paginate results correctly
|
||||
limit = self._account.protocol.max_top_value
|
||||
query = inbox.new_query()
|
||||
query = query.on_attribute('isRead').equals(False).select(
|
||||
'to_recipients', 'from', 'subject', 'body',
|
||||
'internet_message_headers'
|
||||
query = (
|
||||
query.on_attribute("isRead")
|
||||
.equals(False)
|
||||
.select(
|
||||
"to_recipients", "from", "subject", "body", "internet_message_headers"
|
||||
)
|
||||
)
|
||||
log(f" {pendulum.now()} Getting messages from inbox...", logging.DEBUG)
|
||||
messages = inbox.get_messages(query=query, limit=limit, batch=25)
|
||||
|
||||
for message in messages:
|
||||
log(f" {pendulum.now()} {message}", logging.DEBUG)
|
||||
if message.object_id in self._filtered_cache['ids']:
|
||||
if message.object_id in self._filtered_cache["ids"]:
|
||||
# we've already filtered this message, so skip it
|
||||
continue
|
||||
self._filter_message(self, message)
|
||||
self._filtered_cache['ids'].add(message.object_id)
|
||||
self._filtered_cache["ids"].add(message.object_id)
|
||||
|
||||
def run(self):
    """filter the mailbox repeatedly until the filter is canceled

    Each pass clears the filtered-message cache when it has never been reset
    or was last reset more than 4 hours ago, runs ``self.filter`` (told
    whether this is the first pass), then sleeps for the configured
    ``CHECK_INTERVAL``.
    """
    first_pass = True
    while not self._is_canceled:
        last_reset = self._filtered_cache["last_reset"]
        # the `or` short-circuits, so the comparison only runs once a reset
        # time exists
        cache_is_stale = (
            last_reset is None or last_reset < pendulum.now().subtract(hours=4)
        )
        if cache_is_stale:
            self._clear_cache()

        log(f"Filtering the sludge @ {pendulum.now()}...", logging.DEBUG)
        self.filter(first_pass)
        first_pass = False
        time.sleep(self._config["CHECK_INTERVAL"])

    log("Done.")
|
||||
|
||||
def exit(self):
    """flag the filter as canceled so the ``run`` loop stops at its next check"""
    self._is_canceled = True
|
||||
|
||||
|
||||
log('Initializing O365 mail filter...')
|
||||
log("Initializing O365 mail filter...")
|
||||
o365mf = O365MailFilter(config)
|
||||
|
||||
|
||||
def exit(signum, frame):
    """signal handler: log the received signal and ask the filter to stop

    ``frame`` is part of the signal-handler signature and is not used.
    """
    notice = f"Caught signal {signum}, exiting..."
    log(notice)
    o365mf.exit()
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
# register signal handlers
|
||||
signal.signal(signal.SIGTERM, exit)
|
||||
signal.signal(signal.SIGHUP, exit)
|
||||
|
||||
Reference in New Issue
Block a user