Initial commit
This commit is contained in:
4
.cache/.gitignore
vendored
Normal file
4
.cache/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
# Ignore everything in this directory
|
||||
*
|
||||
# Except this file
|
||||
!.gitignore
|
||||
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
mail-filter.conf
|
||||
__pycache__
|
||||
3
README.md
Normal file
3
README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# o365-mail-filter
|
||||
|
||||
... because spam takes many forms.
|
||||
180
filter-rules.py
Normal file
180
filter-rules.py
Normal file
@@ -0,0 +1,180 @@
|
||||
# Exact sender addresses whose mail is always junked, even when the
# sender's domain appears in ALLOW (filter_message checks this list
# before honoring an ALLOW match).  Compared against the lowercased
# sender address, so entries should be lowercase.
BLOCK_EMAIL = [
    'chirhart@amazon.com',
    'ron.krogel@citrix.com',
    'replieswelcome@duo.com',
    'webinars@duo.com',
    'diversity@oregonstate.edu',
    'jramiro@pagerduty.com',
    'info@snowflake.com',
    'lisa@duo.com',
    'cortana@microsoft.com',
]
|
||||
|
||||
# Phrases that mark a message as spam when found in the subject or body.
# Matching is case-insensitive: normalize_lists() lowercases this list
# into self._normalized['BLOCK_KEYWORDS'] before comparison.
BLOCK_KEYWORDS = [
    'charitable fund drive',
    'election reminder',
    'email preferences',
    'end these email updates',
    'food drive',
    'food share',
    'general election',
    'giving tuesday',
    'hardship leave donations needed',
    'manage your preferences',
    'modify your preferences',
    'opt-out',
    'opt out',
    'prefer not to receive',
    'prefer not to see',
    # BUG FIX: a missing comma after 'register to vote' implicitly
    # concatenated it with the next literal, producing the single entry
    # 'register to voteSamsung SDS America' — so neither phrase ever
    # matched a real message.
    'register to vote',
    'Samsung SDS America',
    'sidekickopen',
    'special election',
    'subscription preferences',
    'survey',
    'unsubscribe',
    'voter registration',
    'want to receive',
    'webinar',
    'whitepaper',
    'wish to receive',
]
|
||||
|
||||
# Domains (substring-matched against the sender address and all message
# headers) whose mail is junked.  Currently empty; populate as needed.
BLOCK_DOMAINS = []
|
||||
|
||||
# Substrings (bare domains or full addresses) that mark a sender as
# trusted: if any entry occurs in the lowercased sender address, the
# message is kept — unless that exact address is in BLOCK_EMAIL.
ALLOW = [
    'oregonstate.edu',
    'github.com',
    'duo.com',
    'sns.amazonaws.com',
    'opsgenie.net',
    'notify@teamdynamixapp.com',
    'newsbites@email.sans.org',
    'noreply@box.com',
    'noreply@email.teams.microsoft.com',
    'no-reply@sharepointonline.com',
    'slalom.com',
    'govdelivery.com',
    'linkoregon.org',
    'megan@pdxwit.org',
    'busyconf.com',
    'support@githubsupport.com',
    'microsoft.com',
]
|
||||
|
||||
def filter_message(self, message):
    """Route one inbox message: move it to a folder, delete it, junk it,
    or keep it in the inbox.

    This module is loaded dynamically by O365MailFilter._load_filters()
    and the function is invoked as a plain function, so the filter
    instance is passed explicitly as `self`.  Relies on:
      self._folders     -- folder-name -> folder-id cache
      self._log_result  -- per-message result logger
      self._normalized  -- lowercased keyword lists from normalize_lists()
    """
    # normalize message attributes for case-insensitive comparison
    normalized_to = [x.address.lower() for x in message.to]
    normalized_from = message.sender.address.lower()
    normalized_subject = message.subject.lower()

    # filter alerts-sig
    if (normalized_from in ['mcc-b11-stor1@oregonstate.edu',
                            'mcc-b12-stor1@oregonstate.edu',
                            'ousclus@oregonstate.edu',
                            'isilon@storage.sig.oregonstate.edu']
            or 'alarm.DatastoreDiskUsageAlarm' in message.subject):
        self._log_result(message, 'moving to alerts-sig')
        message.move(self._folders['alerts-sig'])
        return

    # filter conference spam
    if 'brocks+conf@onid.oregonstate.edu' in normalized_to:
        self._log_result(message, 'deleting conference spam')
        message.delete()
        return

    # filter backup-nightly
    # NOTE(review): this tests the raw recipient objects (message.to),
    # not normalized_to — it only matches if the O365 library compares
    # recipients against strings.  Confirm against the library; probably
    # should use normalized_to like the conference check above.
    if 'backup-nightly@lists.oregonstate.edu' in message.to:
        self._log_result(message, 'moving to backup-nightly')
        message.move(self._folders['backup-nightly'])
        return

    # delete HP alert spam
    if normalized_from == 'alerts@alerts.mail.hpe.com':
        self._log_result(message, 'deleting HP alert spam')
        message.delete()
        return

    # keep messages from allowed emails and domains, unless the exact
    # sender address is explicitly blocked
    for good in ALLOW:
        if good in normalized_from and normalized_from not in BLOCK_EMAIL:
            self._log_result(message,
                             f"keeping message from allowed sender {good}")
            return

    # junk messages from blocked senders
    if normalized_from in BLOCK_EMAIL:
        self._log_result(message, 'junking spam from blocked sender')
        message.move(self._folders['Junk Email'])
        return

    # junk messages with blocked keywords in subject or message body
    is_spam = False
    message_body = message.body.lower()
    for phrase in self._normalized['BLOCK_KEYWORDS']:
        if phrase in normalized_subject or phrase in message_body:
            is_spam = True
            break
    if is_spam:
        self._log_result(message, 'junking spam containing blocked keyword')
        message.move(self._folders['Junk Email'])
        return

    # process message headers into a sane data structure: a list of
    # single-entry {name: lowercased-value} dicts
    headers = []
    for header in message.message_headers:
        headers.append({header['name']: header['value'].lower()})

    # junk messages from blocked domains
    # BUG FIX: the original referenced an undefined name `message_from`,
    # called search_headers() without its required `headers` argument,
    # and logged 'junking' without ever moving the message.
    for domain in BLOCK_DOMAINS:
        if domain in normalized_from or search_headers(domain, headers):
            self._log_result(message, 'junking spam from blocked domain')
            message.move(self._folders['Junk Email'])
            return

    # junk known spam headers
    # BUG FIX: the original called get_header('X-Spam-Flag' == 'YES', ...),
    # which evaluated the comparison first and always looked up the key
    # False.  Compare the (lowercased) header value instead, and guard the
    # SCL int() conversion: get_header() returns a list when the header
    # repeats, and False when it is absent.
    scl = get_header('X-MS-Exchange-Organization-SCL', headers)
    try:
        scl_level = int(scl)
    except (TypeError, ValueError):
        scl_level = 0  # missing or malformed header: treat as not spam
    if get_header('X-Spam-Flag', headers) == 'yes' or scl_level >= 5:
        self._log_result(message, 'junking spam with known header')
        message.move(self._folders['Junk Email'])
        return

    # KEEP MESSAGE
    self._log_result(message, 'keeping message, passed all filter checks')
|
||||
|
||||
def normalize_lists(self):
    """Pre-lowercase the keyword lists so filter_message() can compare
    them against already-lowercased subjects and bodies.

    Called once per filter pass by O365MailFilter._load_filters(), with
    the filter instance passed explicitly as `self`.
    """
    self._normalized['BLOCK_KEYWORDS'] = list(map(str.lower, BLOCK_KEYWORDS))
|
||||
|
||||
def get_header(header_key, headers):
    """Look up `header_key` in a list of single-entry header dicts.

    Returns the single value when exactly one header matches, a list of
    values when several do, and False when the key is absent.  Keys are
    matched case-sensitively (filter_message preserves header-name case).
    """
    matches = [value
               for header in headers if header_key in header
               for value in header.values()]
    if not matches:
        return False
    return matches if len(matches) > 1 else matches[0]
|
||||
|
||||
def search_headers(search, headers):
    """Return True if `search` occurs as a substring of any header value.

    `headers` is the list of single-entry {name: value} dicts built by
    filter_message(); values there are already lowercased, so callers
    should pass a lowercase search string.
    """
    # BUG FIX: removed a leftover debug print that fired for every header
    # of every message, dropped the unused `vals`/`is_found` accumulators,
    # and made the not-found result an explicit False instead of an
    # implicit None fallthrough.
    for header in headers:
        for value in header.values():
            if search in value:
                return True
    return False
||||
4
logs/.gitignore
vendored
Normal file
4
logs/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
# Ignore everything in this directory
|
||||
*
|
||||
# Except this file
|
||||
!.gitignore
|
||||
8
mail-filter.conf-dist
Normal file
8
mail-filter.conf-dist
Normal file
@@ -0,0 +1,8 @@
|
||||
[main]
|
||||
Filters =
|
||||
EnableDebugging = yes
|
||||
MailCheckInterval = 60
|
||||
|
||||
[logging]
|
||||
LogDir =
|
||||
Timezone = America/Los_Angeles
|
||||
154
mail-filter.py
Normal file
154
mail-filter.py
Normal file
@@ -0,0 +1,154 @@
|
||||
import configparser
|
||||
import logging
|
||||
import logging.handlers
|
||||
import os
|
||||
import pendulum
|
||||
import signal
|
||||
import time
|
||||
from importlib.machinery import SourceFileLoader
|
||||
from O365 import Account, FileSystemTokenBackend
|
||||
|
||||
# Directory this script lives in; the config file sits alongside it.
SCRIPTPATH = os.path.dirname(os.path.abspath(__file__))

# Parse mail-filter.conf and fold the settings — plus the O365 app
# credentials, which come from the environment rather than the file —
# into a single flat dict.
configfile = configparser.ConfigParser()
configfile.read(SCRIPTPATH + '/mail-filter.conf')
config = {
    'FILTERS_FILE': configfile.get('main', 'Filters'),
    'IS_DEBUG': configfile.getboolean('main', 'EnableDebugging'),
    'CHECK_INTERVAL': int(configfile.get('main', 'MailCheckInterval')),
    'LOG_DIR': configfile.get('logging', 'LogDir'),
    'TIMEZONE': configfile.get('logging', 'Timezone'),
    'APP_CLIENT_ID': os.getenv('APP_CLIENT_ID'),
    'APP_SECRET_KEY': os.getenv('APP_SECRET_KEY'),
    'APP_TENANT_ID': os.getenv('APP_TENANT_ID'),
}
|
||||
|
||||
# convert timestamp to local time
|
||||
def local_time(record, datefmt=None):
    """logging.Formatter.formatTime replacement: render the record's
    timestamp in the configured timezone as 'YYYY-MM-DD HH:MM:SS +ZZZZ'.
    """
    tz = pendulum.timezone(config['TIMEZONE'])
    stamp = pendulum.from_timestamp(record.created, tz=tz)
    return stamp.strftime('%Y-%m-%d %H:%M:%S %z')
|
||||
|
||||
# Set up the module logger: one file per day under LOG_DIR, rotated at
# midnight, keeping five days of history.  Timestamps are rendered in
# the configured timezone via local_time().
logger = logging.getLogger('o365mf')
logger.setLevel(logging.DEBUG if config['IS_DEBUG'] else logging.INFO)

formatter = logging.Formatter(
    '%(asctime)s %(module)s [%(levelname)s] %(message)s')
formatter.formatTime = local_time

log_filename = f"{config['LOG_DIR']}/mail-filter.log"
handler = logging.handlers.TimedRotatingFileHandler(
    log_filename, when='midnight', backupCount=5)
handler.setFormatter(formatter)
logger.addHandler(handler)
|
||||
|
||||
|
||||
class O365MailFilter(object):
    """Polls an Office 365 inbox and applies externally-loaded filter rules.

    The filtering logic lives in a separate file (config key 'Filters')
    that is reloaded on every pass, so rules can be edited without
    restarting the daemon.
    """

    # OAuth scopes requested when authenticating against Microsoft Graph
    _scopes = [
        'basic',
        'https://graph.microsoft.com/Mail.ReadWrite'
    ]

    def __init__(self, config):
        # config: flat dict built at module level from mail-filter.conf
        # plus APP_* environment variables
        self._config = config
        self._is_canceled = False  # set by exit() to stop the run() loop
        self._folders = {}         # folder name -> folder id cache
        self._normalized = {}      # lowercased lists built by the filter file

        # auth with O365
        self._authenticate()

    def _authenticate(self):
        """Authenticate against O365, caching the token under .cache/."""
        token_backend = FileSystemTokenBackend(token_path='.cache',
                                               token_filename='token.txt')

        self._account = Account(
            (self._config['APP_CLIENT_ID'], self._config['APP_SECRET_KEY']),
            tenant_id=self._config['APP_TENANT_ID'],
            token_backend=token_backend
        )

        # the interactive consent flow only runs when no cached token is valid
        if not self._account.is_authenticated:
            self._account.authenticate(scopes=self._scopes)

        logger.info('Authentication successful')

    def _load_filters(self):
        """ load filter code from a file on disk """
        # NOTE(review): loader.load_module() is deprecated in favor of
        # exec_module(); it still works but should be migrated eventually.
        loader = SourceFileLoader('filters', self._config['FILTERS_FILE'])
        module = loader.load_module()
        module.normalize_lists(self)
        # make 'filter_message()' implemented in the file available for use
        # within this class as 'self._filter_message()'
        self._filter_message = module.filter_message

    def _load_folders(self):
        """ retrieve folders for this mailbox and cache their ids """
        self._folders = {}

        mailbox = self._account.mailbox()
        folders = mailbox.get_folders()

        for folder in folders:
            self._folders[folder.name] = folder.folder_id

    def _repr_message(self, message):
        """ returns a str representation of a message suitable for logging """
        # to = ','.join([r.address for r in message.to])
        return f"[FROM: {message.sender.address} SUBJ: {message.subject}]"

    def _log_result(self, message, result):
        # one INFO line per message: what it was plus what we did with it
        logger.info(f"{self._repr_message(message)} RESULT: {result}")

    def filter(self):
        """Run one filtering pass over all unread inbox messages."""
        self._load_filters()
        self._load_folders()

        mailbox = self._account.mailbox()
        inbox = mailbox.inbox_folder()

        # set limit to max allowed by O365, which is 999 messages
        # we have to explicitly set a limit value or the O365 library will not
        # paginate results correctly
        limit = self._account.protocol.max_top_value
        query = inbox.new_query()
        query = query.on_attribute('isRead').equals(False).select(
            'to_recipients', 'from', 'subject', 'body',
            'internet_message_headers'
        )
        messages = inbox.get_messages(query=query, limit=limit, batch=25)

        for message in messages:
            # _filter_message is a plain function stored on the instance
            # (assignment does not bind it), so pass self explicitly
            self._filter_message(self, message)

    def run(self):
        """ run filter as a loop """
        # loop until exit() flips the cancel flag (via a signal handler)
        while not self._is_canceled:
            self.filter()
            time.sleep(self._config['CHECK_INTERVAL'])

        logger.info('Done.')

    def exit(self):
        # request a clean stop; run() exits after the current pass + sleep
        self._is_canceled = True
|
||||
|
||||
|
||||
# Module-level bootstrap: the filter (and its O365 authentication) is
# constructed as soon as the module loads, so the signal handler below
# always has an instance to cancel.
logger.info('Initializing O365 mail filter...')
o365mf = O365MailFilter(config)
||||
|
||||
def exit(signum, frame):
    """Signal handler: log the signal and request a clean shutdown of the
    filter loop (run() finishes its current pass, then stops).
    """
    logger.info(f"Caught signal {signum}, exiting...")
    o365mf.exit()
|
||||
|
||||
if __name__ == '__main__':
    # register signal handlers so the daemon can shut down cleanly on
    # TERM, HUP, or Ctrl-C
    for sig in (signal.SIGTERM, signal.SIGHUP, signal.SIGINT):
        signal.signal(sig, exit)

    # run it
    o365mf.run()
|
||||
22
requirements.txt
Normal file
22
requirements.txt
Normal file
@@ -0,0 +1,22 @@
|
||||
beautifulsoup4==4.9.1
|
||||
boto3==1.14.18
|
||||
botocore==1.17.18
|
||||
certifi==2020.6.20
|
||||
chardet==3.0.4
|
||||
docutils==0.15.2
|
||||
idna==2.10
|
||||
jmespath==0.10.0
|
||||
O365==2.0.10
|
||||
oauthlib==3.1.0
|
||||
pendulum==2.1.0
|
||||
python-dateutil==2.8.1
|
||||
pytz==2020.1
|
||||
pytzdata==2019.3
|
||||
requests==2.24.0
|
||||
requests-oauthlib==1.3.0
|
||||
s3transfer==0.3.3
|
||||
six==1.15.0
|
||||
soupsieve==2.0.1
|
||||
stringcase==1.2.0
|
||||
tzlocal==2.1
|
||||
urllib3==1.25.9
|
||||
Reference in New Issue
Block a user