# NOTE(review): the three lines that originally appeared here ("Professional
# Documents", "Culture Documents", "config") are document-viewer page residue,
# not Python source. The stray "config" may be the remnant of a module-level
# 'config = {}' declaration -- confirm against the original file.
import imaplib
import io
import json
import base64
import hashlib
import re
import email
import emoji
import urllib.parse
import traceback
import ioc_finder
import thehive4py.api, thehive4py.models, thehive4py.query
def connect_to_IMAP_server(wsl):
    """Open an authenticated IMAP4-over-SSL connection to the configured server.

    Host, port, user, password and folder are read from the module-level
    ``config`` dict (populated elsewhere in this file from configuration.json).

    Parameters:
        wsl: UI logger object; only its ``emit_info()`` is used here to mirror
            log messages to the analyst.

    Returns:
        imaplib.IMAP4_SSL: the logged-in connection.
    """
    # Create the connection to the IMAP server using host and port
    connection = imaplib.IMAP4_SSL(config['imapHost'], config['imapPort'])
    # Log in using username and password
    connection.login(config['imapUser'], config['imapPassword'])
    # The extraction had wrapped this message mid-literal; restored to one string.
    log.info('Connected to email {0} server {1}:{2}/{3}'.format(
        config['imapUser'], config['imapHost'], config['imapPort'], config['imapFolder']))
    wsl.emit_info('Connected to email {0} server {1}:{2}/{3}'.format(
        config['imapUser'], config['imapHost'], config['imapPort'], config['imapFolder']))
    return connection
# Use the ioc-finder module to extract observables from a string buffer and add to
# the list only if they are not whitelisted
def search_observables(buffer, wsl):
    """Extract observables (emails, IPv4s, domains, URLs) from *buffer*.

    Values accepted by the file-level ``is_whitelisted`` helper are logged and
    skipped; everything else is collected.

    Parameters:
        buffer: text to scan for indicators of compromise.
        wsl: UI logger object; ``emit_info()`` mirrors log messages to the analyst.

    Returns:
        list[dict]: items of the form ``{'type': 'mail'|'ip'|'domain'|'url',
        'value': <indicator>}``, produced in the order mail, ip, domain, url.
    """
    # One extractor per observable type. Iterating this mapping replaces the four
    # copy-pasted loops of the original; dicts preserve insertion order, so the
    # processing order (mail, ip, domain, url) is unchanged.
    extractors = {
        'mail': ioc_finder.parse_email_addresses,
        'ip': ioc_finder.parse_ipv4_addresses,
        'domain': ioc_finder.parse_domain_names,
        # Option to parse URLs without a scheme (e.g. without https://)
        'url': lambda text: ioc_finder.parse_urls(text, parse_urls_without_scheme=False),
    }
    observables = []
    for obs_type, extract in extractors.items():
        for value in extract(buffer):
            if is_whitelisted(obs_type, value):
                log.info("Skipped whitelisted observable {0}: {1}".format(obs_type, value))
                wsl.emit_info("Skipped whitelisted observable {0}: {1}".format(obs_type, value))
            else:
                log.info("Found observable {0}: {1}".format(obs_type, value))
                wsl.emit_info("Found observable {0}: {1}".format(obs_type, value))
                observables.append({'type': obs_type, 'value': value})
    return observables
# Use the mail UID of the selected email to fetch only that email from the mailbox
# NOTE(review): this copy of the file is an extraction with indentation stripped and
# long statements/string literals wrapped across physical lines; the code below is
# kept byte-for-byte and only comments were added. The function also looks truncated
# here: internal_msg and external_from_field are built but never returned in the
# visible lines -- recover the original source before changing code.
def obtain_eml(connection, mail_uid, wsl):
# Read all the unseen emails from this folder
connection.select(config['imapFolder'])
typ, dat = connection.search(None, '(UNSEEN)')
# The dat[0] variable contains the IDs of all the unread emails
# The IDs are obtained by using the split function and the length of the
array is the number of unread emails
# If the selected mail uid is present in the list, then process only that
email
# Proceed only while the requested UID is still among the UNSEEN message IDs.
if mail_uid.encode() in dat[0].split():
typ, dat = connection.fetch(mail_uid.encode(), '(RFC822)')
# NOTE(review): on a failed fetch this only logs; execution continues and
# dat[0][1] may not be the message bytes -- confirm intended behavior.
if typ != 'OK':
log.error(dat[-1])
wsl.emit_error(dat[-1])
message = dat[0][1]
# The fetch operation flags the message as seen by default
log.info("Message {0} flagged as read".format(mail_uid))
wsl.emit_info("Message {0} flagged as read".format(mail_uid))
# Obtain the From field of the external email that will be used to send
the verdict to the user
msg = email.message_from_bytes(message)
# decode_header returns (value, charset) pairs; only the first chunk is used here.
decode = email.header.decode_header(msg['From'])[0]
if decode[1] is not None:
external_from_field = decode[0].decode(decode[1])
else:
external_from_field = str(decode[0])
# parseaddr yields (realname, address); keep just the address part.
parsed_from_field = email.utils.parseaddr(external_from_field)
if len(parsed_from_field) > 1:
external_from_field = parsed_from_field[1]
# Walk the multipart structure of the email (now only the EML part is
needed)
for part in msg.walk():
mimetype = part.get_content_type()
# If the content type of this part is the rfc822 message, then
stop because the EML attachment is the last part
# If there is any other part after the rfc822 part, then it may
be related to the internal email, so it must not be considered
# Both message/rfc822 and application/octet-stream types are
considered due to differences in how the attachment is handled by different mail
clients
if mimetype in ['application/octet-stream', 'message/rfc822']:
# Obtain the internal EML file in both cases
if mimetype == 'application/octet-stream':
eml_payload = part.get_payload(decode=1)
internal_msg = email.message_from_bytes(eml_payload)
elif mimetype == 'message/rfc822':
eml_payload = part.get_payload(decode=0)[0]
# Some clients base64-encode the attached message; fall back to the raw payload
# object if decoding fails.
# NOTE(review): bare 'except' swallows every error (even typos) -- narrow it
# once the original file is restored.
try:
internal_msg =
email.message_from_string(base64.b64decode(str(eml_payload)).decode())
except:
internal_msg = eml_payload
# If the EML attachment has been found, then break the for
break
else:
# Handle multiple analysts that select the same email from more than
one tab
log.error("The email with UID {} has already been analyzed. Please
refresh the page and retry.".format(mail_uid))
wsl.emit_error("The email with UID {} has already been analyzed. Please
refresh the page and retry.".format(mail_uid))
return
# NOTE(review): fragment of a larger function whose 'def' line is not visible in this
# extraction (lines are missing just before this point). Names such as decode_elem,
# decoded_elements_subj, header_fields, header_fields_list, observables_header,
# observables_body, attachments, api_thehive, new_id, new_case_id and new_case are
# defined in the missing part. Code kept byte-for-byte; comments only.
# Tail of the subject-decoding loop: decode_elem is presumably a (value, charset)
# pair from email.header.decode_header -- TODO confirm against the full source.
decoded_elements_subj.append(decode_elem[0].decode(decode_elem[1]))
else:
if(isinstance(decode_elem[0], str)):
decoded_elements_subj.append(str(decode_elem[0]))
else:
decoded_elements_subj.append(decode_elem[0].decode())
subject_field = ''.join(decoded_elements_subj)
# Search the observables in the values of all the selected header fields
# Since a field may appear more than one time (e.g. Received:), the lists
need to be initialized and then extended
# NOTE(review): header_fields.keys()[i] / .values()[i] would raise TypeError on a
# plain Python 3 dict (views are not subscriptable); header_fields presumably comes
# from the missing code with a subscriptable keys()/values() -- verify.
i = 0
while i < len(header_fields.keys()):
if header_fields.keys()[i] in header_fields_list:
if not observables_header.get(header_fields.keys()[i]):
observables_header[header_fields.keys()[i]] = []
observables_header[header_fields.keys()
[i]].extend(search_observables(header_fields.values()[i], wsl))
i+=1
# Create a tuple containing the eml file and the name it should have as an
observable
filename = subject_field + ".eml"
inmem_file = io.BytesIO()
# Re-serialize the attached message into an in-memory buffer.
gen = email.generator.BytesGenerator(inmem_file)
gen.flatten(internal_msg)
eml_file_tuple = (inmem_file, filename)
# Workaround to prevent HTML tags to appear inside the URLs (splits on < or
>)
for observable_body in observables_body:
if observable_body['type'] == "url":
observable_body['value'] = observable_body['value'].replace(">",
"<").split("<")[0]
# Add attachments
# Each attachment is uploaded to TheHive as a 'file' observable on the new case.
for attachment in attachments:
observable = thehive4py.models.CaseObservable(
dataType='file',
data = attachment,
ioc = False,
tags = ['email', 'email_attachment'],
message = 'Found as email attachment'
)
response = api_thehive.create_case_observable(new_id, observable)
# Only HTTP 201 is treated as a successful observable creation.
if response.status_code == 201:
log.info('Added observable file {0} to case
{1}'.format(attachment[1], new_case_id))
wsl.emit_info('Added observable file {0} to case
{1}'.format(attachment[1], new_case_id))
else:
log.debug('Cannot add observable: file {0} - {1}
({2})'.format(attachment[1], response.status_code, response.text))
else:
# Case-creation failure path (the matching 'if' is in the missing lines above).
log.error('Cannot create case: {0} ({1})'.format(response.status_code,
response.text))
wsl.emit_error('Cannot create case: {0}
({1})'.format(response.status_code, response.text))
return
# Return the id of the just created case on which to run the analysis
return new_case
# NOTE(review): fragment of an initialization routine (the enclosing 'def' is not
# visible in this extraction; the bare 'return' statements below imply function
# scope, and 'wsl' is used without a visible definition). Code kept byte-for-byte;
# comments only.
global config
global whitelist
global log
global api_thehive
# Logging configuration
# NOTE(review): 'logging' / 'logging.config' are used below but no corresponding
# import is visible in this extraction -- confirm it exists at the top of the file.
try:
with open('logging_conf.json') as log_conf:
log_conf_dict = json.load(log_conf)
logging.config.dictConfig(log_conf_dict)
except Exception as e:
# Logging is not configured yet at this point, so fall back to print().
print("[ERROR]_[list_emails]: Error while trying to open the file
'logging_conf.json'. It cannot be read or it is not valid:
{}".format(traceback.format_exc()))
return
log = logging.getLogger(__name__)
try:
with open('configuration.json') as conf_file:
conf_dict = json.load(conf_file)
# IMAP configuration
config['imapHost'] = conf_dict['imap']['host']
config['imapPort'] = int(conf_dict['imap']['port'])
config['imapUser'] = conf_dict['imap']['user']
config['imapPassword'] = conf_dict['imap']['password']
config['imapFolder'] = conf_dict['imap']['folder']
# TheHive configuration
config['thehiveURL'] = conf_dict['thehive']['url']
config['thehiveApiKey'] = conf_dict['thehive']['apikey']
except Exception as e:
log.error("Error while trying to open the file 'configuration.json':
{}".format(traceback.format_exc()))
wsl.emit_error("Error while trying to open the file
'configuration.json'")
return
# NOTE(review): the 'try:' and the 'with open('whitelist.json')' that should precede
# the next block (and define whitelist_dict) are missing from this extraction -- the
# except handler at the end references 'whitelist.json'.
# The domains in the last three lists are used to create three
lists of regular expressions that serve to whitelist subdomains, URLs and email
addresses based on those domains
# Each domain is escaped ('.' -> '\.') and embedded in an anchored pattern that also
# matches any subdomain / URL / email address ending in that domain.
whitelist['regexDomainsInSubdomains'] = [r'^(.+\.|)
{0}$'.format(domain.replace(r'.', r'\.')) for domain in
whitelist_dict['domainsInSubdomains']]
whitelist['regexDomainsInURLs'] = [r'^(http|https):\/\/([^\/]
+\.|){0}(\/.*|\?.*|\#.*|)$'.format(domain.replace(r'.', r'\.')) for domain in
whitelist_dict['domainsInURLs']]
whitelist['regexDomainsInEmails'] = [r'^.+@(.+\.|)
{0}$'.format(domain.replace(r'.', r'\.')) for domain in
whitelist_dict['domainsInEmails']]
except Exception as e:
log.error("Error while trying to open the file 'whitelist.json':
{}".format(traceback.format_exc()))
wsl.emit_error("Error while trying to open the file 'whitelist.json'")
return