Case From Email

import logging.
config
import imaplib
import io
import json
import base64
import hashlib
import re
import email
import emoji
import urllib.parse
import traceback
import ioc_finder
import thehive4py.api, thehive4py.models, thehive4py.query
# Global variable used for logging

log = None
# Global variable needed to use the API

api_thehive = None
# Global variable used for the configuration

config = {}
# Global variable used for the whitelist

whitelist = {}
def connect_to_IMAP_server(wsl):
# Create the connection to the IMAP server using host and port
connection = imaplib.IMAP4_SSL(config['imapHost'], config['imapPort'])
# Log in using username and password
connection.login(config['imapUser'],config['imapPassword'])
log.info('Connected to email {0} server
{1}:{2}/{3}'.format(config['imapUser'], config['imapHost'], config['imapPort'],
config['imapFolder']))
wsl.emit_info('Connected to email {0} server
{1}:{2}/{3}'.format(config['imapUser'], config['imapHost'], config['imapPort'],
config['imapFolder']))
return connection
# Check if an observable is whitelisted with an exact match or with a regex match

def is_whitelisted(obs_type, obs_value):
found = False
if ((not found) and (obs_value in whitelist[obs_type+'Exact'])):
found = True
if ((not found) and (obs_type == 'domain')):
for regex in whitelist['regexDomainsInSubdomains']:
if re.search(regex, obs_value):
found = True
if ((not found) and (obs_type == 'url')):
for regex in whitelist['regexDomainsInURLs']:
found = True
if ((not found) and (obs_type == 'mail')):
for regex in whitelist['regexDomainsInEmails']:
found = True
if ((not found) and (obs_type not in ['hash', 'filetype'])):
for regex in whitelist[obs_type+'Regex']:
found = True
return found
# Use the ioc-finder module to extract observables from a string buffer and add to
the list only if they are not whitelisted
def search_observables(buffer, wsl):
observables = []
iocs = {}
iocs['email_addresses'] = ioc_finder.parse_email_addresses(buffer)
iocs['ipv4s'] = ioc_finder.parse_ipv4_addresses(buffer)
iocs['domains'] = ioc_finder.parse_domain_names(buffer)
# Option to parse URLs without a scheme (e.g. without https://)
iocs['urls'] = ioc_finder.parse_urls(buffer, parse_urls_without_scheme=False)
for mail in iocs['email_addresses']:
if is_whitelisted('mail', mail):
log.info("Skipped whitelisted observable mail: {0}".format(mail))
wsl.emit_info("Skipped whitelisted observable mail:
{0}".format(mail))
else:
log.info("Found observable mail: {0}".format(mail))
wsl.emit_info("Found observable mail: {0}".format(mail))
observables.append({'type': 'mail', 'value': mail})
for ip in iocs['ipv4s']:
if is_whitelisted('ip', ip):
log.info("Skipped whitelisted observable ip: {0}".format(ip))
wsl.emit_info("Skipped whitelisted observable ip:
{0}".format(ip))
else:
log.info("Found observable ip: {0}".format(ip))
wsl.emit_info("Found observable ip: {0}".format(ip))
observables.append({'type': 'ip', 'value': ip})
for domain in iocs['domains']:
if is_whitelisted('domain', domain):
log.info("Skipped whitelisted observable domain:
{0}".format(domain))
wsl.emit_info("Skipped whitelisted observable domain:
{0}".format(domain))
else:
log.info("Found observable domain: {0}".format(domain))
wsl.emit_info("Found observable domain: {0}".format(domain))
observables.append({'type': 'domain', 'value': domain})
for url in iocs['urls']:
if is_whitelisted('url', url):
log.info("Skipped whitelisted observable url: {0}".format(url))
wsl.emit_info("Skipped whitelisted observable url:
{0}".format(url))
else:
log.info("Found observable url: {0}".format(url))
wsl.emit_info("Found observable url: {0}".format(url))
observables.append({'type': 'url', 'value': url})
return observables
# Use the mail UID of the selected email to fetch only that email from the mailbox
def obtain_eml(connection, mail_uid, wsl):
# Read all the unseen emails from this folder
connection.select(config['imapFolder'])
typ, dat = connection.search(None, '(UNSEEN)')
# The dat[0] variable contains the IDs of all the unread emails
# The IDs are obtained by using the split function and the length of the
array is the number of unread emails
# If the selected mail uid is present in the list, then process only that
email
if mail_uid.encode() in dat[0].split():
typ, dat = connection.fetch(mail_uid.encode(), '(RFC822)')
if typ != 'OK':
log.error(dat[-1])
wsl.emit_error(dat[-1])
message = dat[0][1]
# The fetch operation flags the message as seen by default
log.info("Message {0} flagged as read".format(mail_uid))
wsl.emit_info("Message {0} flagged as read".format(mail_uid))
# Obtain the From field of the external email that will be used to send
the verdict to the user
msg = email.message_from_bytes(message)
decode = email.header.decode_header(msg['From'])[0]
if decode[1] is not None:
external_from_field = decode[0].decode(decode[1])
else:
external_from_field = str(decode[0])
parsed_from_field = email.utils.parseaddr(external_from_field)
if len(parsed_from_field) > 1:
external_from_field = parsed_from_field[1]
# Variable used to detect the mimetype of the email parts

mimetype = None
# Variable that will contain the internal EML file

internal_msg = None
# Walk the multipart structure of the email (now only the EML part is
needed)
for part in msg.walk():
mimetype = part.get_content_type()
# If the content type of this part is the rfc822 message, then
stop because the EML attachment is the last part
# If there is any other part after the rfc822 part, then it may
be related to the internal email, so it must not be considered
# Both message/rfc822 and application/octet-stream types are
considered due to differences in how the attachment is handled by different mail
clients
if mimetype in ['application/octet-stream', 'message/rfc822']:
# Obtain the internal EML file in both cases
if mimetype == 'application/octet-stream':
eml_payload = part.get_payload(decode=1)
internal_msg = email.message_from_bytes(eml_payload)
elif mimetype == 'message/rfc822':
eml_payload = part.get_payload(decode=0)[0]
try:
internal_msg =
email.message_from_string(base64.b64decode(str(eml_payload)).decode())
except:
internal_msg = eml_payload
# If the EML attachment has been found, then break the for
break
return internal_msg, external_from_field
else:
# Handle multiple analysts that select the same email from more than
one tab
log.error("The email with UID {} has already been analyzed. Please
refresh the page and retry.".format(mail_uid))
wsl.emit_error("The email with UID {} has already been analyzed. Please
refresh the page and retry.".format(mail_uid))
return
# Parse the EML file and extract the observables

def parse_eml(internal_msg, wsl):
# Obtain the subject of the internal email

# This is not straightforward since the subject might be splitted in two or
more parts
decode_subj = email.header.decode_header(internal_msg['Subject'])
decoded_elements_subj = []
for decode_elem in decode_subj:
if decode_elem[1] is not None:
if str(decode_elem[1]) == 'unknown-8bit':
decoded_elements_subj.append(decode_elem[0].decode())
else:
decoded_elements_subj.append(decode_elem[0].decode(decode_elem[1]))
else:
if(isinstance(decode_elem[0], str)):
decoded_elements_subj.append(str(decode_elem[0]))
else:
decoded_elements_subj.append(decode_elem[0].decode())
subject_field = ''.join(decoded_elements_subj)
log.info("Analyzing attached message with subject: {}".format(subject_field))

wsl.emit_info("Analyzing attached message with subject:
{}".format(subject_field))
# List of attachments of the internal email

attachments = []
# List of attachment hashes

hashes_attachments = []
# List of observables found in the body of the internal email

observables_body = []
# Dictionary containing a list of observables found in each header field

observables_header = {}
# List of header fields to consider when searching for observables in the

header
header_fields_list = [
'To',
'From',
'Sender',
'Cc',
'Delivered-To',
'Return-Path',
'Reply-To',
'Bounces-to',
'Received',
'X-Received',
'X-OriginatorOrg',
'X-Sender-IP',
'X-Originating-IP',
'X-SenderIP',
'X-Originating-Email'
]
# Extract header fields

parser = email.parser.HeaderParser()
header_fields = parser.parsestr(internal_msg.as_string())
# Search the observables in the values of all the selected header fields
# Since a field may appear more than one time (e.g. Received:), the lists
need to be initialized and then extended
i = 0
while i < len(header_fields.keys()):
if header_fields.keys()[i] in header_fields_list:
if not observables_header.get(header_fields.keys()[i]):
observables_header[header_fields.keys()[i]] = []
observables_header[header_fields.keys()
[i]].extend(search_observables(header_fields.values()[i], wsl))
i+=1
# Walk the multipart structure of the internal email

for part in internal_msg.walk():
mimetype = part.get_content_type()
content_disposition = part.get_content_disposition()
if content_disposition != "attachment":
# Extract the observables from the body (from both text/plain and
text/html parts) using the search_observables function
if mimetype == "text/plain":
try:
body = part.get_payload(decode=True).decode()
except UnicodeDecodeError:
body = part.get_payload(decode=True).decode('ISO-
8859-1')
observables_body.extend(search_observables(body, wsl))
elif mimetype == "text/html":
try:
html = part.get_payload(decode=True).decode()
except UnicodeDecodeError:
html = part.get_payload(decode=True).decode('ISO-
8859-1')
# Handle URL encoding
html_urldecoded =
urllib.parse.unquote(html.replace("&", "&"))
observables_body.extend(search_observables(html_urldecoded,
wsl))
# Extract attachments
else:
filename = part.get_filename()
if filename and mimetype:
# Add the attachment if it is not whitelisted (in terms of
filename or filetype)
if is_whitelisted('filename', filename) or
is_whitelisted('filetype', mimetype):
log.info("Skipped whitelisted observable file:
{0}".format(filename))
wsl.emit_info("Skipped whitelisted observable file:
else:
inmem_file = io.BytesIO(part.get_payload(decode=1))
attachments.append((inmem_file, filename))
log.info("Found observable file:
wsl.emit_info("Found observable file:
# Calculate the hash of the just found attachment
sha256 = hashlib.sha256()
sha256.update(part.get_payload(decode=1))
hash_attachment = {}
hash_attachment['hashValue'] = sha256.hexdigest()
hash_attachment['hashedAttachment'] = filename
if is_whitelisted('hash',
hash_attachment['hashValue']):
log.info("Skipped whitelisted observable hash:
{0}".format(hash_attachment['hashValue']))
wsl.emit_info("Skipped whitelisted observable
hash: {0}".format(hash_attachment['hashValue']))
else:
hashes_attachments.append(hash_attachment)
log.info("Found observable hash {0} calculated
from file: {1}".format(hash_attachment['hashValue'], filename))
wsl.emit_info("Found observable hash {0}
calculated from file: {1}".format(hash_attachment['hashValue'], filename))
# Create a tuple containing the eml file and the name it should have as an
observable
filename = subject_field + ".eml"
inmem_file = io.BytesIO()
gen = email.generator.BytesGenerator(inmem_file)
gen.flatten(internal_msg)
eml_file_tuple = (inmem_file, filename)
# Workaround to prevent HTML tags to appear inside the URLs (splits on < or
>)
for observable_body in observables_body:
if observable_body['type'] == "url":
observable_body['value'] = observable_body['value'].replace(">",
"<").split("<")[0]
return subject_field, observables_header, observables_body, attachments,

hashes_attachments, eml_file_tuple
# Create the case on TheHive and add the observables to it

def create_case(subject_field, observables_header, observables_body, attachments,
hashes_attachments, eml_file_tuple, wsl):
# Create the case template first if it does not exist
if(len(api_thehive.find_case_templates(query = thehive4py.query.Eq("name",
'ThePhish')).json())) == 0:
task_notification = thehive4py.models.CaseTask(title = 'ThePhish
notification')
task_analysis = thehive4py.models.CaseTask(title = 'ThePhish analysis')
task_result = thehive4py.models.CaseTask(title = 'ThePhish result')
case_template = thehive4py.models.CaseTemplate(name =
'ThePhish',
titlePrefix = '[ThePhish] ',
tasks = [task_notification, task_analysis, task_result])

response = api_thehive.create_case_template(case_template)
if response.status_code == 201:
log.info('Template ThePhish created successfully')
wsl.emit_info('Template ThePhish created successfully')
else:
log.error('Cannot create template: {0}
({1})'.format(response.status_code, response.text))
wsl.emit_error('Cannot create template: {0}
return
# Create the case on TheHive

# The emojis are removed to prevent problems when exporting the case to MISP
case = thehive4py.models.Case(title =
emoji.replace_emoji(subject_field),
tlp = int(config['caseTLP']),
pap = int(config['casePAP']),
flag = False,
tags = config['caseTags'],
description = 'Case created automatically by ThePhish',
template = 'ThePhish')
response = api_thehive.create_case(case)
new_case = response
new_id = new_case.json()['id']
new_case_id = new_case.json()['caseId']
log.info('Created case {}'.format(new_case_id))
wsl.emit_info('Created case {}'.format(new_case_id))
# Add observables found in the mail header

for header_field in observables_header:
for observable_header in observables_header[header_field]:
observable = thehive4py.models.CaseObservable(
dataType = observable_header['type'],
data = observable_header['value'],
ioc = False,
tags = ['email', 'email_header',
'email_header_{}'.format(header_field)],
message = 'Found in the {} field of the email
header'.format(header_field)
)
response = api_thehive.create_case_observable(new_id,
observable)
log.info('Added observable {0}: {1} to case
{2}'.format(observable_header['type'], observable_header['value'], new_case_id))
wsl.emit_info('Added observable {0}: {1} to case
{2}'.format(observable_header['type'], observable_header['value'], new_case_id))
else:
log.debug('Cannot add observable {0}: {1} - {2}
({3})'.format(observable_header['type'], observable_header['value'],
response.status_code, response.text))
# Add observables found in the mail body

for observable_body in observables_body:
dataType = observable_body['type'],
data = observable_body['value'],
ioc = False,
tags = ['email', 'email_body'],
message = 'Found in the email body'
)
response = api_thehive.create_case_observable(new_id, observable)
log.info('Added observable {0}: {1} to case
{2}'.format(observable_body['type'], observable_body['value'], new_case_id))
wsl.emit_info('Added observable {0}: {1} to case
{2}'.format(observable_body['type'], observable_body['value'], new_case_id))
else:
log.debug('Cannot add observable {0}: {1} - {2}
({3})'.format(observable_body['type'], observable_body['value'],
response.status_code, response.text))
# Add attachments
for attachment in attachments:
dataType='file',
data = attachment,
ioc = False,
tags = ['email', 'email_attachment'],
message = 'Found as email attachment'
)
log.info('Added observable file {0} to case
{1}'.format(attachment[1], new_case_id))
wsl.emit_info('Added observable file {0} to case
{1}'.format(attachment[1], new_case_id))
else:
log.debug('Cannot add observable: file {0} - {1}
({2})'.format(attachment[1], response.status_code, response.text))
# Add hashes of the attachments

for hash_attachment in hashes_attachments:
dataType = 'hash',
data = hash_attachment['hashValue'],
ioc = False,
tags = ['email', 'email_attachment_hash'],
message = 'Hash of attachment
"{}"'.format(hash_attachment['hashedAttachment'])
)
log.info('Added observable hash: {0} to case
{1}'.format(hash_attachment['hashValue'], new_case_id))
wsl.emit_info('Added observable hash: {0} to case
{1}'.format(hash_attachment['hashValue'], new_case_id))
else:
log.debug('Cannot add observable hash: {0} - {1}
({2})'.format(hash_attachment['hashValue'], response.status_code, response.text))
# Add eml file (using the tuple)

if eml_file_tuple:
dataType='file',
data = eml_file_tuple,
ioc = False,
tags = ['email', 'email_sample'],
message = 'Attached email in eml format'
)
log.info('Added observable file {0} to case
{1}'.format(eml_file_tuple[1], new_case_id))
wsl.emit_info('Added observable file {0} to case
{1}'.format(eml_file_tuple[1], new_case_id))
else:
log.debug('Cannot add observable: file {0} - {1}
({2})'.format(eml_file_tuple[1], response.status_code, response.text))
else:
log.error('Cannot create case: {0} ({1})'.format(response.status_code,
response.text))
wsl.emit_error('Cannot create case: {0}
return
# Return the id of the just created case on which to run the analysis
return new_case
# Main function called from outside

# The wsl is not a global variable to support multiple tabs
def main(wsl, mail_uid):
global config
global whitelist
global log
global api_thehive
# Logging configuration
try:
with open('logging_conf.json') as log_conf:
log_conf_dict = json.load(log_conf)
logging.config.dictConfig(log_conf_dict)
except Exception as e:
print("[ERROR]_[list_emails]: Error while trying to open the file
'logging_conf.json'. It cannot be read or it is not valid:
{}".format(traceback.format_exc()))
return
log = logging.getLogger(__name__)
try:
with open('configuration.json') as conf_file:
conf_dict = json.load(conf_file)
# IMAP configuration
config['imapHost'] = conf_dict['imap']['host']
config['imapPort'] = int(conf_dict['imap']['port'])
config['imapUser'] = conf_dict['imap']['user']
config['imapPassword'] = conf_dict['imap']['password']
config['imapFolder'] = conf_dict['imap']['folder']
# TheHive configuration
config['thehiveURL'] = conf_dict['thehive']['url']
config['thehiveApiKey'] = conf_dict['thehive']['apikey']
# New case configuration

config['caseTLP'] = conf_dict['case']['tlp']
config['casePAP'] = conf_dict['case']['pap']
config['caseTags'] = conf_dict['case']['tags']
log.error("Error while trying to open the file 'configuration.json':
wsl.emit_error("Error while trying to open the file
'configuration.json'")
return
# Read the whitelist file, which is composed by various parts:

# - The exact matching part
# - The regex matching part
# - Three lists of domains that are used to whitelist subdomains, URLs and
email addresses that contain them
try:
with open('whitelist.json') as whitelist_file:
whitelist_dict = json.load(whitelist_file)
whitelist['mailExact'] = whitelist_dict['exactMatching']['mail']
whitelist['mailRegex'] = whitelist_dict['regexMatching']['mail']
whitelist['ipExact'] = whitelist_dict['exactMatching']['ip']
whitelist['ipRegex'] = whitelist_dict['regexMatching']['ip']
whitelist['domainExact'] = whitelist_dict['exactMatching']
['domain']
whitelist['domainRegex'] = whitelist_dict['regexMatching']
['domain']
whitelist['urlExact'] = whitelist_dict['exactMatching']['url']
whitelist['urlRegex'] = whitelist_dict['regexMatching']['url']
whitelist['filenameExact'] = whitelist_dict['exactMatching']
['filename']
whitelist['filenameRegex'] = whitelist_dict['regexMatching']
['filename']
whitelist['filetypeExact'] = whitelist_dict['exactMatching']
['filetype']
whitelist['hashExact'] = whitelist_dict['exactMatching']['hash']
# The domains in the last three lists are used to create three
lists of regular expressions that serve to whitelist subdomains, URLs and email
addresses based on those domains
whitelist['regexDomainsInSubdomains'] = [r'^(.+\.|)
{0}$'.format(domain.replace(r'.', r'\.')) for domain in
whitelist_dict['domainsInSubdomains']]
whitelist['regexDomainsInURLs'] = [r'^(http|https):\/\/([^\/]
+\.|){0}(\/.*|\?.*|\#.*|)$'.format(domain.replace(r'.', r'\.')) for domain in
whitelist_dict['domainsInURLs']]
whitelist['regexDomainsInEmails'] = [r'^.+@(.+\.|)
{0}$'.format(domain.replace(r'.', r'\.')) for domain in
whitelist_dict['domainsInEmails']]
log.error("Error while trying to open the file 'whitelist.json':
wsl.emit_error("Error while trying to open the file 'whitelist.json'")
return
# Object needed to use TheHive4py

api_thehive = thehive4py.api.TheHiveApi(config['thehiveURL'],
config['thehiveApiKey'])
# Connect to IMAP server

try:
connection = connect_to_IMAP_server(wsl)
log.error("Error while trying to connect to IMAP server:
wsl.emit_error("Error while trying to connect to IMAP server")
return
# Call the obtain_eml function

try:
internal_msg, external_from_field = obtain_eml(connection, mail_uid,
wsl)
log.error("Error while trying to obtain the internal eml file:
wsl.emit_error("Error while trying to obtain the internal eml file")
return
# Call the parse_eml function

try:
subject_field, observables_header, observables_body, attachments,
hashes_attachments, eml_file_tuple = parse_eml(internal_msg, wsl)
log.error("Error while trying to parse the internal eml file:
wsl.emit_error("Error while trying to parse the internal eml file")
return
# Call the create_case function

try:
new_case = create_case(subject_field, observables_header,
observables_body, attachments, hashes_attachments, eml_file_tuple, wsl)
log.error("Error while trying to create the case:
wsl.emit_error("Error while trying to create the case")
return
return new_case, external_from_field

Case From Email

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Case From Email

Uploaded by

Copyright:

Available Formats

import logging.

# Global variable used for logging

# Global variable needed to use the API

# Global variable used for the configuration

# Global variable used for the whitelist

# Check if an observable is whitelisted with an exact match or with a regex match

# Variable used to detect the mimetype of the email parts

# Variable that will contain the internal EML file

return internal_msg, external_from_field

# Parse the EML file and extract the observables

# Obtain the subject of the internal email

log.info("Analyzing attached message with subject: {}".format(subject_field))

# List of attachments of the internal email

# List of attachment hashes

# List of observables found in the body of the internal email

# Dictionary containing a list of observables found in each header field

# List of header fields to consider when searching for observables in the

# Extract header fields

# Walk the multipart structure of the internal email

return subject_field, observables_header, observables_body, attachments,

# Create the case on TheHive and add the observables to it

titlePrefix = '[ThePhish] ',

tasks = [task_notification, task_analysis, task_result])

# Create the case on TheHive

# Add observables found in the mail header

# Add observables found in the mail body

# Add hashes of the attachments

# Add eml file (using the tuple)

# Main function called from outside

# New case configuration

# Read the whitelist file, which is composed by various parts:

# Object needed to use TheHive4py

# Connect to IMAP server

# Call the obtain_eml function

# Call the parse_eml function

# Call the create_case function

return new_case, external_from_field

You might also like