Emlx
snoop.data.analyzers.emlx
#
Tasks that handle converting modern Apple-format e-mail into RFC-822 format e-mail.
Functions#
reconstruct(file_pk, **depends_on)
#
Task to convert .emlx and .partial.emlx Apple email formats into RFC 822 .eml format.
The Apple .emlx format has two differences from the normal .eml:
- it prepends a single line with binary data to the mail email body
- it zeroes out larger parts inside the multipart message, and moves their payload to separate files on
disk in the same directory, with the extension
.partial.emlx.
This task reads all those .partial.emlx files and attaches them back to a new .eml email message to
be used with the rest of the pipeline.
Source code in snoop/data/analyzers/emlx.py
@snoop_task('emlx.reconstruct', version=1)
def reconstruct(file_pk, **depends_on):
"""Task to convert `.emlx` and `.partial.emlx` Apple email formats into RFC 822 `.eml` format.
The Apple `.emlx` format has two differences from the normal `.eml`:
- it prepends a single line with binary data to the mail email body
- it zeroes out larger parts inside the multipart message, and moves their payload to separate files on
disk in the same directory, with the extension `.partial.emlx`.
This task reads all those `.partial.emlx` files and attaches them back to a new `.eml` email message to
be used with the rest of the pipeline.
"""
from .. import filesystem # noqa: F401
file = models.File.objects.get(pk=file_pk)
with file.original.open() as f:
original_data = f.read()
eml_data = re.sub(rb'^\d+\s+', b'', original_data, re.MULTILINE)
message = email.message_from_bytes(eml_data)
for ref, part in iter_parts(message):
if part.get('X-Apple-Content-Length'):
ext = f'.{ref}.emlxpart'
part_name = re.sub(r'\.partial\.emlx$', ext, file.name)
parent = file.parent_directory
part_file = (
parent.child_file_set
.filter(name_bytes=part_name.encode('utf8', errors='surrogateescape'))
.first()
)
if not part_file:
log.warning("Missing %r", part_name)
continue
with part_file.original.open() as f:
payload = f.read()
part.set_payload(payload)
with models.Blob.create() as output:
output.write(message.as_bytes())
return output.blob