From 1b7ba5d344ac59ee4061da8053ebba6b8ae6ddc3 Mon Sep 17 00:00:00 2001 From: Sergey Lavrinenko Date: Tue, 31 Mar 2026 21:10:53 +0300 Subject: [PATCH 1/4] Fall back to content-based MIME detection via puremagic When file extension is missing or unrecognized, detect MIME type from file content using magic bytes. Closes #163 --- emails/store/file.py | 8 ++++++++ emails/testsuite/store/test_store.py | 22 ++++++++++++++++++++++ requirements/base.txt | 1 + 3 files changed, 31 insertions(+) diff --git a/emails/store/file.py b/emails/store/file.py index d601f16..771b91a 100644 --- a/emails/store/file.py +++ b/emails/store/file.py @@ -2,6 +2,7 @@ import uuid from mimetypes import guess_type +import puremagic from email.mime.base import MIMEBase from email.encoders import encode_base64 from os.path import basename @@ -102,6 +103,13 @@ def get_mime_type(self) -> str: filename = self.filename if filename: r = self._mime_type = guess_type(filename)[0] + if not r: + data = self.data + if data: + try: + r = puremagic.from_string(data if isinstance(data, bytes) else data.encode(), mime=True) + except puremagic.PureError: + pass if not r: r = MIMETYPE_UNKNOWN self._mime_type = r diff --git a/emails/testsuite/store/test_store.py b/emails/testsuite/store/test_store.py index c5596f2..5b466d9 100644 --- a/emails/testsuite/store/test_store.py +++ b/emails/testsuite/store/test_store.py @@ -68,6 +68,28 @@ def test_get_data_none(): assert f.data is None +def test_mime_type_from_content(): + # PNG magic bytes, no file extension + png_header = (b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR' + b'\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02' + b'\x00\x00\x00\x90wS\xde') + f = BaseFile(data=png_header, filename='image_no_ext') + assert f.mime_type == 'image/png' + + # JPEG magic bytes, no file extension + jpeg_header = b'\xff\xd8\xff\xe0\x00\x10JFIF' + f = BaseFile(data=jpeg_header, filename='photo') + assert f.mime_type == 'image/jpeg' + + # Unknown bytes, no extension — should fall back to unknown + f = BaseFile(data=b'\x00\x01\x02\x03', filename='mystery') + assert f.mime_type == 'application/unknown' + + # Extension still takes priority + f = BaseFile(data=png_header, filename='image.gif') + assert f.mime_type == 'image/gif' + + def test_store_commons2(): store = emails.store.MemoryFileStore() f1 = store.add({'uri': '/a/c.gif'}) diff --git a/requirements/base.txt b/requirements/base.txt index efd3de4..466b8f5 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,3 +4,4 @@ chardet python-dateutil requests premailer>=2.8.3 +puremagic From 2392dac9b5d5215d2702c20011de17b936800977 Mon Sep 17 00:00:00 2001 From: Sergey Lavrinenko Date: Tue, 31 Mar 2026 21:18:49 +0300 Subject: [PATCH 2/4] Fix stream exhaustion in mime detection, add puremagic to install_requires --- emails/store/file.py | 11 ++++++++--- emails/testsuite/store/test_store.py | 5 +++++ setup.py | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/emails/store/file.py b/emails/store/file.py index 771b91a..b760b5e 100644 --- a/emails/store/file.py +++ b/emails/store/file.py @@ -104,10 +104,15 @@ def get_mime_type(self) -> str: if filename: r = self._mime_type = guess_type(filename)[0] if not r: - data = self.data - if data: + _data = self._data + if isinstance(_data, bytes): + try: + r = puremagic.from_string(_data, mime=True) + except puremagic.PureError: + pass + elif isinstance(_data, str): try: - r = puremagic.from_string(data if isinstance(data, bytes) else data.encode(), mime=True) + r = puremagic.from_string(_data.encode(), mime=True) except puremagic.PureError: pass if not r: diff --git a/emails/testsuite/store/test_store.py b/emails/testsuite/store/test_store.py index 5b466d9..33adca5 100644 --- a/emails/testsuite/store/test_store.py +++ b/emails/testsuite/store/test_store.py @@ -89,6 +89,11 @@ def test_mime_type_from_content(): f = BaseFile(data=png_header, filename='image.gif') assert f.mime_type == 'image/gif' + # File-like data: mime detection skips streams, data not exhausted + f = BaseFile(data=BytesIO(png_header), filename='no_ext') + assert f.mime_type == 'application/unknown' + assert f.data == png_header # stream not consumed by mime detection + def test_store_commons2(): store = emails.store.MemoryFileStore() diff --git a/setup.py b/setup.py index 6ffea53..35801c6 100644 --- a/setup.py +++ b/setup.py @@ -127,7 +127,7 @@ def find_version(*file_paths): package_data={'emails': ['py.typed']}, scripts=['scripts/make_rfc822.py'], python_requires='>=3.10', - install_requires=['python-dateutil'], + install_requires=['python-dateutil', 'puremagic'], extras_require={ 'html': ['cssutils', 'lxml', 'chardet', 'requests', 'premailer'], }, From 5e30bbe95d3017bf2bd671da934427045c6d5e88 Mon Sep 17 00:00:00 2001 From: Sergey Lavrinenko Date: Tue, 31 Mar 2026 21:28:48 +0300 Subject: [PATCH 3/4] Support mime detection for file-like streams via read/seek --- emails/store/file.py | 15 ++++++++++----- emails/testsuite/store/test_store.py | 4 ++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/emails/store/file.py b/emails/store/file.py index b760b5e..bbdaa64 100644 --- a/emails/store/file.py +++ b/emails/store/file.py @@ -106,13 +106,18 @@ def get_mime_type(self) -> str: if not r: _data = self._data if isinstance(_data, bytes): - try: - r = puremagic.from_string(_data, mime=True) - except puremagic.PureError: - pass + header = _data elif isinstance(_data, str): + header = _data.encode() + elif hasattr(_data, 'read'): + pos = _data.tell() + header = _data.read(128) + _data.seek(pos) + else: + header = None + if header: try: - r = puremagic.from_string(_data.encode(), mime=True) + r = puremagic.from_string(header, mime=True) except puremagic.PureError: pass if not r: diff --git a/emails/testsuite/store/test_store.py b/emails/testsuite/store/test_store.py index 33adca5..bedbb45 100644 --- a/emails/testsuite/store/test_store.py +++ b/emails/testsuite/store/test_store.py @@ -89,9 +89,9 @@ def test_mime_type_from_content(): f = BaseFile(data=png_header, filename='image.gif') assert f.mime_type == 'image/gif' - # File-like data: mime detection skips streams, data not exhausted + # File-like data: mime detected without exhausting stream f = BaseFile(data=BytesIO(png_header), filename='no_ext') - assert f.mime_type == 'application/unknown' + assert f.mime_type == 'image/png' assert f.data == png_header # stream not consumed by mime detection From ff5d02952a80f721af4cbe6ad0b72bdbf65203bf Mon Sep 17 00:00:00 2001 From: Sergey Lavrinenko Date: Tue, 31 Mar 2026 21:33:52 +0300 Subject: [PATCH 4/4] Fix mypy union-attr errors in mime detection --- emails/store/file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emails/store/file.py b/emails/store/file.py index bbdaa64..b1f94c6 100644 --- a/emails/store/file.py +++ b/emails/store/file.py @@ -109,7 +109,7 @@ def get_mime_type(self) -> str: header = _data elif isinstance(_data, str): header = _data.encode() - elif hasattr(_data, 'read'): + elif _data is not None: pos = _data.tell() header = _data.read(128) _data.seek(pos)