Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions coremltools/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,41 @@ def does_model_contain_mlprogram(model) -> bool:
f = self._get_function_description(self.function_name)
self._model_input_names_set = set([i.name for i in f.input])

def _close(self):
    """Internal: eagerly release the temporary ``.mlpackage`` directory
    (if any) and the compiled ``.mlmodelc`` owned by the underlying
    CoreML proxy.

    ``MLModel`` instances constructed from a ``Model_pb2`` spec allocate a
    temporary ``.mlpackage`` under ``$TMPDIR`` via ``_create_mlpackage``.
    Historically that directory was only reclaimed at interpreter exit via
    ``atexit``. In workflows that construct many short-lived ``MLModel``
    objects — e.g. activation-statistics collection in
    ``linear_quantize_activations`` — this caused tens of gigabytes of
    leaked temp directories before the process finished.

    Not part of the public API: callers should normally rely on
    ``__del__`` instead. After ``_close`` returns, the instance is no
    longer usable for prediction. Safe to call more than once.
    """
    # Drop the proxy first so CoreML's mmap of the compiled model is
    # released before we rmtree the backing package directory.
    self.__proxy__ = None
    pkg = getattr(self, "package_path", None)
    if getattr(self, "is_temp_package", False) and pkg:
        # During interpreter shutdown, module-level aliases may already
        # have been set to None. Guard BOTH aliases: guarding only `_os`
        # (as before) left a TypeError path through `_shutil.rmtree`.
        if _os is not None and _shutil is not None and _os.path.exists(pkg):
            _shutil.rmtree(pkg, ignore_errors=True)
    # Mark the instance as closed/idempotent: a second call is a no-op.
    self.package_path = None
    self.is_temp_package = False

def __del__(self):
    """Finalizer: best-effort eager cleanup via :meth:`_close`.

    May run during garbage collection or interpreter teardown, where
    module globals can already be gone — so every exception is swallowed
    to keep destruction from spamming stderr.
    """
    try:
        self._close()
    except Exception:
        pass

def _get_proxy_and_spec(
self,
filename: str,
Expand Down
129 changes: 129 additions & 0 deletions coremltools/test/modelpackage/test_mlmodel_temp_cleanup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Copyright (c) 2026, Apple Inc. All rights reserved.
#
# Use of this source code is governed by a BSD-3-clause license that can be
# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

"""Regression tests for MLModel temporary .mlpackage cleanup.

Historically `MLModel` constructed from a `Model_pb2` spec allocated a temp
`.mlpackage` that was only reclaimed at interpreter exit via `atexit`. This
caused tens of gigabytes of leaked temp directories when many short-lived
MLModel objects were created — for example during activation-statistics
collection in `linear_quantize_activations`. These tests pin the eager
cleanup contract (`MLModel.__del__` / `MLModel._close`).

Parallel-test safety: these tests never mutate ``$TMPDIR`` or
``tempfile.tempdir`` and only inspect ``model.package_path`` for the
specific MLModel instances they construct. Any other test running in
parallel against the same shared temp dir is therefore invisible to the
assertions here.
"""
from __future__ import annotations

import gc
import os

import numpy as np

import coremltools as ct


def _build_spec_mlmodel() -> ct.models.MLModel:
    """Build a tiny mlprogram MLModel from a spec so that MLModel.__init__
    hits the `_create_mlpackage` path (i.e. the one that allocates tmp)."""
    import torch

    # Minimal module: a single elementwise doubling, enough to convert.
    class _Double(torch.nn.Module):
        def forward(self, inp):
            return inp * 2.0

    example_input = torch.zeros(1, 4)
    traced_module = torch.jit.trace(_Double().eval(), example_input)
    converted = ct.convert(
        traced_module,
        inputs=[ct.TensorType(name="x", shape=(1, 4), dtype=np.float32)],
        outputs=[ct.TensorType(name="y", dtype=np.float32)],
        minimum_deployment_target=ct.target.iOS18,
        compute_units=ct.ComputeUnit.CPU_ONLY,
    )
    return converted


class TestMLModelTempCleanup:
    """Pin the eager-cleanup contract of ``MLModel._close`` / ``__del__``.

    Every test inspects only the ``package_path`` of the models it
    constructs itself, so the suite stays safe under parallel execution
    against a shared ``$TMPDIR``.
    """

    def test_close_removes_temp_package(self):
        """``MLModel._close`` deletes this model's temp .mlpackage and is
        idempotent. We only inspect ``model.package_path`` — never the
        shared tmpdir — so this is safe under parallel test execution."""
        model = _build_spec_mlmodel()
        pkg = model.package_path
        assert pkg is not None
        assert os.path.exists(pkg), "expected temp mlpackage on disk"

        model._close()
        assert not os.path.exists(pkg), (
            f"_close should rmtree temp mlpackage, still present: {pkg}"
        )
        # Idempotent
        model._close()
        assert model.package_path is None

    def test_del_removes_temp_package(self):
        """Letting the MLModel go out of scope triggers cleanup via __del__.
        We captured ``pkg`` before ``del`` and only assert on that specific
        path, which makes the assertion independent of any other temp dirs
        that may exist in $TMPDIR from parallel tests."""
        model = _build_spec_mlmodel()
        pkg = model.package_path
        # Assert the path exists first, so a regression in temp-package
        # allocation fails with a clear message rather than a TypeError
        # from os.path.exists(None).
        assert pkg is not None, "expected a temp mlpackage to be allocated"
        assert os.path.exists(pkg)

        del model
        gc.collect()
        assert not os.path.exists(pkg), (
            f"__del__ should rmtree temp mlpackage, still present: {pkg}"
        )

    def test_no_leak_across_many_mlmodel_constructions(self):
        """Simulates the activation-calibration workload that motivated the
        fix: constructing many temp MLModels in a row should not accumulate
        tmp dirs once each object has been released.

        We record each model's ``package_path`` before releasing it and then
        assert that every one of those specific paths is gone. This is
        strictly stronger than counting ``tmp*.mlpackage`` in $TMPDIR and
        doesn't race with other tests that may also be creating temp
        packages in the same directory."""
        # package_path may legitimately be None for non-temp models, and
        # the leak filter below handles that — annotate accordingly.
        pkgs: list[str | None] = []
        for _ in range(5):
            m = _build_spec_mlmodel()
            pkgs.append(m.package_path)
            del m
        gc.collect()

        leaked = [p for p in pkgs if p is not None and os.path.exists(p)]
        assert not leaked, (
            f"{len(leaked)}/{len(pkgs)} temp mlpackage dirs survived __del__: "
            f"{leaked}"
        )

    def test_user_provided_package_path_is_not_deleted(self, tmp_path):
        """Loading a user-owned .mlpackage should NEVER trigger the cleanup
        path — only temp packages that MLModel itself created. ``tmp_path``
        here is the pytest-provided per-test unique directory, not a
        $TMPDIR override, so this remains parallel-safe."""
        # Build once via the spec path (creates temp), then save to a
        # user-owned location and load from disk.
        temp_model = _build_spec_mlmodel()
        user_pkg = str(tmp_path / "user_owned.mlpackage")
        temp_model.save(user_pkg)
        del temp_model
        gc.collect()

        assert os.path.exists(user_pkg)
        loaded = ct.models.MLModel(user_pkg)
        assert not getattr(loaded, "is_temp_package", False), (
            "Model loaded from a user path must not be flagged as temp."
        )
        del loaded
        gc.collect()
        assert os.path.exists(user_pkg), (
            "Cleanup must only touch temp packages, not user-provided paths."
        )