From 8846dbd8aaeae4603b27d87b92937b6459d8f329 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Sun, 12 Apr 2026 11:37:42 +0900 Subject: [PATCH] Release temp .mlpackage eagerly in MLModel.__del__ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `MLModel` constructed from a `Model_pb2` spec allocates a temporary `.mlpackage` under `$TMPDIR` via `_create_mlpackage`. Historically the only cleanup path was `atexit.register(cleanup, self.package_path)`, so the directory survived until the Python process exited. In workflows that construct many short-lived MLModel objects — most visibly `coremltools.optimize.coreml.experimental.linear_quantize_activations`, which builds one temporary model per op-group batch in `_ModelDebugger.predict_intermediate_outputs` — this caused tens of gigabytes of leaked temp directories. Measured on macOS 15 / Apple Silicon / coremltools 9.0 / Python 3.12: - 8-layer 2048-dim toy model, 4 calibration samples: leaks +6 `.mlpackage` dirs / +402.9 MB per call. - Gemma 4 E2B chunk2 (~300 MB per package): ~38 GB retained before the process finishes; fills a 49 GB disk. With this patch both numbers drop to 0. This change adds a private `MLModel._close()` helper and `MLModel.__del__` that release the temp package as soon as the Python object goes out of scope. The C++ proxy is dropped first so the compiled `.mlmodelc` mmap is released before `rmtree`. User-owned `.mlpackage` paths loaded from disk are never flagged `is_temp_package` and are never touched. The existing `atexit` hook is kept as a safety net for objects that are still alive at interpreter shutdown. Includes regression tests under `coremltools/test/modelpackage/test_mlmodel_temp_cleanup.py` that cover `_close()` idempotency, `__del__` cleanup, no-leak across many constructions, and that user-provided paths are preserved. 
--- coremltools/models/model.py | 35 +++++ .../modelpackage/test_mlmodel_temp_cleanup.py | 129 ++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 coremltools/test/modelpackage/test_mlmodel_temp_cleanup.py diff --git a/coremltools/models/model.py b/coremltools/models/model.py index 56fcf8a98..21e9f3257 100644 --- a/coremltools/models/model.py +++ b/coremltools/models/model.py @@ -583,6 +583,41 @@ def does_model_contain_mlprogram(model) -> bool: f = self._get_function_description(self.function_name) self._model_input_names_set = set([i.name for i in f.input]) + def _close(self): + """Internal: eagerly release the temporary ``.mlpackage`` directory + (if any) and the compiled ``.mlmodelc`` owned by the underlying + CoreML proxy. + + ``MLModel`` instances constructed from a ``Model_pb2`` spec allocate a + temporary ``.mlpackage`` under ``$TMPDIR`` via ``_create_mlpackage``. + Historically that directory was only reclaimed at interpreter exit via + ``atexit``. In workflows that construct many short-lived ``MLModel`` + objects — e.g. activation-statistics collection in + ``linear_quantize_activations`` — this caused tens of gigabytes of + leaked temp directories before the process finished. + + Not part of the public API: callers should normally rely on + ``__del__`` instead. After ``_close`` returns, the instance is no + longer usable for prediction. Safe to call more than once. + """ + # Drop the proxy first so CoreML's mmap of the compiled model is + # released before we rmtree the backing package directory. + self.__proxy__ = None + pkg = getattr(self, "package_path", None) + if getattr(self, "is_temp_package", False) and pkg: + if _os is not None and _os.path.exists(pkg): + _shutil.rmtree(pkg, ignore_errors=True) + self.package_path = None + self.is_temp_package = False + + def __del__(self): + # Interpreter-shutdown-safe: swallow every exception so destruction + # during gc never spams stderr. 
+ try: + self._close() + except Exception: + pass + def _get_proxy_and_spec( self, filename: str, diff --git a/coremltools/test/modelpackage/test_mlmodel_temp_cleanup.py b/coremltools/test/modelpackage/test_mlmodel_temp_cleanup.py new file mode 100644 index 000000000..9934284bb --- /dev/null +++ b/coremltools/test/modelpackage/test_mlmodel_temp_cleanup.py @@ -0,0 +1,129 @@ +# Copyright (c) 2026, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +"""Regression tests for MLModel temporary .mlpackage cleanup. + +Historically `MLModel` constructed from a `Model_pb2` spec allocated a temp +`.mlpackage` that was only reclaimed at interpreter exit via `atexit`. This +caused tens of gigabytes of leaked temp directories when many short-lived +MLModel objects were created — for example during activation-statistics +collection in `linear_quantize_activations`. These tests pin the eager +cleanup contract (`MLModel.__del__` / `MLModel._close`). + +Parallel-test safety: these tests never mutate ``$TMPDIR`` or +``tempfile.tempdir`` and only inspect ``model.package_path`` for the +specific MLModel instances they construct. Any other test running in +parallel against the same shared temp dir is therefore invisible to the +assertions here. +""" +from __future__ import annotations + +import gc +import os + +import numpy as np + +import coremltools as ct + + +def _build_spec_mlmodel() -> ct.models.MLModel: + """Build a tiny mlprogram MLModel from a spec so that MLModel.__init__ + hits the `_create_mlpackage` path (i.e. 
the one that allocates tmp).""" + import torch + import torch.nn as nn + + class _Tiny(nn.Module): + def forward(self, x): + return x * 2.0 + + traced = torch.jit.trace(_Tiny().eval(), torch.zeros(1, 4)) + return ct.convert( + traced, + inputs=[ct.TensorType(name="x", shape=(1, 4), dtype=np.float32)], + outputs=[ct.TensorType(name="y", dtype=np.float32)], + minimum_deployment_target=ct.target.iOS18, + compute_units=ct.ComputeUnit.CPU_ONLY, + ) + + +class TestMLModelTempCleanup: + def test_close_removes_temp_package(self): + """``MLModel._close`` deletes this model's temp .mlpackage and is + idempotent. We only inspect ``model.package_path`` — never the + shared tmpdir — so this is safe under parallel test execution.""" + model = _build_spec_mlmodel() + pkg = model.package_path + assert pkg is not None + assert os.path.exists(pkg), "expected temp mlpackage on disk" + + model._close() + assert not os.path.exists(pkg), ( + f"_close should rmtree temp mlpackage, still present: {pkg}" + ) + # Idempotent + model._close() + assert model.package_path is None + + def test_del_removes_temp_package(self): + """Letting the MLModel go out of scope triggers cleanup via __del__. + We captured ``pkg`` before ``del`` and only assert on that specific + path, which makes the assertion independent of any other temp dirs + that may exist in $TMPDIR from parallel tests.""" + model = _build_spec_mlmodel() + pkg = model.package_path + assert os.path.exists(pkg) + + del model + gc.collect() + assert not os.path.exists(pkg), ( + f"__del__ should rmtree temp mlpackage, still present: {pkg}" + ) + + def test_no_leak_across_many_mlmodel_constructions(self): + """Simulates the activation-calibration workload that motivated the + fix: constructing many temp MLModels in a row should not accumulate + tmp dirs once each object has been released. + + We record each model's ``package_path`` before releasing it and then + assert that every one of those specific paths is gone. 
This is + strictly stronger than counting ``tmp*.mlpackage`` in $TMPDIR and + doesn't race with other tests that may also be creating temp + packages in the same directory.""" + pkgs: list[str] = [] + for _ in range(5): + m = _build_spec_mlmodel() + pkgs.append(m.package_path) + del m + gc.collect() + + leaked = [p for p in pkgs if p is not None and os.path.exists(p)] + assert not leaked, ( + f"{len(leaked)}/{len(pkgs)} temp mlpackage dirs survived __del__: " + f"{leaked}" + ) + + def test_user_provided_package_path_is_not_deleted(self, tmp_path): + """Loading a user-owned .mlpackage should NEVER trigger the cleanup + path — only temp packages that MLModel itself created. ``tmp_path`` + here is the pytest-provided per-test unique directory, not a + $TMPDIR override, so this remains parallel-safe.""" + # Build once via the spec path (creates temp), then save to a + # user-owned location and load from disk. + temp_model = _build_spec_mlmodel() + user_pkg = str(tmp_path / "user_owned.mlpackage") + temp_model.save(user_pkg) + del temp_model + gc.collect() + + assert os.path.exists(user_pkg) + loaded = ct.models.MLModel(user_pkg) + assert not getattr(loaded, "is_temp_package", False), ( + "Model loaded from a user path must not be flagged as temp." + ) + del loaded + gc.collect() + assert os.path.exists(user_pkg), ( + "Cleanup must only touch temp packages, not user-provided paths." + )