From 9bbe6384a1007db5d0b609070c67cb221a82b20c Mon Sep 17 00:00:00 2001 From: TianyeDong Date: Thu, 28 May 2026 20:29:11 -0400 Subject: [PATCH] Configure minimum free GPU memory gate --- rlix/pipeline/miles_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rlix/pipeline/miles_pipeline.py b/rlix/pipeline/miles_pipeline.py index c23faef..5547672 100644 --- a/rlix/pipeline/miles_pipeline.py +++ b/rlix/pipeline/miles_pipeline.py @@ -577,7 +577,7 @@ def _wait_for_overlap_engines_offloaded(self, allocated_train_gpus, *, timeout_s # overlap GPU IDs. The train actor will need ~3.7 GB for the # 0.5B model + a few GB for activations; aim for ≥20 GB free # before we let _before_training proceed to wake_up. - target_free_gb = 20.0 + target_free_gb = float(os.environ.get("MILES_MIN_FREE_GPU_MEM_GB", "20.0")) deadline2 = time.time() + float(timeout_s) last_min_free_gb: Optional[float] = None nvidia_smi_unavail_count = 0