From 966512917f82e913459e4a6f752383686a0a5a09 Mon Sep 17 00:00:00 2001 From: adamklie Date: Fri, 8 May 2026 22:08:26 -0700 Subject: [PATCH 1/2] Fix AttributeError in compute_fake_perturbation_tests args.reference_targets is never defined on the argparse namespace; mirror the real-test path's fallback (line 49) and use args.guide_annotation_key instead. This is the correct reference-target list since the fake-test code relabels NT guides to {'non-targeting', 'targeting'} before this call. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../U-test_perturbation_calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py b/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py index c0aebcc..95fd425 100644 --- a/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py +++ b/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py @@ -157,7 +157,7 @@ def compute_fake_perturbation_tests(): prog_key=args.prog_key, collapse_targets=True, pseudobulk=False, - reference_targets=args.reference_targets, + reference_targets=args.guide_annotation_key, FDR_method=args.FDR_method, n_jobs=-1, inplace=False From 520cb15d3a7dd56aaa91565bdcba37403842a35e Mon Sep 17 00:00:00 2001 From: adamklie Date: Sat, 9 May 2026 09:12:22 -0700 Subject: [PATCH 2/2] Release per-iteration mdata copies in compute_fake_perturbation_tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fake-test inner loop calls mdata.copy() per iteration, which on real datasets (~10 GB sparse rna matrix) stacks up ~15 GB residual per iteration and OOMs around iteration 16/50 in a 256 GB allocation. Add explicit del + gc.collect() at the end of each iteration so deep-copies are reaped before the next iteration starts. Also release the K-loop mdata between K values to avoid accumulating across the K loop. This is a minimal bandaid — the structural fix would avoid the full mdata.copy() in the first place since the fake-test only mutates obsm and uns on the prog_key modality, not the rna modality (which is what makes the deep-copy expensive). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../U-test_perturbation_calibration.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py b/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py index 95fd425..f238400 100644 --- a/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py +++ b/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py @@ -8,6 +8,7 @@ import os import sys +import gc import yaml import logging import argparse @@ -171,6 +172,16 @@ def compute_fake_perturbation_tests(): test_stats_fake_dfs.append(test_stats_df) # combine all test_stats_fake_dfs_temp.append(test_stats_df) # combine for each k and sel_thresh + # Release per-iteration deep-copy of mdata. Without this, full + # mdata.copy() instances stack up (~10 GB sparse rna matrix per copy + # on real datasets) and OOM around iteration 16/50 in 256 GB. + del _mdata, mdata_samp + gc.collect() + + # Release the K-loop mdata before reloading for next K. + del mdata + gc.collect() + # Save results test_stats_fake_dfs_temp = pd.concat(test_stats_fake_dfs_temp, ignore_index=True) test_stats_fake_dfs_temp.to_csv(