From 966512917f82e913459e4a6f752383686a0a5a09 Mon Sep 17 00:00:00 2001
From: adamklie <aklie@ucsd.edu>
Date: Fri, 8 May 2026 22:08:26 -0700
Subject: [PATCH 1/2] Fix AttributeError in compute_fake_perturbation_tests

args.reference_targets is never defined on the argparse namespace, so the
call in compute_fake_perturbation_tests raises AttributeError. Mirror the
real-test path's fallback (line 49) and pass args.guide_annotation_key
instead. This is the correct reference-target list because the fake-test
code relabels NT guides to {'non-targeting', 'targeting'} before this call.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../U-test_perturbation_calibration.py                          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py b/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py
index c0aebcc..95fd425 100644
--- a/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py
+++ b/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py
@@ -157,7 +157,7 @@ def compute_fake_perturbation_tests():
                         prog_key=args.prog_key,
                         collapse_targets=True,
                         pseudobulk=False,
-                        reference_targets=args.reference_targets,
+                        reference_targets=args.guide_annotation_key,
                         FDR_method=args.FDR_method,
                         n_jobs=-1,
                         inplace=False

From 520cb15d3a7dd56aaa91565bdcba37403842a35e Mon Sep 17 00:00:00 2001
From: adamklie <aklie@ucsd.edu>
Date: Sat, 9 May 2026 09:12:22 -0700
Subject: [PATCH 2/2] Release per-iteration mdata copies in
 compute_fake_perturbation_tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fake-test inner loop calls mdata.copy() on every iteration. On real
datasets (~10 GB sparse rna matrix) this leaves ~15 GB of residual memory
per iteration, and the job runs out of memory around iteration 16 of 50
in a 256 GB allocation. Add an explicit del + gc.collect() at the end of
each iteration so the deep copies are reclaimed before the next iteration
starts. Also release the K-loop mdata between K values so memory does not
accumulate across the K loop.

This is a minimal band-aid — the structural fix would be to avoid the full
mdata.copy() in the first place, because the fake test only mutates obsm
and uns on the prog_key modality, not the rna modality (which is what
makes the deep copy expensive).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../U-test_perturbation_calibration.py                | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py b/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py
index 95fd425..f238400 100644
--- a/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py
+++ b/src/Stage2_Evaluation/B_Calibration/Slurm_version/U-test_perturbation_calibration/U-test_perturbation_calibration.py
@@ -8,6 +8,7 @@
 
 import os
 import sys
+import gc
 import yaml
 import logging
 import argparse
@@ -171,6 +172,16 @@ def compute_fake_perturbation_tests():
                     test_stats_fake_dfs.append(test_stats_df) # combine all
                     test_stats_fake_dfs_temp.append(test_stats_df) # combine for each k and sel_thresh
 
+                # Release per-iteration deep-copy of mdata. Without this, full
+                # mdata.copy() instances stack up (~10 GB sparse rna matrix per copy
+                # on real datasets) and OOM around iteration 16/50 in 256 GB.
+                del _mdata, mdata_samp
+                gc.collect()
+
+            # Release the K-loop mdata before reloading for next K.
+            del mdata
+            gc.collect()
+
             # Save results
             test_stats_fake_dfs_temp = pd.concat(test_stats_fake_dfs_temp, ignore_index=True)
             test_stats_fake_dfs_temp.to_csv(