From 6aadb6a1462be80b01a6817e98ac0af75fd630fd Mon Sep 17 00:00:00 2001 From: gp201 Date: Wed, 11 Mar 2026 11:18:03 -0700 Subject: [PATCH 1/3] Accumulate counts when combining mutations Use += when adding an already-existing combined mutation so counts are summed instead of overwritten. --- barcodeforge/generate_barcodes.py | 2 +- tests/test_generate_barcodes.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/barcodeforge/generate_barcodes.py b/barcodeforge/generate_barcodes.py index f267478..1d0c115 100755 --- a/barcodeforge/generate_barcodes.py +++ b/barcodeforge/generate_barcodes.py @@ -196,7 +196,7 @@ def check_mutation_chain(df_barcodes: pd.DataFrame) -> pd.DataFrame: else: # combining leads to already existing mutation # just add in that mutation - df_barcodes.loc[lin_seq.index, sm[2]] = 1 + df_barcodes.loc[lin_seq.index, sm[2]] += 1 # remove constituent mutations df_barcodes.loc[lin_seq.index, sm[0:2]] -= 1 # drop all unused mutations diff --git a/tests/test_generate_barcodes.py b/tests/test_generate_barcodes.py index 0483c58..5a832cd 100644 --- a/tests/test_generate_barcodes.py +++ b/tests/test_generate_barcodes.py @@ -91,6 +91,19 @@ def test_check_mutation_chain(sample_barcode_data): assert isinstance(chained_df, pd.DataFrame) +def test_check_mutation_chain(): + sample_barcode_data = pd.DataFrame( + {"A225G": [1], "A225T": [1], "C225A": [1], "G225T": [1], "T225C": [2]}, + index=["lineage"], + ) + chained_df = check_mutation_chain(sample_barcode_data.copy()) + df_barcodes_ideal = pd.DataFrame( + {"A225C": [1]}, + index=["lineage"], + ) + pd.testing.assert_frame_equal(chained_df, df_barcodes_ideal) + + def test_replace_underscore_with_dash(): data = {"value": [1, 2]} df = pd.DataFrame(data, index=["lineage_A", "lineage_B"]) From 0150abefd018b47b6b458006d3c7d1e36589f9a4 Mon Sep 17 00:00:00 2001 From: gp201 Date: Wed, 11 Mar 2026 14:10:19 -0700 Subject: [PATCH 2/3] feat: Assert barcode matrix is binary --- barcodeforge/generate_barcodes.py | 2 ++ tests/test_generate_barcodes.py | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/barcodeforge/generate_barcodes.py b/barcodeforge/generate_barcodes.py index 1d0c115..65e20f3 100755 --- a/barcodeforge/generate_barcodes.py +++ b/barcodeforge/generate_barcodes.py @@ -207,6 +207,8 @@ def check_mutation_chain(df_barcodes: pd.DataFrame) -> pd.DataFrame: # in case mutation path leads to a return to the reference. df_barcodes = reversion_checking(df_barcodes) seq_muts = identify_chains(df_barcodes) + # The barcode should be a binary sparse matrix + assert df_barcodes.isin([0, 1]).all(axis=None), "Barcode matrix should be binary" return df_barcodes diff --git a/tests/test_generate_barcodes.py b/tests/test_generate_barcodes.py index 5a832cd..1484890 100644 --- a/tests/test_generate_barcodes.py +++ b/tests/test_generate_barcodes.py @@ -91,7 +91,7 @@ def test_check_mutation_chain(sample_barcode_data): assert isinstance(chained_df, pd.DataFrame) -def test_check_mutation_chain(): +def test_check_mutation_chain_repetitve_mutations(): sample_barcode_data = pd.DataFrame( {"A225G": [1], "A225T": [1], "C225A": [1], "G225T": [1], "T225C": [2]}, index=["lineage"], @@ -103,6 +103,20 @@ def test_check_mutation_chain(): ) pd.testing.assert_frame_equal(chained_df, df_barcodes_ideal) +def test_check_mutation_chain_non_binary_values(): + sample_barcode_data = pd.DataFrame( + { + "A225G": [1], + "A225T": [1], + "C225A": [1], + "G225T": [1], + "T225C": [2], + "C123A": [2], + }, + index=["lineage"], + ) + with pytest.raises(AssertionError, match="Barcode matrix should be binary"): + check_mutation_chain(sample_barcode_data.copy()) def test_replace_underscore_with_dash(): data = {"value": [1, 2]} From 509146f34e731a89e70e9b2c37a7250e447fb451 Mon Sep 17 00:00:00 2001 From: gp201 Date: Wed, 11 Mar 2026 14:23:46 -0700 Subject: [PATCH 3/3] lint: lint fix --- tests/test_generate_barcodes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_generate_barcodes.py b/tests/test_generate_barcodes.py index 1484890..b078650 100644 --- a/tests/test_generate_barcodes.py +++ b/tests/test_generate_barcodes.py @@ -103,6 +103,7 @@ def test_check_mutation_chain_repetitve_mutations(): ) pd.testing.assert_frame_equal(chained_df, df_barcodes_ideal) + def test_check_mutation_chain_non_binary_values(): sample_barcode_data = pd.DataFrame( { @@ -118,6 +119,7 @@ def test_check_mutation_chain_non_binary_values(): with pytest.raises(AssertionError, match="Barcode matrix should be binary"): check_mutation_chain(sample_barcode_data.copy()) + def test_replace_underscore_with_dash(): data = {"value": [1, 2]} df = pd.DataFrame(data, index=["lineage_A", "lineage_B"])