From 8013f3792505a9954b12f8d080f71eaff7ff4986 Mon Sep 17 00:00:00 2001 From: rachelstephlee Date: Wed, 8 Oct 2025 04:36:05 +0000 Subject: [PATCH 1/7] adding estimate_snr_kurtosis that takes in nwbs --- .../metrics/snr_kurtosis.py | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py index 920dc2a..0793a9c 100644 --- a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py +++ b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py @@ -35,6 +35,7 @@ from typing import Tuple import numpy as np +import pandas as pd from numpy.typing import NDArray from scipy.signal import find_peaks from scipy.stats import kurtosis @@ -42,9 +43,11 @@ __all__ = ["estimate_snr", "estimate_kurtosis"] -def estimate_snr( + + +def estimate_trace_snr( trace: NDArray[np.floating], fps: float = 20.0 -) -> Tuple[float, float, NDArray[np.intp]]: +) -> tuple[float, float, NDArray[np.intp]]: """ Estimate the signal-to-noise ratio (SNR) of a 1D trace. @@ -110,7 +113,7 @@ def estimate_snr( return snr, noise, peaks -def estimate_kurtosis(trace: NDArray[np.floating]) -> float: +def estimate_trace_kurtosis(trace: NDArray[np.floating]) -> float: """ Compute the **excess kurtosis** of a 1D trace distribution. @@ -132,3 +135,29 @@ def estimate_kurtosis(trace: NDArray[np.floating]) -> float: # Excess kurtosis (normal distribution = 0) return float(kurtosis(trace, fisher=True, bias=False)) + + +def estimate_snr_kurtosis( + nwb, data_column = 'data', + fps: float = 20.0, +) -> pd.DataFrame + """ + Estimate the signal-to-noise ratio (SNR) and kurtosis given an NWB or list of NWBs + + """ + nwb_list = nwb if isinstance(nwb, list) else [nwb] + + trial_metrics = [] + for nwb in nwb_list: + df_fip = nwb.df_fip + ses_idx = df_fip['ses_idx'] + for channel in df_fip.event.unique(): + df_fip_channel_trace = df_fip.query(f'event == {channel}')[data_column].values + (snr, noise, peaks) = estimate_trace_snr(df_fip_channel_trace, fps) + kurtosis = estimate_trace_kurtosis(df_fip_channel_trace) + trial_metrics.append([ses_idx, channel + '_' + data_column, snr, noise, peaks, kurtosis]) + + df_trial_metrics = pd.DataFrame(trial_metrics) + + + return df_trial_metrics \ No newline at end of file From 8511a69c4686bbd681edcbe7b3435fbde3bdaef8 Mon Sep 17 00:00:00 2001 From: rachelstephlee Date: Wed, 8 Oct 2025 04:38:17 +0000 Subject: [PATCH 2/7] adding checks --- .../metrics/snr_kurtosis.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py index 0793a9c..305b48b 100644 --- a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py +++ b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py @@ -40,7 +40,10 @@ from scipy.signal import find_peaks from scipy.stats import kurtosis -__all__ = ["estimate_snr", "estimate_kurtosis"] +from aind_dynamic_foraging_data_utils import nwb_utils as nu + + +__all__ = ["estimate_trace_snr", "estimate_trace_kurtosis"] @@ -137,7 +140,7 @@ def estimate_trace_kurtosis(trace: NDArray[np.floating]) -> float: return float(kurtosis(trace, fisher=True, bias=False)) -def estimate_snr_kurtosis( +def estimate_snr_and_kurtosis( nwb, data_column = 'data', fps: float = 20.0, ) -> pd.DataFrame @@ -148,8 +151,12 @@ def estimate_snr_kurtosis( nwb_list = nwb if isinstance(nwb, list) else [nwb] trial_metrics = [] - for nwb in nwb_list: - df_fip = nwb.df_fip + for nwb_i in nwb_list: + if not hasattr(nwb_i, "df_fip"): + print("You need to compute the df_fip first") + print("running `nwb.df_fip = create_df_fip(nwb,tidy=True)`") + nwb_i.df_fip = nu.create_df_fip(nwb_i, tidy=True) + df_fip = nwb_i.df_fip ses_idx = df_fip['ses_idx'] for channel in df_fip.event.unique(): df_fip_channel_trace = df_fip.query(f'event == {channel}')[data_column].values From 636aa30581a53d4758aef8279b36c5091c5d904f Mon Sep 17 00:00:00 2001 From: Rachel Lee Date: Thu, 9 Oct 2025 02:11:54 +0000 Subject: [PATCH 3/7] have checked for 1 nwb --- .../metrics/snr_kurtosis.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py index 305b48b..5190e9c 100644 --- a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py +++ b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py @@ -143,28 +143,30 @@ def estimate_trace_kurtosis(trace: NDArray[np.floating]) -> float: def estimate_snr_and_kurtosis( nwb, data_column = 'data', fps: float = 20.0, -) -> pd.DataFrame +) -> pd.DataFrame: """ Estimate the signal-to-noise ratio (SNR) and kurtosis given an NWB or list of NWBs """ nwb_list = nwb if isinstance(nwb, list) else [nwb] - trial_metrics = [] + sess_metrics = [] for nwb_i in nwb_list: if not hasattr(nwb_i, "df_fip"): print("You need to compute the df_fip first") print("running `nwb.df_fip = create_df_fip(nwb,tidy=True)`") nwb_i.df_fip = nu.create_df_fip(nwb_i, tidy=True) df_fip = nwb_i.df_fip - ses_idx = df_fip['ses_idx'] - for channel in df_fip.event.unique(): - df_fip_channel_trace = df_fip.query(f'event == {channel}')[data_column].values + ses_idx = df_fip['ses_idx'].unique()[0] + all_channels = [channel for channel in df_fip.event.unique() if + not channel.startswith("FIP") and not channel.startswith("Iso")] + processed_signal_channels = [channel for channel in all_channels if channel.endswith('dff-poly_mc-iso-IRLS')] + for channel in processed_signal_channels: + df_fip_channel_trace = df_fip.query(f"event == '{channel}'")[data_column].values (snr, noise, peaks) = estimate_trace_snr(df_fip_channel_trace, fps) kurtosis = estimate_trace_kurtosis(df_fip_channel_trace) - trial_metrics.append([ses_idx, channel + '_' + data_column, snr, noise, peaks, kurtosis]) - - df_trial_metrics = pd.DataFrame(trial_metrics) - - - return df_trial_metrics \ No newline at end of file + sess_metrics.append([ses_idx, channel + '_' + data_column, snr, noise, peaks, kurtosis]) + # put together df_sess_metrics + df_sess_metrics = pd.DataFrame(sess_metrics, columns=['ses_idx', 'channel', 'SNR', + 'noise', 'peaks', 'kurtosis']) + return df_sess_metrics From 50db47e0f8b8b63680c3081075b60e72720e92df Mon Sep 17 00:00:00 2001 From: Rachel Lee Date: Thu, 9 Oct 2025 02:23:48 +0000 Subject: [PATCH 4/7] linting --- .../metrics/snr_kurtosis.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py index 5190e9c..f6d769b 100644 --- a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py +++ b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py @@ -2,9 +2,10 @@ Utilities for signal quality metrics on 1D fluorescence traces. This module provides: -- :func:`estimate_snr` — an SNR estimator using a derivative-based noise +- :func:`estimate_trace_snr` — an SNR estimator using a derivative-based noise estimate and peak-based signal estimate. -- :func:`estimate_kurtosis` — excess kurtosis of the trace distribution. +- :func:`estimate_trace_kurtosis` — excess kurtosis of the trace distribution. +- :func:`estimate_snr_and_kurtosis` — excess kurtosis of the trace distribution. Notes ----- @@ -46,8 +47,6 @@ __all__ = ["estimate_trace_snr", "estimate_trace_kurtosis"] - - def estimate_trace_snr( trace: NDArray[np.floating], fps: float = 20.0 ) -> tuple[float, float, NDArray[np.intp]]: @@ -141,7 +140,7 @@ def estimate_trace_kurtosis(trace: NDArray[np.floating]) -> float: def estimate_snr_and_kurtosis( - nwb, data_column = 'data', + nwb, data_column='data', fps: float = 20.0, ) -> pd.DataFrame: """ @@ -158,9 +157,10 @@ def estimate_snr_and_kurtosis( nwb_i.df_fip = nu.create_df_fip(nwb_i, tidy=True) df_fip = nwb_i.df_fip ses_idx = df_fip['ses_idx'].unique()[0] - all_channels = [channel for channel in df_fip.event.unique() if + all_channels = [channel for channel in df_fip.event.unique() if not channel.startswith("FIP") and not channel.startswith("Iso")] - processed_signal_channels = [channel for channel in all_channels if channel.endswith('dff-poly_mc-iso-IRLS')] + processed_signal_channels = [channel for channel in all_channels if + channel.endswith('dff-poly_mc-iso-IRLS')] for channel in processed_signal_channels: df_fip_channel_trace = df_fip.query(f"event == '{channel}'")[data_column].values (snr, noise, peaks) = estimate_trace_snr(df_fip_channel_trace, fps) @@ -168,5 +168,5 @@ def estimate_snr_and_kurtosis( sess_metrics.append([ses_idx, channel + '_' + data_column, snr, noise, peaks, kurtosis]) # put together df_sess_metrics df_sess_metrics = pd.DataFrame(sess_metrics, columns=['ses_idx', 'channel', 'SNR', - 'noise', 'peaks', 'kurtosis']) + 'noise', 'peaks', 'kurtosis']) return df_sess_metrics From 0b56c7b439153a3f3b82e0f0dbb4c42bb5e05234 Mon Sep 17 00:00:00 2001 From: Rachel Lee Date: Thu, 9 Oct 2025 02:31:59 +0000 Subject: [PATCH 5/7] linting and documentation --- .../metrics/snr_kurtosis.py | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py index f6d769b..0341a02 100644 --- a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py +++ b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py @@ -5,7 +5,7 @@ - :func:`estimate_trace_snr` — an SNR estimator using a derivative-based noise estimate and peak-based signal estimate. - :func:`estimate_trace_kurtosis` — excess kurtosis of the trace distribution. -- :func:`estimate_snr_and_kurtosis` — excess kurtosis of the trace distribution. +- :func:`estimate_snr_and_kurtosis` — estimate the SNR and kurtosis using an NWB Notes ----- @@ -33,7 +33,6 @@ from __future__ import annotations import warnings -from typing import Tuple import numpy as np import pandas as pd @@ -141,11 +140,30 @@ def estimate_trace_kurtosis(trace: NDArray[np.floating]) -> float: def estimate_snr_and_kurtosis( nwb, data_column='data', + process_suffix='dff-poly_mc-iso-IRLS', fps: float = 20.0, ) -> pd.DataFrame: """ - Estimate the signal-to-noise ratio (SNR) and kurtosis given an NWB or list of NWBs + Estimate the signal-to-noise ratio (SNR) and kurtosis given an NWB or list of NWBs. + Iso channels will be excluded, and by default, only the preprocessed, motion-corrected + channels will be used. + Parameters + ---------- + nwb: + Single or list of nwb or nwb-like object + data_column: string, optional + The data_column for the df_fip, by default 'data' + process_suffix: string, optional + The suffix of the channel indicating processing method, by default 'dff-poly_mc-iso-IRLS' + fps : float, optional + Sampling frequency (frames per second), by default ``20.0``. + + Returns + ------- + df.Dataframe + Dataframe with each row a session and channel with columns of estimated + signal to noise ratio (SNR), noise, peaks in the trace, and excess kurtosis. """ nwb_list = nwb if isinstance(nwb, list) else [nwb] @@ -160,7 +178,7 @@ def estimate_snr_and_kurtosis( all_channels = [channel for channel in df_fip.event.unique() if not channel.startswith("FIP") and not channel.startswith("Iso")] processed_signal_channels = [channel for channel in all_channels if - channel.endswith('dff-poly_mc-iso-IRLS')] + channel.endswith(process_suffix)] for channel in processed_signal_channels: df_fip_channel_trace = df_fip.query(f"event == '{channel}'")[data_column].values (snr, noise, peaks) = estimate_trace_snr(df_fip_channel_trace, fps) From 2bdb54b7d81f87805deddf7e9366729000741f7d Mon Sep 17 00:00:00 2001 From: Rachel Lee Date: Fri, 10 Oct 2025 00:15:33 +0000 Subject: [PATCH 6/7] not adding the df_fip to the nwb object; i think this might be what is causing OOM errors when i'm looking at 100+ sessions --- .../metrics/snr_kurtosis.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py index 0341a02..6d42a2f 100644 --- a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py +++ b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py @@ -172,8 +172,9 @@ def estimate_snr_and_kurtosis( if not hasattr(nwb_i, "df_fip"): print("You need to compute the df_fip first") print("running `nwb.df_fip = create_df_fip(nwb,tidy=True)`") - nwb_i.df_fip = nu.create_df_fip(nwb_i, tidy=True) - df_fip = nwb_i.df_fip + df_fip = nu.create_df_fip(nwb_i, tidy=True) + else: + df_fip = nwb_i.df_fip ses_idx = df_fip['ses_idx'].unique()[0] all_channels = [channel for channel in df_fip.event.unique() if not channel.startswith("FIP") and not channel.startswith("Iso")] From 1a978e95ffe40cd1360b94949c693aa4e736b1ed Mon Sep 17 00:00:00 2001 From: Rachel Lee Date: Fri, 10 Oct 2025 00:29:58 +0000 Subject: [PATCH 7/7] further cutting down on memory-- querying the processed_signal_channels to save space. --- src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py index 6d42a2f..7db42f6 100644 --- a/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py +++ b/src/aind_dynamic_foraging_basic_analysis/metrics/snr_kurtosis.py @@ -180,6 +180,7 @@ def estimate_snr_and_kurtosis( not channel.startswith("FIP") and not channel.startswith("Iso")] processed_signal_channels = [channel for channel in all_channels if channel.endswith(process_suffix)] + df_fip = df_fip[df_fip["event"].isin(processed_signal_channels)] for channel in processed_signal_channels: df_fip_channel_trace = df_fip.query(f"event == '{channel}'")[data_column].values (snr, noise, peaks) = estimate_trace_snr(df_fip_channel_trace, fps)