def _searchsorted_right_lazy(time_vector: TimeVector, time_s: float | np.ndarray) -> np.int64 | np.ndarray:
    """Equivalent of ``np.searchsorted(time_vector, time_s, side="right")`` that does not
    materialize the whole ``time_vector``.

    ``np.searchsorted`` is fine for in-memory arrays and mem-maps, but for out-of-core
    arrays (e.g. a ``zarr.Array``) it reads the whole time vector into memory. Bisecting
    instead reads O(log N) elements per query, which keeps RAM usage low for long
    recordings.

    Parameters
    ----------
    time_vector : TimeVector
        Sorted (ascending) 1-D sequence of times. Only ``len()`` and integer indexing
        are used, so any lazily-read array-like works.
    time_s : float | np.ndarray
        Query time(s). May be a scalar or an array-like of any shape, as with
        ``np.searchsorted``.

    Returns
    -------
    np.int64 | np.ndarray
        Right-side insertion index for each query: a ``np.int64`` for a scalar query,
        otherwise an ``int64`` array with the same shape as ``time_s``.
    """
    if np.ndim(time_s) == 0:
        return np.int64(bisect_right(time_vector, time_s))

    # Flatten so that every bisect call receives a single scalar query; iterating an
    # N-D array directly would hand whole sub-arrays to bisect_right, which fails.
    queries = np.asarray(time_s)
    flat_indices = np.fromiter(
        (bisect_right(time_vector, query) for query in queries.ravel()),
        dtype=np.int64,
        count=queries.size,
    )
    # Restore the caller's shape, matching np.searchsorted's contract.
    return flat_indices.reshape(queries.shape)