From 7ee4f6665a20f11de57d6ec26f89e1df6b3cefd1 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 14 Apr 2026 13:07:47 -0700 Subject: [PATCH 01/16] update tdms models --- .../lib/sift_client/sift_types/data_import.py | 142 +++++++++++++++++- 1 file changed, 138 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py index 73bc64454..5dcdc0420 100644 --- a/python/lib/sift_client/sift_types/data_import.py +++ b/python/lib/sift_client/sift_types/data_import.py @@ -17,6 +17,12 @@ PARQUET_COMPLEX_TYPES_IMPORT_MODE_BYTES, PARQUET_COMPLEX_TYPES_IMPORT_MODE_IGNORE, PARQUET_COMPLEX_TYPES_IMPORT_MODE_STRING, + TDMS_COMPLEX_COMPONENT_IMAGINARY, + TDMS_COMPLEX_COMPONENT_REAL, + TDMS_COMPLEX_COMPONENT_UNSPECIFIED, + TDMS_FALLBACK_METHOD_FAIL_ON_ERROR, + TDMS_FALLBACK_METHOD_IGNORE_ERROR, + TDMS_FALLBACK_METHOD_UNSPECIFIED, ) from sift.data_imports.v2.data_imports_pb2 import CsvConfig as CsvConfigProto from sift.data_imports.v2.data_imports_pb2 import CsvTimeColumn as CsvTimeColumnProto @@ -38,6 +44,7 @@ ) from sift.data_imports.v2.data_imports_pb2 import ParquetTimeColumn as ParquetTimeColumnProto from sift.data_imports.v2.data_imports_pb2 import TDMSConfig as TDMSConfigProto +from sift.data_imports.v2.data_imports_pb2 import TdmsDataConfig as TdmsDataConfigProto from sift.data_imports.v2.data_imports_pb2 import TimeFormat as TimeFormatProto from sift_client._internal.util.timestamp import to_pb_timestamp @@ -533,30 +540,157 @@ def _from_proto( ) +class TdmsFallbackMethod(Enum): + """Controls behavior when TDMS channels lack timing information.""" + + FAIL_ON_ERROR = TDMS_FALLBACK_METHOD_FAIL_ON_ERROR + IGNORE_ERROR = TDMS_FALLBACK_METHOD_IGNORE_ERROR + + +class TdmsComplexComponent(Enum): + """Selects which component to import from complex-valued TDMS data.""" + + REAL = TDMS_COMPLEX_COMPONENT_REAL + IMAGINARY = TDMS_COMPLEX_COMPONENT_IMAGINARY + + +class TdmsDataColumn(DataColumnBase): + 
"""Per-channel configuration for TDMS imports. + + Attributes: + group_name: The TDMS group name. + channel_name: The TDMS channel name. + time_channel_name: Explicit time channel. If unset, assumes waveform properties. + scaled: Whether to import scaled or raw values. Defaults to True. + complex_component: Which component to import for complex types. Defaults to real. + """ + + group_name: str + channel_name: str + time_channel_name: str | None = None + scaled: bool | None = None + complex_component: TdmsComplexComponent | None = None + + class TdmsImportConfig(ImportConfigBase): """Configuration for importing a TDMS file. Attributes: start_time_override: Override the ``wf_start_time`` metadata field for all channels. Useful when waveform channels have ``wf_increment`` but no ``wf_start_time``. - file_size: The file size in bytes. Required if the file has truncated chunks. + data: Per-channel configurations. If empty, ingests everything using the fallback method. + fallback_method: How to handle channels with missing timing information. + time_format: Time format for time channels not using the TDMS timestamp type. + relative_start_time: Relative start time for channels using a non-standard time channel. + import_file_properties: If true, imports TDMS file properties as run metadata. 
""" start_time_override: datetime | None = None - file_size: int | None = None + data: list[TdmsDataColumn] = [] + fallback_method: TdmsFallbackMethod | None = None + time_format: TimeFormat | None = None + relative_start_time: datetime | None = None + import_file_properties: bool = False + + def __getitem__(self, name: str) -> TdmsDataColumn: + """Look up a data column by channel name.""" + for d in self.data: + if d.name == name: + return d + raise KeyError(f"No data column named '{name}'") def _to_proto(self) -> TDMSConfigProto: proto = TDMSConfigProto( asset_name=self.asset_name, run_name=self.run_name or "", run_id=self.run_id or "", + import_file_properties=self.import_file_properties, ) if self.start_time_override is not None: proto.start_time_override.CopyFrom(to_pb_timestamp(self.start_time_override)) - if self.file_size is not None: - proto.file_size = self.file_size + if self.fallback_method is not None: + proto.fallback_method = self.fallback_method.value + if self.time_format is not None: + proto.time_format = self.time_format.value + if self.relative_start_time is not None: + proto.relative_start_time.CopyFrom(to_pb_timestamp(self.relative_start_time)) + for d in self.data: + entry = TdmsDataConfigProto( + group_name=d.group_name, + channel_name=d.channel_name, + channel_config=ChannelConfigProto( + name=d.name, + data_type=d.data_type.value, + units=d.units, + description=d.description, + ), + ) + if d.time_channel_name is not None: + entry.time_channel_name = d.time_channel_name + if d.scaled is not None: + entry.scaled = d.scaled + if d.complex_component is not None: + entry.complex_component = d.complex_component.value + proto.data.append(entry) return proto + @classmethod + def _from_proto(cls, proto: TDMSConfigProto) -> TdmsImportConfig: + """Create from a proto TDMSConfig (e.g. 
from DetectConfig response).""" + start_time_override = None + if proto.HasField("start_time_override"): + from datetime import timezone + + start_time_override = proto.start_time_override.ToDatetime(tzinfo=timezone.utc) + + relative_start_time = None + if proto.HasField("relative_start_time"): + from datetime import timezone + + relative_start_time = proto.relative_start_time.ToDatetime(tzinfo=timezone.utc) + + data = [] + for d in proto.data: + ch = d.channel_config + complex_component = None + if d.complex_component and d.complex_component != TDMS_COMPLEX_COMPONENT_UNSPECIFIED: + complex_component = TdmsComplexComponent(d.complex_component) + data.append( + TdmsDataColumn( + group_name=d.group_name, + channel_name=d.channel_name, + name=ch.name, + data_type=ChannelDataType(ch.data_type), + units=ch.units, + description=ch.description, + time_channel_name=d.time_channel_name + if d.HasField("time_channel_name") + else None, + scaled=d.scaled if d.HasField("scaled") else None, + complex_component=complex_component, + ) + ) + + fallback_method = None + if proto.fallback_method and proto.fallback_method != TDMS_FALLBACK_METHOD_UNSPECIFIED: + fallback_method = TdmsFallbackMethod(proto.fallback_method) + + time_format = None + if proto.HasField("time_format"): + time_format = TimeFormat(proto.time_format) + + return cls( + asset_name=proto.asset_name, + run_name=proto.run_name or None, + run_id=proto.run_id or None, + start_time_override=start_time_override, + data=data, + fallback_method=fallback_method, + time_format=time_format, + relative_start_time=relative_start_time, + import_file_properties=proto.import_file_properties, + ) + class Hdf5DataColumn(DataColumnBase): """A dataset mapping for HDF5 imports. 
From 761611aef76f9eb19d26fb90bd3e041caa1ff5fe Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 14 Apr 2026 13:13:27 -0700 Subject: [PATCH 02/16] remove references to deprecated file_size --- .../lib/sift_client/_tests/resources/test_data_imports.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_data_imports.py b/python/lib/sift_client/_tests/resources/test_data_imports.py index a3819cf1c..6b8dcf03e 100644 --- a/python/lib/sift_client/_tests/resources/test_data_imports.py +++ b/python/lib/sift_client/_tests/resources/test_data_imports.py @@ -137,12 +137,12 @@ def test_to_proto(self): run_name="run1", run_id="run_123", start_time_override=datetime(2026, 1, 1, tzinfo=timezone.utc), - file_size=12345, + import_file_properties=True, ) proto = config._to_proto() assert proto.asset_name == "my_asset" assert proto.run_id == "run_123" - assert proto.file_size == 12345 + assert proto.import_file_properties is True assert proto.HasField("start_time_override") def test_to_proto_optional_fields_unset(self): @@ -151,7 +151,7 @@ def test_to_proto_optional_fields_unset(self): assert proto.run_name == "run1" assert proto.run_id == "" assert not proto.HasField("start_time_override") - assert proto.file_size == 0 + assert proto.import_file_properties is False def test_run_id_takes_precedence(self): config = TdmsImportConfig(asset_name="a", run_name="ignored", run_id="run_123") From ed2cf417e3e19a6c869cf07dd3fd9218c7580d69 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 14 Apr 2026 14:25:59 -0700 Subject: [PATCH 03/16] add client-side tdms detect_config and wire it into data_imports --- python/lib/sift_client/_internal/util/tdms.py | 242 ++++++++++++++++++ .../lib/sift_client/resources/data_imports.py | 11 +- .../lib/sift_client/sift_types/data_import.py | 21 +- 3 files changed, 263 insertions(+), 11 deletions(-) create mode 100644 python/lib/sift_client/_internal/util/tdms.py diff --git 
a/python/lib/sift_client/_internal/util/tdms.py b/python/lib/sift_client/_internal/util/tdms.py new file mode 100644 index 000000000..f35454f32 --- /dev/null +++ b/python/lib/sift_client/_internal/util/tdms.py @@ -0,0 +1,242 @@ +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +import numpy as np + +if TYPE_CHECKING: + from pathlib import Path +from nptdms import TdmsChannel, TdmsFile, TdmsGroup, types + +from sift_client.sift_types.channel import ChannelDataType +from sift_client.sift_types.data_import import ( + TdmsComplexComponent, + TdmsDataColumn, + TdmsFallbackMethod, + TdmsImportConfig, +) + +# Common names for dedicated time channels within a group +TIME_CHANNEL_NAMES = ["time", "Time", "TIME", "timestamp", "Timestamp", "t", "T"] + +# Common property names used to detect the units of a channel in TDMS files. +COMMON_UNIT_PROPS = [ + "unit_string", + "NI_UnitDescription", +] + +# Common property names used to detect the description of a channel in TDMS files. +COMMON_DESCRIPTION_PROPS = ["description", "NI_Description", "Description"] + +# Common unit strings that indicate a channel represents time (waveform x-axis). +COMMON_WAVEFORM_TIME_UNITS = [ + "s", + "sec", + "second", + "seconds", + "ms", + "millisecond", + "milliseconds", + "us", + "microsecond", + "microseconds", + "ns", + "nanosecond", + "nanoseconds", +] + +# Mapping from numpy scalar type to Sift channel data type. 
+_NUMPY_TO_SIFT: dict[type, ChannelDataType] = { + np.bool_: ChannelDataType.BOOL, + np.int8: ChannelDataType.INT_32, + np.int16: ChannelDataType.INT_32, + np.int32: ChannelDataType.INT_32, + np.int64: ChannelDataType.INT_64, + np.uint8: ChannelDataType.UINT_32, + np.uint16: ChannelDataType.UINT_32, + np.uint32: ChannelDataType.UINT_32, + np.uint64: ChannelDataType.UINT_64, + np.float32: ChannelDataType.FLOAT, + np.float64: ChannelDataType.DOUBLE, + np.datetime64: ChannelDataType.INT_64, + np.complex64: ChannelDataType.FLOAT, + np.complex128: ChannelDataType.DOUBLE, + np.str_: ChannelDataType.STRING, + np.bytes_: ChannelDataType.STRING, + np.object_: ChannelDataType.STRING, + np.void: ChannelDataType.BYTES, +} + + +def _numpy_to_sift_type(dtype: np.dtype) -> ChannelDataType: + """Map a numpy dtype to a Sift ChannelDataType.""" + sift_type = _NUMPY_TO_SIFT.get(dtype.type) + if sift_type is None: + raise ValueError(f"Unsupported numpy dtype: {dtype}") + return sift_type + + +def detect_properties(channel: TdmsChannel, possible_props: list, default: str = "") -> str: + """Return the first matching property value from a list of possible property names.""" + for prop in possible_props: + value = channel.properties.get(prop) + if value: + return value + return default + + +def detect_enum_types(channel: TdmsChannel) -> dict[str, int] | None: + """Check if the TDMS channel is embedded with enum configs. + + Returns a name-to-key mapping, or None if no enum config is present. 
+ """ + name = f"{channel.group_name}/{channel.name}" + + enum_config_data = channel.properties.get("enum_config") + if not enum_config_data: + return None + try: + enum_configs = json.loads(enum_config_data) + except Exception as e: + raise ValueError(f"Failed to decode JSON enum_configs for {name}: {e}") from e + + enum_types: dict[str, int] = {} + for enum_key, enum_name in enum_configs.items(): + try: + key = int(enum_key) + except ValueError as e: + raise ValueError(f"{enum_key} is not a valid enum integer for ({name})") from e + if key < 0: + raise ValueError(f"{enum_key} is not a valid unsigned enum integer ({name})") + enum_types[enum_name] = key + + return enum_types if enum_types else None + + +def is_waveform_time_channel(channel: TdmsChannel) -> bool: + """A waveform channel carries wf_start_offset and wf_increment properties.""" + return "wf_start_offset" in channel.properties and "wf_increment" in channel.properties + + +def find_time_channel(group: TdmsGroup) -> str | None: + """Return the name of a dedicated time channel in the group, if one exists. + + Detection order: + 1. Group-level 'xchannel' property. + 2. Any channel with TDMS TimeStamp data type. + 3. Any channel matching a common time name. + """ + channels = group.channels() + channel_names = {ch.name for ch in channels} + + # 1. Explicit xchannel property set by the file author. + xchannel = group.properties.get("xchannel") + if xchannel and xchannel in channel_names: + return xchannel + + # 2. Native datetime type. + for ch in channels: + if ch.data_type == types.TimeStamp: + return ch.name + + # 3. Common time names. + for name in TIME_CHANNEL_NAMES: + if name in channel_names: + return name + + return None + + +def detect_config( + file_path: str | Path, + asset_name: str = "", + fallback_method: TdmsFallbackMethod = TdmsFallbackMethod.FAIL_ON_ERROR, +) -> TdmsImportConfig: + """Detect a TDMS import config by inspecting the file's channels. + + Args: + file_path: Path to the TDMS file. 
+ asset_name: The asset name to set on the config. + fallback_method: How to handle channels with missing timing information. + + Returns: + A TdmsImportConfig populated with detected channel configurations. + """ + data: list[TdmsDataColumn] = [] + + with TdmsFile.open(file_path) as tdms_file: + for group in tdms_file.groups(): + group_name = group.name + time_channel_name = find_time_channel(group) + + for channel in group.channels(): + tdms_channel_name = channel.name + + # Skip channels that are used as a time axis + if tdms_channel_name == time_channel_name: + continue + + # Channel name will always be <group_name>.<channel_name> + channel_name = f"{group_name}.{tdms_channel_name}" + + units = detect_properties(channel, COMMON_UNIT_PROPS) + description = detect_properties(channel, COMMON_DESCRIPTION_PROPS) + enum_types = detect_enum_types(channel) + + candidates: list[tuple[str, ChannelDataType, TdmsComplexComponent | None]] = [] + if np.issubdtype(channel.dtype, np.complexfloating): + # Split complex channel into separate .real and .imag channels. 
+ sift_type = _numpy_to_sift_type(channel.dtype) + candidates.append( + (f"{channel_name}.real", sift_type, TdmsComplexComponent.REAL) + ) + candidates.append( + (f"{channel_name}.imag", sift_type, TdmsComplexComponent.IMAGINARY) + ) + else: + sift_type = ( + ChannelDataType.ENUM if enum_types else _numpy_to_sift_type(channel.dtype) + ) + candidates.append((channel_name, sift_type, None)) + + for name, data_type, complex_component in candidates: + if is_waveform_time_channel(channel): + data.append( + TdmsDataColumn( + group_name=group_name, + channel_name=tdms_channel_name, + name=name, + data_type=data_type, + units=units, + description=description, + time_channel_name=None, + complex_component=complex_component, + enum_types=enum_types, + ) + ) + elif time_channel_name is not None: + data.append( + TdmsDataColumn( + group_name=group_name, + channel_name=tdms_channel_name, + name=name, + data_type=data_type, + units=units, + description=description, + time_channel_name=time_channel_name, + complex_component=complex_component, + enum_types=enum_types, + ) + ) + # Non-time-series data (e.g., binary blob, spectrum data, etc.) 
+ else: + if fallback_method == TdmsFallbackMethod.IGNORE_ERROR: + continue + raise ValueError(f"No timing information for {channel_name}") + + return TdmsImportConfig( + asset_name=asset_name, + data=data, + fallback_method=fallback_method, + ) diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py index 6f77260ae..5b4651442 100644 --- a/python/lib/sift_client/resources/data_imports.py +++ b/python/lib/sift_client/resources/data_imports.py @@ -7,6 +7,7 @@ from sift_client._internal.util.executor import run_sync_function from sift_client._internal.util.file import extract_parquet_footer, upload_file from sift_client._internal.util.hdf5 import detect_hdf5_config +from sift_client._internal.util.tdms import detect_config as detect_tdms_config from sift_client.resources._base import ResourceBase from sift_client.sift_types.asset import Asset from sift_client.sift_types.channel import ChannelDataType @@ -62,8 +63,7 @@ async def import_from_path( completion before proceeding. When ``config`` is omitted the file format is auto-detected via - ``detect_config`` (CSV, Parquet, and HDF5). For other formats - (TDMS), ``config`` must be provided. + ``detect_config`` (CSV, Parquet, HDF5, and TDMS). When ``asset`` is provided it overrides the config value; otherwise the config's ``asset_name`` is used. If neither ``run`` nor ``run_name`` is provided (and none is @@ -199,9 +199,8 @@ async def detect_config( is inferred from the file extension when ``data_type`` is not provided. - CSV, Parquet, and HDF5 files are supported for auto-detection. - For other formats (TDMS), create the config manually - using ``TdmsImportConfig``. + CSV, Parquet, HDF5, and TDMS files are supported for + auto-detection. For CSV files, the server scans the first two rows for an optional JSON metadata row. 
Row 1 is checked first; row 2 is checked only @@ -246,6 +245,8 @@ async def detect_config( if data_type_key == DataTypeKey.HDF5: return await run_sync_function(lambda: detect_hdf5_config(path)) + if data_type_key == DataTypeKey.TDMS: + return await run_sync_function(lambda: detect_tdms_config(path)) is_parquet = data_type_key in ( DataTypeKey.PARQUET_FLATDATASET, diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py index 5dcdc0420..337b56773 100644 --- a/python/lib/sift_client/sift_types/data_import.py +++ b/python/lib/sift_client/sift_types/data_import.py @@ -7,6 +7,7 @@ from pydantic import BaseModel, model_validator from sift.common.type.v1.channel_config_pb2 import ChannelConfig as ChannelConfigProto +from sift.common.type.v1.channel_enum_type_pb2 import ChannelEnumType as ChannelEnumTypeProto from sift.data_imports.v2.data_imports_pb2 import ( DATA_TYPE_KEY_CSV, DATA_TYPE_KEY_HDF5, @@ -570,6 +571,7 @@ class TdmsDataColumn(DataColumnBase): time_channel_name: str | None = None scaled: bool | None = None complex_component: TdmsComplexComponent | None = None + enum_types: dict[str, int] | None = None class TdmsImportConfig(ImportConfigBase): @@ -615,15 +617,20 @@ def _to_proto(self) -> TDMSConfigProto: if self.relative_start_time is not None: proto.relative_start_time.CopyFrom(to_pb_timestamp(self.relative_start_time)) for d in self.data: + channel_config = ChannelConfigProto( + name=d.name, + data_type=d.data_type.value, + units=d.units, + description=d.description, + ) + if d.enum_types: + channel_config.enum_types.extend( + ChannelEnumTypeProto(name=name, key=key) for name, key in d.enum_types.items() + ) entry = TdmsDataConfigProto( group_name=d.group_name, channel_name=d.channel_name, - channel_config=ChannelConfigProto( - name=d.name, - data_type=d.data_type.value, - units=d.units, - description=d.description, - ), + channel_config=channel_config, ) if d.time_channel_name is not None: 
entry.time_channel_name = d.time_channel_name @@ -655,6 +662,7 @@ def _from_proto(cls, proto: TDMSConfigProto) -> TdmsImportConfig: complex_component = None if d.complex_component and d.complex_component != TDMS_COMPLEX_COMPONENT_UNSPECIFIED: complex_component = TdmsComplexComponent(d.complex_component) + enum_types = {e.name: e.key for e in ch.enum_types} if ch.enum_types else None data.append( TdmsDataColumn( group_name=d.group_name, @@ -668,6 +676,7 @@ def _from_proto(cls, proto: TDMSConfigProto) -> TdmsImportConfig: else None, scaled=d.scaled if d.HasField("scaled") else None, complex_component=complex_component, + enum_types=enum_types, ) ) From f6842883ab2de972d903a6f75e5457a015763649 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 14 Apr 2026 14:30:44 -0700 Subject: [PATCH 04/16] add nptdms to mypy overrides, missing typed stubs --- python/pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/pyproject.toml b/python/pyproject.toml index 7a681373f..048210a60 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -321,6 +321,11 @@ module = "ruamel" ignore_missing_imports = true ignore_errors = true +[[tool.mypy.overrides]] +module = "nptdms" +ignore_missing_imports = true +ignore_errors = true + [tool.setuptools.packages.find] where = ["lib"] From 687a3e94596ae4f31f6af64310d2c61517fa989a Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 14 Apr 2026 14:35:56 -0700 Subject: [PATCH 05/16] tdms detect_config tests --- .../sift_client/_tests/_internal/test_tdms.py | 376 ++++++++++++++++++ 1 file changed, 376 insertions(+) create mode 100644 python/lib/sift_client/_tests/_internal/test_tdms.py diff --git a/python/lib/sift_client/_tests/_internal/test_tdms.py b/python/lib/sift_client/_tests/_internal/test_tdms.py new file mode 100644 index 000000000..f4e2df408 --- /dev/null +++ b/python/lib/sift_client/_tests/_internal/test_tdms.py @@ -0,0 +1,376 @@ +"""Tests for detect_config (TDMS).""" + +import numpy as np +import pytest 
+from nptdms import ChannelObject, GroupObject, RootObject, TdmsWriter + +from sift_client._internal.util.tdms import detect_config +from sift_client.sift_types.channel import ChannelDataType +from sift_client.sift_types.data_import import TdmsComplexComponent, TdmsFallbackMethod + + +@pytest.fixture +def create_tdms_file(tmp_path): + """Return a helper that writes a TDMS file and returns its path.""" + file_path = tmp_path / "test.tdms" + + def _create(root_props=None, groups=None): + """Write a TDMS file. + + Args: + root_props: dict of root-level file properties. + groups: list of (group_name, channels) tuples where channels is a list of + ChannelObject instances. + """ + root = RootObject(properties=root_props or {}) + with TdmsWriter(file_path) as writer: + for group_name, channels in groups or []: + group = GroupObject(group_name) + writer.write_segment([root, group, *channels]) + return file_path + + return _create + + +class TestDetectConfig: + def test_waveform_channels(self, create_tdms_file): + """Channels with wf_start_offset and wf_increment are detected as waveform channels.""" + path = create_tdms_file( + groups=[ + ( + "sensors", + [ + ChannelObject( + "sensors", + "voltage", + np.array([1.0, 2.0, 3.0], dtype="float64"), + properties={ + "wf_start_offset": 0.0, + "wf_increment": 0.001, + "wf_start_time": np.datetime64("2024-01-01T00:00:00"), + }, + ), + ], + ) + ] + ) + + config = detect_config(path) + + assert len(config.data) == 1 + assert config.data[0].name == "sensors.voltage" + assert config.data[0].data_type == ChannelDataType.DOUBLE + assert config.data[0].time_channel_name is None + assert config.data[0].group_name == "sensors" + assert config.data[0].channel_name == "voltage" + + def test_time_channel_detection(self, create_tdms_file): + """A channel with TimeStamp type is used as the time source and excluded from data.""" + path = create_tdms_file( + groups=[ + ( + "group1", + [ + ChannelObject( + "group1", + "timestamp", + np.array( + 
["2024-01-01", "2024-01-02"], + dtype="datetime64[ns]", + ), + ), + ChannelObject( + "group1", + "temperature", + np.array([20.5, 21.0], dtype="float32"), + ), + ], + ) + ] + ) + + config = detect_config(path) + + channel_names = [d.name for d in config.data] + assert "group1.timestamp" not in channel_names + assert "group1.temperature" in channel_names + assert config.data[0].time_channel_name == "timestamp" + assert config.data[0].data_type == ChannelDataType.FLOAT + + def test_common_time_name_detection(self, create_tdms_file): + """Channels named 'time', 'Time', etc. are detected as time channels.""" + path = create_tdms_file( + groups=[ + ( + "data", + [ + ChannelObject( + "data", + "time", + np.array([0.0, 0.1, 0.2], dtype="float64"), + ), + ChannelObject( + "data", + "pressure", + np.array([101.3, 101.4, 101.5], dtype="float64"), + ), + ], + ) + ] + ) + + config = detect_config(path) + + channel_names = [d.name for d in config.data] + assert "data.time" not in channel_names + assert "data.pressure" in channel_names + assert config.data[0].time_channel_name == "time" + + def test_complex_channels_split(self, create_tdms_file): + """Complex-valued channels are split into .real and .imag entries.""" + path = create_tdms_file( + groups=[ + ( + "rf", + [ + ChannelObject( + "rf", + "signal", + np.array([1 + 2j, 3 + 4j], dtype="complex128"), + properties={ + "wf_start_offset": 0.0, + "wf_increment": 0.001, + "wf_start_time": np.datetime64("2024-01-01T00:00:00"), + }, + ), + ], + ) + ] + ) + + config = detect_config(path) + + assert len(config.data) == 2 + names = [d.name for d in config.data] + assert "rf.signal.real" in names + assert "rf.signal.imag" in names + + real_col = next(d for d in config.data if d.name == "rf.signal.real") + imag_col = next(d for d in config.data if d.name == "rf.signal.imag") + assert real_col.complex_component == TdmsComplexComponent.REAL + assert imag_col.complex_component == TdmsComplexComponent.IMAGINARY + assert real_col.data_type 
== ChannelDataType.DOUBLE + assert imag_col.data_type == ChannelDataType.DOUBLE + + def test_unit_and_description_detection(self, create_tdms_file): + """Units and descriptions are read from TDMS channel properties.""" + path = create_tdms_file( + groups=[ + ( + "sensors", + [ + ChannelObject( + "sensors", + "voltage", + np.array([1.0, 2.0], dtype="float64"), + properties={ + "wf_start_offset": 0.0, + "wf_increment": 0.001, + "wf_start_time": np.datetime64("2024-01-01T00:00:00"), + "unit_string": "V", + "description": "Supply voltage", + }, + ), + ], + ) + ] + ) + + config = detect_config(path) + + assert config.data[0].units == "V" + assert config.data[0].description == "Supply voltage" + + def test_fallback_fail_on_error(self, create_tdms_file): + """Channels without timing info raise ValueError when fallback is FAIL_ON_ERROR.""" + path = create_tdms_file( + groups=[ + ( + "data", + [ + ChannelObject( + "data", + "orphan", + np.array([1.0, 2.0], dtype="float64"), + ), + ], + ) + ] + ) + + with pytest.raises(ValueError, match="No timing information"): + detect_config(path, fallback_method=TdmsFallbackMethod.FAIL_ON_ERROR) + + def test_fallback_ignore_error(self, create_tdms_file): + """Channels without timing info are silently skipped when fallback is IGNORE_ERROR.""" + path = create_tdms_file( + groups=[ + ( + "data", + [ + ChannelObject( + "data", + "orphan", + np.array([1.0, 2.0], dtype="float64"), + ), + ], + ) + ] + ) + + config = detect_config(path, fallback_method=TdmsFallbackMethod.IGNORE_ERROR) + + assert len(config.data) == 0 + assert config.fallback_method == TdmsFallbackMethod.IGNORE_ERROR + + def test_multiple_groups(self, create_tdms_file): + """Channels from multiple groups are all detected with correct group_name.""" + path = create_tdms_file( + groups=[ + ( + "group_a", + [ + ChannelObject( + "group_a", + "ch1", + np.array([1.0, 2.0], dtype="float64"), + properties={ + "wf_start_offset": 0.0, + "wf_increment": 0.001, + "wf_start_time": 
np.datetime64("2024-01-01T00:00:00"), + }, + ), + ], + ), + ( + "group_b", + [ + ChannelObject( + "group_b", + "ch2", + np.array([3, 4], dtype="int32"), + properties={ + "wf_start_offset": 0.0, + "wf_increment": 0.001, + "wf_start_time": np.datetime64("2024-01-01T00:00:00"), + }, + ), + ], + ), + ] + ) + + config = detect_config(path) + + assert len(config.data) == 2 + assert config.data[0].group_name == "group_a" + assert config.data[0].name == "group_a.ch1" + assert config.data[0].data_type == ChannelDataType.DOUBLE + assert config.data[1].group_name == "group_b" + assert config.data[1].name == "group_b.ch2" + assert config.data[1].data_type == ChannelDataType.INT_32 + + def test_enum_channel_detection(self, create_tdms_file): + """Channels with enum_config property are detected as ENUM type with enum_types populated.""" + import json + + enum_config = json.dumps({"0": "Off", "1": "On", "2": "Error"}) + path = create_tdms_file( + groups=[ + ( + "status", + [ + ChannelObject( + "status", + "state", + np.array([0, 1, 2], dtype="uint32"), + properties={ + "wf_start_offset": 0.0, + "wf_increment": 1.0, + "wf_start_time": np.datetime64("2024-01-01T00:00:00"), + "enum_config": enum_config, + }, + ), + ], + ) + ] + ) + + config = detect_config(path) + + assert len(config.data) == 1 + assert config.data[0].data_type == ChannelDataType.ENUM + assert config.data[0].enum_types == {"Off": 0, "On": 1, "Error": 2} + + def test_asset_name_passthrough(self, create_tdms_file): + """The asset_name parameter is set on the returned config.""" + path = create_tdms_file( + groups=[ + ( + "g", + [ + ChannelObject( + "g", + "ch", + np.array([1.0], dtype="float64"), + properties={ + "wf_start_offset": 0.0, + "wf_increment": 0.001, + "wf_start_time": np.datetime64("2024-01-01T00:00:00"), + }, + ), + ], + ) + ] + ) + + config = detect_config(path, asset_name="my-asset") + + assert config.asset_name == "my-asset" + + def test_xchannel_property(self, create_tdms_file): + """Group-level 
'xchannel' property overrides time channel detection.""" + path = create_tdms_file( + groups=[ + ( + "data", + [ + ChannelObject( + "data", + "custom_time", + np.array([0.0, 1.0, 2.0], dtype="float64"), + ), + ChannelObject( + "data", + "value", + np.array([10.0, 20.0, 30.0], dtype="float64"), + ), + ], + ) + ] + ) + + # nptdms TdmsWriter doesn't support group-level properties directly in segments, + # so we write the file and then patch the group property by re-reading/writing. + # Instead, test via the find_time_channel helper. + from nptdms import TdmsFile + + from sift_client._internal.util.tdms import find_time_channel + + with TdmsFile.open(path) as tdms_file: + group = tdms_file["data"] + # Simulate xchannel property + group.properties["xchannel"] = "custom_time" + result = find_time_channel(group) + + assert result == "custom_time" From d4ad373ae5032a38a2866ea178f52fb271deb0f2 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 15 Apr 2026 15:51:22 -0700 Subject: [PATCH 06/16] set tdms fallback method default to fail-on-error --- python/lib/sift_client/sift_types/data_import.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py index 337b56773..27053c982 100644 --- a/python/lib/sift_client/sift_types/data_import.py +++ b/python/lib/sift_client/sift_types/data_import.py @@ -589,7 +589,7 @@ class TdmsImportConfig(ImportConfigBase): start_time_override: datetime | None = None data: list[TdmsDataColumn] = [] - fallback_method: TdmsFallbackMethod | None = None + fallback_method: TdmsFallbackMethod = TdmsFallbackMethod.FAIL_ON_ERROR time_format: TimeFormat | None = None relative_start_time: datetime | None = None import_file_properties: bool = False @@ -610,8 +610,7 @@ def _to_proto(self) -> TDMSConfigProto: ) if self.start_time_override is not None: proto.start_time_override.CopyFrom(to_pb_timestamp(self.start_time_override)) - if 
self.fallback_method is not None: - proto.fallback_method = self.fallback_method.value + proto.fallback_method = self.fallback_method.value if self.time_format is not None: proto.time_format = self.time_format.value if self.relative_start_time is not None: From e4ece748250531d4ed7040971768c86da22d648c Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 15 Apr 2026 17:13:14 -0700 Subject: [PATCH 07/16] mypy fix --- python/lib/sift_client/sift_types/data_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py index 27053c982..ff55e9beb 100644 --- a/python/lib/sift_client/sift_types/data_import.py +++ b/python/lib/sift_client/sift_types/data_import.py @@ -679,7 +679,7 @@ def _from_proto(cls, proto: TDMSConfigProto) -> TdmsImportConfig: ) ) - fallback_method = None + fallback_method = TdmsFallbackMethod.FAIL_ON_ERROR if proto.fallback_method and proto.fallback_method != TDMS_FALLBACK_METHOD_UNSPECIFIED: fallback_method = TdmsFallbackMethod(proto.fallback_method) From b17b091d77f1d5f77cb1e0226553d98d7fb1e378 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 16 Apr 2026 12:32:25 -0700 Subject: [PATCH 08/16] refactor: extract numpy_to_sift_type and mapping into shared util --- python/lib/sift_client/_internal/util/hdf5.py | 40 ++----------------- .../sift_client/_internal/util/numpy_types.py | 38 ++++++++++++++++++ python/lib/sift_client/_internal/util/tdms.py | 35 ++-------------- 3 files changed, 44 insertions(+), 69 deletions(-) create mode 100644 python/lib/sift_client/_internal/util/numpy_types.py diff --git a/python/lib/sift_client/_internal/util/hdf5.py b/python/lib/sift_client/_internal/util/hdf5.py index c0edfe820..507b039f0 100644 --- a/python/lib/sift_client/_internal/util/hdf5.py +++ b/python/lib/sift_client/_internal/util/hdf5.py @@ -3,9 +3,8 @@ from pathlib import Path import h5py -import numpy as np -from sift_client.sift_types.channel 
import ChannelDataType +from sift_client._internal.util.numpy_types import numpy_to_sift_type from sift_client.sift_types.data_import import Hdf5DataColumn, Hdf5ImportConfig, TimeFormat # Common HDF5 attribute names used to detect channel metadata. @@ -13,31 +12,6 @@ _UNIT_ATTRS = ["Unit", "unit", "Units", "units"] _DESCRIPTION_ATTRS = ["Description", "description"] -_NUMPY_TO_SIFT: dict[type, ChannelDataType] = { - np.bool_: ChannelDataType.BOOL, - np.int8: ChannelDataType.INT_32, - np.int16: ChannelDataType.INT_32, - np.int32: ChannelDataType.INT_32, - np.int64: ChannelDataType.INT_64, - np.uint8: ChannelDataType.UINT_32, - np.uint16: ChannelDataType.UINT_32, - np.uint32: ChannelDataType.UINT_32, - np.uint64: ChannelDataType.UINT_64, - np.float32: ChannelDataType.FLOAT, - np.float64: ChannelDataType.DOUBLE, - np.datetime64: ChannelDataType.INT_64, - np.complex64: ChannelDataType.FLOAT, - np.complex128: ChannelDataType.DOUBLE, - np.str_: ChannelDataType.STRING, - # HDF5/TDMS fixed-length strings are stored as np.bytes_; use STRING, not - # BYTES (np.void below handles truly opaque binary data). - np.bytes_: ChannelDataType.STRING, - # Numpy uses object dtype for variable-length strings; TDMS/HDF5 files - # cannot produce non-string object arrays. 
- np.object_: ChannelDataType.STRING, - np.void: ChannelDataType.BYTES, -} - def _detect_attr(dataset: h5py.Dataset, candidates: list[str], default: str = "") -> str: """Return the first matching HDF5 attribute value, or *default*.""" @@ -45,14 +19,6 @@ def _detect_attr(dataset: h5py.Dataset, candidates: list[str], default: str = "" return str(possible[0]) if possible else default -def _numpy_to_sift_type(dtype: np.dtype) -> ChannelDataType: - """Map a numpy dtype to a Sift ChannelDataType.""" - sift_type = _NUMPY_TO_SIFT.get(dtype.type) - if sift_type is None: - raise ValueError(f"Unsupported numpy dtype: {dtype}") - return sift_type - - def detect_hdf5_config(file_path: str | Path) -> Hdf5ImportConfig: """Detect an HDF5 import config by inspecting the file's datasets. @@ -88,7 +54,7 @@ def _visit(dataset_name: str, obj: object) -> None: columns.append( Hdf5DataColumn( name=channel_name, - data_type=_numpy_to_sift_type(obj.dtype[value_index]), + data_type=numpy_to_sift_type(obj.dtype[value_index]), units=_detect_attr(obj, _UNIT_ATTRS), description=_detect_attr(obj, _DESCRIPTION_ATTRS), time_dataset=dataset_name, @@ -110,7 +76,7 @@ def _visit(dataset_name: str, obj: object) -> None: columns.append( Hdf5DataColumn( name=channel_name, - data_type=_numpy_to_sift_type(obj.dtype), + data_type=numpy_to_sift_type(obj.dtype), units=_detect_attr(obj, _UNIT_ATTRS), description=_detect_attr(obj, _DESCRIPTION_ATTRS), time_dataset="time" if has_root_time else "", diff --git a/python/lib/sift_client/_internal/util/numpy_types.py b/python/lib/sift_client/_internal/util/numpy_types.py new file mode 100644 index 000000000..46f8a3a38 --- /dev/null +++ b/python/lib/sift_client/_internal/util/numpy_types.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import numpy as np + +from sift_client.sift_types.channel import ChannelDataType + +NUMPY_TO_SIFT_TYPE: dict[type, ChannelDataType] = { + np.bool_: ChannelDataType.BOOL, + np.int8: ChannelDataType.INT_32, + np.int16: 
ChannelDataType.INT_32, + np.int32: ChannelDataType.INT_32, + np.int64: ChannelDataType.INT_64, + np.uint8: ChannelDataType.UINT_32, + np.uint16: ChannelDataType.UINT_32, + np.uint32: ChannelDataType.UINT_32, + np.uint64: ChannelDataType.UINT_64, + np.float32: ChannelDataType.FLOAT, + np.float64: ChannelDataType.DOUBLE, + np.datetime64: ChannelDataType.INT_64, + np.complex64: ChannelDataType.FLOAT, + np.complex128: ChannelDataType.DOUBLE, + np.str_: ChannelDataType.STRING, + # HDF5/TDMS fixed-length strings are stored as np.bytes_; use STRING, not + # BYTES (np.void below handles truly opaque binary data). + np.bytes_: ChannelDataType.STRING, + # Numpy uses object dtype for variable-length strings; TDMS/HDF5 files + # cannot produce non-string object arrays. + np.object_: ChannelDataType.STRING, + np.void: ChannelDataType.BYTES, +} + + +def numpy_to_sift_type(dtype: np.dtype) -> ChannelDataType: + """Map a numpy dtype to a Sift ChannelDataType.""" + sift_type = NUMPY_TO_SIFT_TYPE.get(dtype.type) + if sift_type is None: + raise ValueError(f"Unsupported numpy dtype: {dtype}") + return sift_type diff --git a/python/lib/sift_client/_internal/util/tdms.py b/python/lib/sift_client/_internal/util/tdms.py index f35454f32..d7fbd1e51 100644 --- a/python/lib/sift_client/_internal/util/tdms.py +++ b/python/lib/sift_client/_internal/util/tdms.py @@ -9,6 +9,7 @@ from pathlib import Path from nptdms import TdmsChannel, TdmsFile, TdmsGroup, types +from sift_client._internal.util.numpy_types import numpy_to_sift_type from sift_client.sift_types.channel import ChannelDataType from sift_client.sift_types.data_import import ( TdmsComplexComponent, @@ -46,36 +47,6 @@ "nanoseconds", ] -# Mapping from numpy scalar type to Sift channel data type. 
-_NUMPY_TO_SIFT: dict[type, ChannelDataType] = { - np.bool_: ChannelDataType.BOOL, - np.int8: ChannelDataType.INT_32, - np.int16: ChannelDataType.INT_32, - np.int32: ChannelDataType.INT_32, - np.int64: ChannelDataType.INT_64, - np.uint8: ChannelDataType.UINT_32, - np.uint16: ChannelDataType.UINT_32, - np.uint32: ChannelDataType.UINT_32, - np.uint64: ChannelDataType.UINT_64, - np.float32: ChannelDataType.FLOAT, - np.float64: ChannelDataType.DOUBLE, - np.datetime64: ChannelDataType.INT_64, - np.complex64: ChannelDataType.FLOAT, - np.complex128: ChannelDataType.DOUBLE, - np.str_: ChannelDataType.STRING, - np.bytes_: ChannelDataType.STRING, - np.object_: ChannelDataType.STRING, - np.void: ChannelDataType.BYTES, -} - - -def _numpy_to_sift_type(dtype: np.dtype) -> ChannelDataType: - """Map a numpy dtype to a Sift ChannelDataType.""" - sift_type = _NUMPY_TO_SIFT.get(dtype.type) - if sift_type is None: - raise ValueError(f"Unsupported numpy dtype: {dtype}") - return sift_type - def detect_properties(channel: TdmsChannel, possible_props: list, default: str = "") -> str: """Return the first matching property value from a list of possible property names.""" @@ -187,7 +158,7 @@ def detect_config( candidates: list[tuple[str, ChannelDataType, TdmsComplexComponent | None]] = [] if np.issubdtype(channel.dtype, np.complexfloating): # Split complex channel into separate .real and .imag channels. 
- sift_type = _numpy_to_sift_type(channel.dtype) + sift_type = numpy_to_sift_type(channel.dtype) candidates.append( (f"{channel_name}.real", sift_type, TdmsComplexComponent.REAL) ) @@ -196,7 +167,7 @@ def detect_config( ) else: sift_type = ( - ChannelDataType.ENUM if enum_types else _numpy_to_sift_type(channel.dtype) + ChannelDataType.ENUM if enum_types else numpy_to_sift_type(channel.dtype) ) candidates.append((channel_name, sift_type, None)) From ec1ad08bd24593ce6e9fc97648ccf9f129b767e0 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 16 Apr 2026 16:05:54 -0700 Subject: [PATCH 09/16] updated time channel detection on tdms groups --- python/lib/sift_client/_internal/util/tdms.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/python/lib/sift_client/_internal/util/tdms.py b/python/lib/sift_client/_internal/util/tdms.py index d7fbd1e51..ddbf0aa59 100644 --- a/python/lib/sift_client/_internal/util/tdms.py +++ b/python/lib/sift_client/_internal/util/tdms.py @@ -95,8 +95,7 @@ def find_time_channel(group: TdmsGroup) -> str | None: Detection order: 1. Group-level 'xchannel' property. - 2. Any channel with TDMS TimeStamp data type. - 3. Any channel matching a common time name. + 2. Look for the time channel in the first index. """ channels = group.channels() channel_names = {ch.name for ch in channels} @@ -106,15 +105,9 @@ def find_time_channel(group: TdmsGroup) -> str | None: if xchannel and xchannel in channel_names: return xchannel - # 2. Native datetime type. - for ch in channels: - if ch.data_type == types.TimeStamp: - return ch.name - - # 3. Common time names. - for name in TIME_CHANNEL_NAMES: - if name in channel_names: - return name + # 2. 
Native datetime type in first index + if channels and channels[0].data_type == types.TimeStamp: + return channels[0].name return None From ca03b12dc52cba2286df37d522bb4ee73b50dc86 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 16 Apr 2026 16:11:46 -0700 Subject: [PATCH 10/16] drop name-based time-channel fallback, update tests --- python/lib/sift_client/_internal/util/tdms.py | 3 --- .../sift_client/_tests/_internal/test_tdms.py | 17 +++++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/python/lib/sift_client/_internal/util/tdms.py b/python/lib/sift_client/_internal/util/tdms.py index ddbf0aa59..044b3c923 100644 --- a/python/lib/sift_client/_internal/util/tdms.py +++ b/python/lib/sift_client/_internal/util/tdms.py @@ -18,9 +18,6 @@ TdmsImportConfig, ) -# Common names for dedicated time channels within a group -TIME_CHANNEL_NAMES = ["time", "Time", "TIME", "timestamp", "Timestamp", "t", "T"] - # Common property names used to detect the units of a channel in TDMS files. COMMON_UNIT_PROPS = [ "unit_string", diff --git a/python/lib/sift_client/_tests/_internal/test_tdms.py b/python/lib/sift_client/_tests/_internal/test_tdms.py index f4e2df408..ee8d2bb69 100644 --- a/python/lib/sift_client/_tests/_internal/test_tdms.py +++ b/python/lib/sift_client/_tests/_internal/test_tdms.py @@ -97,8 +97,13 @@ def test_time_channel_detection(self, create_tdms_file): assert config.data[0].time_channel_name == "timestamp" assert config.data[0].data_type == ChannelDataType.FLOAT - def test_common_time_name_detection(self, create_tdms_file): - """Channels named 'time', 'Time', etc. are detected as time channels.""" + def test_common_time_name_not_detected(self, create_tdms_file): + """A channel named 'time' without TimeStamp dtype is NOT auto-detected. + + The TDMS protocol requires either a group-level 'xchannel' property + or a native TimeStamp-typed first channel. Name-based fallbacks + were intentionally removed. 
+ """ path = create_tdms_file( groups=[ ( @@ -119,12 +124,8 @@ def test_common_time_name_detection(self, create_tdms_file): ] ) - config = detect_config(path) - - channel_names = [d.name for d in config.data] - assert "data.time" not in channel_names - assert "data.pressure" in channel_names - assert config.data[0].time_channel_name == "time" + with pytest.raises(ValueError, match="No timing information"): + detect_config(path) def test_complex_channels_split(self, create_tdms_file): """Complex-valued channels are split into .real and .imag entries.""" From 8eb6ace8ad8ae796163a04665d3906151e1477ed Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 16 Apr 2026 22:33:38 -0700 Subject: [PATCH 11/16] include group description alongside channel --- python/lib/sift_client/_internal/util/tdms.py | 30 ++++++++++++++----- .../sift_client/_tests/_internal/test_tdms.py | 2 +- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/python/lib/sift_client/_internal/util/tdms.py b/python/lib/sift_client/_internal/util/tdms.py index 044b3c923..6e09bfee8 100644 --- a/python/lib/sift_client/_internal/util/tdms.py +++ b/python/lib/sift_client/_internal/util/tdms.py @@ -45,15 +45,24 @@ ] -def detect_properties(channel: TdmsChannel, possible_props: list, default: str = "") -> str: +def detect_properties(obj: TdmsChannel | TdmsGroup, possible_props: list, default: str = "") -> str: """Return the first matching property value from a list of possible property names.""" for prop in possible_props: - value = channel.properties.get(prop) + value = obj.properties.get(prop) if value: return value return default +def create_description(group_description: str, channel_description: str) -> str: + """Combine TDMS group and channel descriptions into a single Sift description.""" + group_description = group_description.strip() + channel_description = channel_description.strip() + group_entry = f"Group: {group_description}" if group_description else "" + channel_entry = f"Channel:
{channel_description}" if channel_description else "" + return "\n".join([group_entry, channel_entry]).strip() + + def detect_enum_types(channel: TdmsChannel) -> dict[str, int] | None: """Check if the TDMS channel is embedded with enum configs. @@ -93,6 +102,8 @@ def find_time_channel(group: TdmsGroup) -> str | None: Detection order: 1. Group-level 'xchannel' property. 2. Look for the time channel in the first index. + + https://www.ni.com/en/support/documentation/supplemental/12/writing-data-management-ready-tdms-files.html """ channels = group.channels() channel_names = {ch.name for ch in channels} @@ -130,6 +141,7 @@ def detect_config( for group in tdms_file.groups(): group_name = group.name time_channel_name = find_time_channel(group) + group_description = detect_properties(group, COMMON_DESCRIPTION_PROPS) for channel in group.channels(): tdms_channel_name = channel.name @@ -142,7 +154,8 @@ def detect_config( channel_name = f"{group_name}.{tdms_channel_name}" units = detect_properties(channel, COMMON_UNIT_PROPS) - description = detect_properties(channel, COMMON_DESCRIPTION_PROPS) + channel_description = detect_properties(channel, COMMON_DESCRIPTION_PROPS) + description = create_description(group_description, channel_description) enum_types = detect_enum_types(channel) candidates: list[tuple[str, ChannelDataType, TdmsComplexComponent | None]] = [] @@ -162,7 +175,10 @@ def detect_config( candidates.append((channel_name, sift_type, None)) for name, data_type, complex_component in candidates: - if is_waveform_time_channel(channel): + # If a time channel is present, that takes priority. + # Some applications will generate invalid waveform + # properties that are not meant to be used. 
+ if time_channel_name is not None: data.append( TdmsDataColumn( group_name=group_name, @@ -171,12 +187,12 @@ def detect_config( data_type=data_type, units=units, description=description, - time_channel_name=None, + time_channel_name=time_channel_name, complex_component=complex_component, enum_types=enum_types, ) ) - elif time_channel_name is not None: + elif is_waveform_time_channel(channel): data.append( TdmsDataColumn( group_name=group_name, @@ -185,7 +201,7 @@ def detect_config( data_type=data_type, units=units, description=description, - time_channel_name=time_channel_name, + time_channel_name=None, complex_component=complex_component, enum_types=enum_types, ) diff --git a/python/lib/sift_client/_tests/_internal/test_tdms.py b/python/lib/sift_client/_tests/_internal/test_tdms.py index ee8d2bb69..8a7ae5fbe 100644 --- a/python/lib/sift_client/_tests/_internal/test_tdms.py +++ b/python/lib/sift_client/_tests/_internal/test_tdms.py @@ -190,7 +190,7 @@ def test_unit_and_description_detection(self, create_tdms_file): config = detect_config(path) assert config.data[0].units == "V" - assert config.data[0].description == "Supply voltage" + assert config.data[0].description == "Channel: Supply voltage" def test_fallback_fail_on_error(self, create_tdms_file): """Channels without timing info raise ValueError when fallback is FAIL_ON_ERROR.""" From bc3352a7ee30c7c869502f1c9d2cd7c113ba504a Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 17 Apr 2026 12:08:25 -0700 Subject: [PATCH 12/16] removed redundant unit list --- python/lib/sift_client/_internal/util/tdms.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/python/lib/sift_client/_internal/util/tdms.py b/python/lib/sift_client/_internal/util/tdms.py index 6e09bfee8..32375a349 100644 --- a/python/lib/sift_client/_internal/util/tdms.py +++ b/python/lib/sift_client/_internal/util/tdms.py @@ -27,23 +27,6 @@ # Common property names used to detect the description of a channel in TDMS files. 
COMMON_DESCRIPTION_PROPS = ["description", "NI_Description", "Description"] -# Common unit strings that indicate a channel represents time (waveform x-axis). -COMMON_WAVEFORM_TIME_UNITS = [ - "s", - "sec", - "second", - "seconds", - "ms", - "millisecond", - "milliseconds", - "us", - "microsecond", - "microseconds", - "ns", - "nanosecond", - "nanoseconds", -] - def detect_properties(obj: TdmsChannel | TdmsGroup, possible_props: list, default: str = "") -> str: """Return the first matching property value from a list of possible property names.""" From 89167da483f250f9d78beff45e91e3fec8a22c1f Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 17 Apr 2026 12:20:40 -0700 Subject: [PATCH 13/16] use bytesIO in tests, rename detect_config to detect_tdms_config --- python/lib/sift_client/_internal/util/tdms.py | 7 ++- .../sift_client/_tests/_internal/test_tdms.py | 63 ++++++++++--------- .../lib/sift_client/resources/data_imports.py | 2 +- 3 files changed, 38 insertions(+), 34 deletions(-) diff --git a/python/lib/sift_client/_internal/util/tdms.py b/python/lib/sift_client/_internal/util/tdms.py index 32375a349..1a3417091 100644 --- a/python/lib/sift_client/_internal/util/tdms.py +++ b/python/lib/sift_client/_internal/util/tdms.py @@ -7,6 +7,7 @@ if TYPE_CHECKING: from pathlib import Path + from typing import BinaryIO from nptdms import TdmsChannel, TdmsFile, TdmsGroup, types from sift_client._internal.util.numpy_types import numpy_to_sift_type @@ -103,15 +104,15 @@ def find_time_channel(group: TdmsGroup) -> str | None: return None -def detect_config( - file_path: str | Path, +def detect_tdms_config( + file_path: str | Path | BinaryIO, asset_name: str = "", fallback_method: TdmsFallbackMethod = TdmsFallbackMethod.FAIL_ON_ERROR, ) -> TdmsImportConfig: """Detect a TDMS import config by inspecting the file's channels. Args: - file_path: Path to the TDMS file. + file_path: Path to the TDMS file, or a binary file-like object. asset_name: The asset name to set on the config. 
fallback_method: How to handle channels with missing timing information. diff --git a/python/lib/sift_client/_tests/_internal/test_tdms.py b/python/lib/sift_client/_tests/_internal/test_tdms.py index 8a7ae5fbe..d0e18b0a3 100644 --- a/python/lib/sift_client/_tests/_internal/test_tdms.py +++ b/python/lib/sift_client/_tests/_internal/test_tdms.py @@ -1,33 +1,36 @@ -"""Tests for detect_config (TDMS).""" +"""Tests for detect_tdms_config.""" + +import io import numpy as np import pytest from nptdms import ChannelObject, GroupObject, RootObject, TdmsWriter -from sift_client._internal.util.tdms import detect_config +from sift_client._internal.util.tdms import detect_tdms_config from sift_client.sift_types.channel import ChannelDataType from sift_client.sift_types.data_import import TdmsComplexComponent, TdmsFallbackMethod @pytest.fixture -def create_tdms_file(tmp_path): - """Return a helper that writes a TDMS file and returns its path.""" - file_path = tmp_path / "test.tdms" +def create_tdms_file(): + """Return a helper that writes a TDMS file in memory and returns a BytesIO buffer.""" def _create(root_props=None, groups=None): - """Write a TDMS file. + """Write a TDMS file to an in-memory buffer. Args: root_props: dict of root-level file properties. groups: list of (group_name, channels) tuples where channels is a list of ChannelObject instances. 
""" + buf = io.BytesIO() root = RootObject(properties=root_props or {}) - with TdmsWriter(file_path) as writer: + with TdmsWriter(buf) as writer: for group_name, channels in groups or []: group = GroupObject(group_name) writer.write_segment([root, group, *channels]) - return file_path + buf.seek(0) + return buf return _create @@ -35,7 +38,7 @@ def _create(root_props=None, groups=None): class TestDetectConfig: def test_waveform_channels(self, create_tdms_file): """Channels with wf_start_offset and wf_increment are detected as waveform channels.""" - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "sensors", @@ -55,7 +58,7 @@ def test_waveform_channels(self, create_tdms_file): ] ) - config = detect_config(path) + config = detect_tdms_config(tdms) assert len(config.data) == 1 assert config.data[0].name == "sensors.voltage" @@ -66,7 +69,7 @@ def test_waveform_channels(self, create_tdms_file): def test_time_channel_detection(self, create_tdms_file): """A channel with TimeStamp type is used as the time source and excluded from data.""" - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "group1", @@ -89,7 +92,7 @@ def test_time_channel_detection(self, create_tdms_file): ] ) - config = detect_config(path) + config = detect_tdms_config(tdms) channel_names = [d.name for d in config.data] assert "group1.timestamp" not in channel_names @@ -104,7 +107,7 @@ def test_common_time_name_not_detected(self, create_tdms_file): or a native TimeStamp-typed first channel. Name-based fallbacks were intentionally removed. 
""" - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "data", @@ -125,11 +128,11 @@ def test_common_time_name_not_detected(self, create_tdms_file): ) with pytest.raises(ValueError, match="No timing information"): - detect_config(path) + detect_tdms_config(tdms) def test_complex_channels_split(self, create_tdms_file): """Complex-valued channels are split into .real and .imag entries.""" - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "rf", @@ -149,7 +152,7 @@ def test_complex_channels_split(self, create_tdms_file): ] ) - config = detect_config(path) + config = detect_tdms_config(tdms) assert len(config.data) == 2 names = [d.name for d in config.data] @@ -165,7 +168,7 @@ def test_complex_channels_split(self, create_tdms_file): def test_unit_and_description_detection(self, create_tdms_file): """Units and descriptions are read from TDMS channel properties.""" - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "sensors", @@ -187,14 +190,14 @@ def test_unit_and_description_detection(self, create_tdms_file): ] ) - config = detect_config(path) + config = detect_tdms_config(tdms) assert config.data[0].units == "V" assert config.data[0].description == "Channel: Supply voltage" def test_fallback_fail_on_error(self, create_tdms_file): """Channels without timing info raise ValueError when fallback is FAIL_ON_ERROR.""" - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "data", @@ -210,11 +213,11 @@ def test_fallback_fail_on_error(self, create_tdms_file): ) with pytest.raises(ValueError, match="No timing information"): - detect_config(path, fallback_method=TdmsFallbackMethod.FAIL_ON_ERROR) + detect_tdms_config(tdms, fallback_method=TdmsFallbackMethod.FAIL_ON_ERROR) def test_fallback_ignore_error(self, create_tdms_file): """Channels without timing info are silently skipped when fallback is IGNORE_ERROR.""" - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "data", @@ -229,14 +232,14 @@ def 
test_fallback_ignore_error(self, create_tdms_file): ] ) - config = detect_config(path, fallback_method=TdmsFallbackMethod.IGNORE_ERROR) + config = detect_tdms_config(tdms, fallback_method=TdmsFallbackMethod.IGNORE_ERROR) assert len(config.data) == 0 assert config.fallback_method == TdmsFallbackMethod.IGNORE_ERROR def test_multiple_groups(self, create_tdms_file): """Channels from multiple groups are all detected with correct group_name.""" - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "group_a", @@ -271,7 +274,7 @@ def test_multiple_groups(self, create_tdms_file): ] ) - config = detect_config(path) + config = detect_tdms_config(tdms) assert len(config.data) == 2 assert config.data[0].group_name == "group_a" @@ -286,7 +289,7 @@ def test_enum_channel_detection(self, create_tdms_file): import json enum_config = json.dumps({"0": "Off", "1": "On", "2": "Error"}) - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "status", @@ -307,7 +310,7 @@ def test_enum_channel_detection(self, create_tdms_file): ] ) - config = detect_config(path) + config = detect_tdms_config(tdms) assert len(config.data) == 1 assert config.data[0].data_type == ChannelDataType.ENUM @@ -315,7 +318,7 @@ def test_enum_channel_detection(self, create_tdms_file): def test_asset_name_passthrough(self, create_tdms_file): """The asset_name parameter is set on the returned config.""" - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "g", @@ -335,13 +338,13 @@ def test_asset_name_passthrough(self, create_tdms_file): ] ) - config = detect_config(path, asset_name="my-asset") + config = detect_tdms_config(tdms, asset_name="my-asset") assert config.asset_name == "my-asset" def test_xchannel_property(self, create_tdms_file): """Group-level 'xchannel' property overrides time channel detection.""" - path = create_tdms_file( + tdms = create_tdms_file( groups=[ ( "data", @@ -368,7 +371,7 @@ def test_xchannel_property(self, create_tdms_file): from 
sift_client._internal.util.tdms import find_time_channel - with TdmsFile.open(path) as tdms_file: + with TdmsFile.open(tdms) as tdms_file: group = tdms_file["data"] # Simulate xchannel property group.properties["xchannel"] = "custom_time" diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py index 5b4651442..ba83fc4d0 100644 --- a/python/lib/sift_client/resources/data_imports.py +++ b/python/lib/sift_client/resources/data_imports.py @@ -7,7 +7,7 @@ from sift_client._internal.util.executor import run_sync_function from sift_client._internal.util.file import extract_parquet_footer, upload_file from sift_client._internal.util.hdf5 import detect_hdf5_config -from sift_client._internal.util.tdms import detect_config as detect_tdms_config +from sift_client._internal.util.tdms import detect_tdms_config from sift_client.resources._base import ResourceBase from sift_client.sift_types.asset import Asset from sift_client.sift_types.channel import ChannelDataType From 8c20a8f40edb517380bc08e3b77eb3e46e430c3a Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 17 Apr 2026 12:47:08 -0700 Subject: [PATCH 14/16] clarify TdmsFallbackMethod docstring, update error message --- python/lib/sift_client/resources/data_imports.py | 3 +-- python/lib/sift_client/sift_types/data_import.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py index ba83fc4d0..458937300 100644 --- a/python/lib/sift_client/resources/data_imports.py +++ b/python/lib/sift_client/resources/data_imports.py @@ -282,8 +282,7 @@ def _read_sample() -> bytes: raise ValueError( f"No supported configuration detected for '{path.name}'. " - "Auto-detection supports CSV and Parquet files. " - "For other formats, provide a config manually." + "Only CSV, Parquet, HDF5, and TDMS are supported by auto-detection." 
) diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py index ff55e9beb..cd85e385e 100644 --- a/python/lib/sift_client/sift_types/data_import.py +++ b/python/lib/sift_client/sift_types/data_import.py @@ -542,10 +542,20 @@ def _from_proto( class TdmsFallbackMethod(Enum): - """Controls behavior when TDMS channels lack timing information.""" + """Controls handling of TDMS channels without timing information + during ``detect_tdms_config``. + + A channel has timing information when the group defines an + ``xchannel``, the first channel is a ``TimeStamp`` type, or the + channel has waveform properties (``wf_start_offset`` + + ``wf_increment``). + """ FAIL_ON_ERROR = TDMS_FALLBACK_METHOD_FAIL_ON_ERROR + """Raise if any channel lacks timing information.""" + IGNORE_ERROR = TDMS_FALLBACK_METHOD_IGNORE_ERROR + """Skip channels that lack timing information.""" class TdmsComplexComponent(Enum): From 319b3c99aaca010c1f6255893ce2dca94b4df4c3 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 17 Apr 2026 12:51:35 -0700 Subject: [PATCH 15/16] clarify tdmsfallback use-case --- python/lib/sift_client/sift_types/data_import.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py index cd85e385e..d6577931c 100644 --- a/python/lib/sift_client/sift_types/data_import.py +++ b/python/lib/sift_client/sift_types/data_import.py @@ -549,6 +549,12 @@ class TdmsFallbackMethod(Enum): ``xchannel``, the first channel is a ``TimeStamp`` type, or the channel has waveform properties (``wf_start_offset`` + ``wf_increment``). + + Use ``IGNORE_ERROR`` when a file mixes timeseries and + non-timeseries data (e.g., binary blobs, spectra) and you want the + non-timeseries channels skipped. Alternatively, build + ``TdmsImportConfig.data`` explicitly to import only the valid + timeseries channels. 
""" FAIL_ON_ERROR = TDMS_FALLBACK_METHOD_FAIL_ON_ERROR From b5db471f315e19edf0d4aea985f5d71f174bd866 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 17 Apr 2026 14:02:51 -0700 Subject: [PATCH 16/16] updated tdms fallback doc --- python/lib/sift_client/sift_types/data_import.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py index d6577931c..beb2174c4 100644 --- a/python/lib/sift_client/sift_types/data_import.py +++ b/python/lib/sift_client/sift_types/data_import.py @@ -543,7 +543,10 @@ def _from_proto( class TdmsFallbackMethod(Enum): """Controls handling of TDMS channels without timing information - during ``detect_tdms_config``. + during auto-detection. + + Auto-detection runs when you call ``detect_tdms_config`` or when + you import a ``TdmsImportConfig`` with empty ``data``. A channel has timing information when the group defines an ``xchannel``, the first channel is a ``TimeStamp`` type, or the