Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 172 additions & 1 deletion python/lib/sift_py/data_import/_tdms_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
import io
import json
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
import pytest
from nptdms import TdmsFile, types # type: ignore
from nptdms import ( # type: ignore
ChannelObject,
GroupObject,
RootObject,
TdmsFile,
TdmsWriter,
types,
)
from pytest_mock import MockFixture
from sift.metadata.v1.metadata_pb2 import MetadataKeyType

Expand All @@ -24,8 +33,28 @@ def __init__(
self.group_name: str = group_name
self.properties: Optional[Dict[str, str]] = properties or {}
self.data: Optional[List[int]] = data or []
self.raw_data = self.data
self.data_type: type = data_type

tdms_to_numpy = {
types.Int8: np.dtype(np.int8),
types.Int16: np.dtype(np.int16),
types.Int32: np.dtype(np.int32),
types.Int64: np.dtype(np.int64),
types.Uint8: np.dtype(np.uint8),
types.Uint16: np.dtype(np.uint16),
types.Uint32: np.dtype(np.uint32),
types.Uint64: np.dtype(np.uint64),
types.SingleFloat: np.dtype(np.float32),
types.DoubleFloat: np.dtype(np.float64),
types.Boolean: np.dtype(np.bool_),
types.String: np.dtype(np.str_),
types.TimeStamp: None,
types.ComplexSingleFloat: np.dtype(np.complex64),
types.ComplexDoubleFloat: np.dtype(np.complex128),
}
self.dtype = tdms_to_numpy[self.data_type]


class MockTdmsGroup:
def __init__(self, name, channels: List[MockTdmsChannel]):
Expand Down Expand Up @@ -92,6 +121,39 @@ def mock_waveform_tdms_file():
return MockTdmsFile(mock_tdms_groups)


@pytest.fixture
def waveform_tdms_file_with_scaling():
    """Return a real ``TdmsFile`` holding three waveform channels with NI scaling properties.

    The file is written to an in-memory buffer with ``TdmsWriter`` and read
    back with ``TdmsFile``. Every channel lives in "Group 0", holds the data
    ``[1, 2, 3]``, and declares one linear scale (slope 1.5, intercept 10).
    Only ``Test/channel_0`` is flagged ``NI_Scaling_Status == "scaled"``; the
    other two are flagged ``"unscaled"``.
    """
    group_name = "Group 0"

    def build_channel(index):
        # Same waveform timing and scale description on every channel; only
        # the scaling status differs between channel 0 and the rest.
        scaling_status = "scaled" if index == 0 else "unscaled"
        return ChannelObject(
            group=group_name,
            channel=f"Test/channel_{index}",
            data=[1, 2, 3],
            properties={
                "wf_start_time": np.datetime64("2025-10-19T00:00:00.000000"),
                "wf_increment": 0.1,
                "wf_start_offset": 0,
                "extra": "info",
                "NI_Scaling_Status": scaling_status,
                "NI_Number_Of_Scales": 1,
                "NI_Scale[0]_Scale_Type": "Linear",
                "NI_Scale[0]_Linear_Slope": 1.5,
                "NI_Scale[0]_Linear_Y_Intercept": 10,
                "NI_Scale[0]_Linear_Input_Source": 0xFFFFFFFF,
            },
        )

    # One segment: root object, then the group, then its channels.
    segment = [
        RootObject({}),
        GroupObject(group_name),
        *(build_channel(index) for index in range(3)),
    ]

    buffer = io.BytesIO()
    with TdmsWriter(buffer) as writer:
        writer.write_segment(segment)

    # Rewind so TdmsFile parses from the beginning of the written bytes.
    buffer.seek(0)
    return TdmsFile(buffer)


@pytest.fixture
def mock_time_channel_tdms_file():
mock_tdms_groups = [
Expand Down Expand Up @@ -586,6 +648,7 @@ def test_tdms_upload_unknown_data_type(mocker: MockFixture, mock_waveform_tdms_f
mock_requests_post.return_value = MockResponse()

mock_waveform_tdms_file.groups()[0].channels()[0].data_type = types.ComplexDoubleFloat
mock_waveform_tdms_file.groups()[0].channels()[0].dtype = np.dtype(np.complex128)
mocker.patch("sift_py.data_import.tdms.TdmsFile").return_value = mock_waveform_tdms_file

svc = TdmsUploadService(rest_config)
Expand Down Expand Up @@ -887,3 +950,111 @@ def test_tdms_upload_service_upload_with_metadata_run_id(
# Metadata keys should match those in the mock_tdms_file properties
keys = [md["key"]["name"] for md in patch_data["run"]["metadata"]]
assert set(keys) == set(mock_waveform_tdms_file.properties.keys())


def test_waveform_tdms_with_scaling_upload_success(
    mocker: MockFixture, waveform_tdms_file_with_scaling: TdmsFile
):
    """Upload a waveform TDMS file whose channels carry NI linear-scaling properties.

    Verifies two things:

    * the CSV config sent through the patched ``Session.post`` reports
      ``Test/channel_0`` as INT_32 and the other two channels as DOUBLE;
    * the CSV text written to the (mocked) temporary file contains the raw
      values ``[1, 2, 3]`` for ``Test/channel_0`` and ``1.5 * raw + 10`` for
      the two channels whose ``NI_Scaling_Status`` is ``"unscaled"``.

    Args:
        mocker: pytest-mock fixture used to patch filesystem, HTTP and
            temp-file access.
        waveform_tdms_file_with_scaling: A real ``TdmsFile`` (not a
            ``MockTdmsFile``) built in memory by the fixture of the same name.
    """
    mock_path_is_file = mocker.patch("sift_py.data_import.tdms.Path.is_file")
    mock_path_is_file.return_value = True

    mock_path_getsize = mocker.patch("sift_py.data_import.csv.os.path.getsize")
    mock_path_getsize.return_value = 10

    mock_requests_post = mocker.patch("sift_py.rest.requests.Session.post")
    mock_requests_post.return_value = MockResponse()

    def mock_tdms_file_constructor(path):
        """The first call should always return the fixture object since
        it is mocking a call to open the original tdms file.

        The second call should return a real TdmsFile since the unit
        test will actually create one with filtered channels.
        """
        if path == "some_tdms.tdms":
            return waveform_tdms_file_with_scaling
        else:
            return TdmsFile(path)

    mocker.patch("sift_py.data_import.tdms.TdmsFile", mock_tdms_file_constructor)

    # Create a mock file so we can capture the data that's written
    class MockNamedTemporaryFile:
        def __init__(self, **kwargs):
            self.data = ""
            self.name = "filename.csv"

        def write(self, data: str):
            self.data += data
            return len(data)

        def close(self):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            pass

    # Every temp file the service creates is recorded here, in creation order.
    mock_temp_files: List[MockNamedTemporaryFile] = []

    def mock_temp_file_constructor(**kwargs):
        mf = MockNamedTemporaryFile(**kwargs)
        mock_temp_files.append(mf)
        return mf

    mocker.patch("sift_py.data_import.tdms.NamedTemporaryFile", mock_temp_file_constructor)

    svc = TdmsUploadService(rest_config)

    def get_csv_config(mock, n):
        """Return the CSV config carried by the n-th call recorded on ``mock``."""
        # Use the mock that was passed in rather than the enclosing variable,
        # so the helper actually honours its first argument.
        return json.loads(mock.call_args_list[n].kwargs["data"])["csv_config"]

    # Test without grouping
    svc.upload("some_tdms.tdms", "asset_name")
    config = get_csv_config(mock_requests_post, 0)
    expected_config: Dict[str, Any] = {
        "asset_name": "asset_name",
        "run_name": "",
        "run_id": "",
        "first_data_row": 2,
        "time_column": {
            "format": "TIME_FORMAT_ABSOLUTE_DATETIME",
            "column_number": 1,
            "relative_start_time": None,
        },
        "data_columns": {},
    }
    for i in range(3):
        # Column 1 is the time column, so data columns start at 2.
        expected_config["data_columns"][str(2 + i)] = {
            "name": f"Test/channel_{i}",
            # Channel 0 is expected to keep an integer type; the other two
            # are expected to be reported as doubles.
            "data_type": "CHANNEL_DATA_TYPE_INT_32" if i == 0 else "CHANNEL_DATA_TYPE_DOUBLE",
            "units": "",
            "description": "",
            "enum_types": [],
            "bit_field_elements": [],
        }
    assert config == expected_config

    # Create a pandas DataFrame with the expected resulting CSV data.
    # channel_0 is flagged "scaled" in the fixture and is expected unchanged;
    # channel_1 and channel_2 are flagged "unscaled" and are expected as
    # 1.5 * [1, 2, 3] + 10 = [11.5, 13.0, 14.5].
    df = pd.DataFrame(
        {
            "": [
                np.datetime64("2025-10-19T00:00:00.000000"),
                np.datetime64("2025-10-19T00:00:00.100000"),
                np.datetime64("2025-10-19T00:00:00.200000"),
            ],
            "/'Group 0'/'Test/channel_0'": [1, 2, 3],
            "/'Group 0'/'Test/channel_1'": [11.5, 13.0, 14.5],
            "/'Group 0'/'Test/channel_2'": [11.5, 13.0, 14.5],
        }
    )

    csv_buffer = io.StringIO()
    df.to_csv(csv_buffer, index=False)
    csv_content = csv_buffer.getvalue()

    assert mock_temp_files[0].data == csv_content
39 changes: 22 additions & 17 deletions python/lib/sift_py/data_import/tdms.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,21 @@
from sift_py.ingestion.channel import ChannelDataType
from sift_py.rest import SiftRestConfig

TDMS_TO_SIFT_TYPES = {
types.Boolean: ChannelDataType.BOOL,
types.Int8: ChannelDataType.INT_32,
types.Int16: ChannelDataType.INT_32,
types.Int32: ChannelDataType.INT_32,
types.Int64: ChannelDataType.INT_64,
types.Uint8: ChannelDataType.UINT_32,
types.Uint16: ChannelDataType.UINT_32,
types.Uint32: ChannelDataType.UINT_32,
types.Uint64: ChannelDataType.UINT_64,
types.SingleFloat: ChannelDataType.FLOAT,
types.DoubleFloat: ChannelDataType.DOUBLE,
types.String: ChannelDataType.STRING,
# Mapping from numpy data types to Sift ChannelDataType.
#
# Keys are numpy scalar types; `_create_csv_config` looks a channel up with
# `channel.dtype.type`. 8- and 16-bit integers are reported with the 32-bit
# Sift types, and both `np.str_` and `np.object_` are reported as STRING.
# A type missing from this mapping raises KeyError on lookup, which
# `_create_csv_config` turns into a "data type not supported" exception.
NUMPY_TO_SIFT_TYPES = {
np.bool_: ChannelDataType.BOOL,
np.int8: ChannelDataType.INT_32,
np.int16: ChannelDataType.INT_32,
np.int32: ChannelDataType.INT_32,
np.int64: ChannelDataType.INT_64,
np.uint8: ChannelDataType.UINT_32,
np.uint16: ChannelDataType.UINT_32,
np.uint32: ChannelDataType.UINT_32,
np.uint64: ChannelDataType.UINT_64,
np.float32: ChannelDataType.FLOAT,
np.float64: ChannelDataType.DOUBLE,
np.str_: ChannelDataType.STRING,
np.object_: ChannelDataType.STRING,
}


Expand All @@ -65,7 +67,9 @@ class TdmsTimeFormat(Enum):
# Implements the same interface as TdmsChannel. Allows us to create
# TdmsChannel like objects without having to save and read the channels to
# a file.
_TdmsChannel = namedtuple("_TdmsChannel", ["group_name", "name", "data_type", "data", "properties"])
_TdmsChannel = namedtuple(
"_TdmsChannel", ["group_name", "name", "data_type", "data", "properties", "dtype"]
)


CHARACTER_REPLACEMENTS = {
Expand Down Expand Up @@ -282,7 +286,7 @@ def contains_timing(channel: TdmsChannel) -> bool:
new_channel = ChannelObject(
group=sanitize_string(channel.group_name),
channel=sanitize_string(channel.name),
data=channel.data,
data=channel.raw_data,
properties=channel.properties,
)
valid_channels.append(new_channel)
Expand Down Expand Up @@ -407,6 +411,7 @@ def get_time_channels(group: TdmsGroup) -> List[TdmsChannel]:
group_name=updated_group_name,
name=updated_channel_name,
data_type=channel.data_type,
dtype=channel.dtype,
data=data,
properties=channel.properties,
)
Expand Down Expand Up @@ -485,12 +490,12 @@ def _create_csv_config(
first_data_column = 2
for i, channel in enumerate(channels):
try:
data_type = TDMS_TO_SIFT_TYPES[channel.data_type].as_human_str(api_format=True)
data_type = NUMPY_TO_SIFT_TYPES[channel.dtype.type].as_human_str(api_format=True)
except KeyError:
data_type = None

if data_type is None:
raise Exception(f"{channel.name} data type not supported: {channel.data_type}")
raise Exception(f"{channel.name} data type not supported: {channel.dtype}")

channel_config = DataColumn(
name=_channel_fqn(name=channel.name, component=channel.group_name)
Expand Down
Loading