sift-stack · marc-sift · Dec 4, 2025 · Dec 4, 2025 · Dec 4, 2025
diff --git a/python/lib/sift_py/data_import/_tdms_test.py b/python/lib/sift_py/data_import/_tdms_test.py
@@ -1,9 +1,18 @@
+import io
 import json
 from typing import Any, Dict, List, Optional
 
+import numpy as np
 import pandas as pd
 import pytest
-from nptdms import TdmsFile, types  # type: ignore
+from nptdms import (  # type: ignore
+    ChannelObject,
+    GroupObject,
+    RootObject,
+    TdmsFile,
+    TdmsWriter,
+    types,
+)
 from pytest_mock import MockFixture
 from sift.metadata.v1.metadata_pb2 import MetadataKeyType
 
@@ -24,8 +33,28 @@ def __init__(
         self.group_name: str = group_name
         self.properties: Optional[Dict[str, str]] = properties or {}
         self.data: Optional[List[int]] = data or []
+        self.raw_data = self.data
         self.data_type: type = data_type
 
+        tdms_to_numpy = {
+            types.Int8: np.dtype(np.int8),
+            types.Int16: np.dtype(np.int16),
+            types.Int32: np.dtype(np.int32),
+            types.Int64: np.dtype(np.int64),
+            types.Uint8: np.dtype(np.uint8),
+            types.Uint16: np.dtype(np.uint16),
+            types.Uint32: np.dtype(np.uint32),
+            types.Uint64: np.dtype(np.uint64),
+            types.SingleFloat: np.dtype(np.float32),
+            types.DoubleFloat: np.dtype(np.float64),
+            types.Boolean: np.dtype(np.bool_),
+            types.String: np.dtype(np.str_),
+            types.TimeStamp: None,
+            types.ComplexSingleFloat: np.dtype(np.complex64),
+            types.ComplexDoubleFloat: np.dtype(np.complex128),
+        }
+        self.dtype = tdms_to_numpy[self.data_type]
+
 
 class MockTdmsGroup:
     def __init__(self, name, channels: List[MockTdmsChannel]):
@@ -92,6 +121,39 @@ def mock_waveform_tdms_file():
     return MockTdmsFile(mock_tdms_groups)
 
 
+@pytest.fixture
+def waveform_tdms_file_with_scaling():
+    """Build a real in-memory TdmsFile whose channels carry a linear NI scale."""
+    segment_objects: List[Any] = [RootObject({}), GroupObject("Group 0")]
+    for idx in range(3):
+        # Channel 0 is marked "scaled"; channels 1 and 2 are marked "unscaled".
+        segment_objects.append(
+            ChannelObject(
+                group="Group 0",
+                channel=f"Test/channel_{idx}",
+                data=[1, 2, 3],
+                properties={
+                    "wf_start_time": np.datetime64("2025-10-19T00:00:00.000000"),
+                    "wf_increment": 0.1,
+                    "wf_start_offset": 0,
+                    "extra": "info",
+                    "NI_Scaling_Status": "scaled" if idx == 0 else "unscaled",
+                    "NI_Number_Of_Scales": 1,
+                    "NI_Scale[0]_Scale_Type": "Linear",
+                    "NI_Scale[0]_Linear_Slope": 1.5,
+                    "NI_Scale[0]_Linear_Y_Intercept": 10,
+                    "NI_Scale[0]_Linear_Input_Source": 0xFFFFFFFF,
+                },
+            )
+        )
+
+    buffer = io.BytesIO()
+    with TdmsWriter(buffer) as writer:
+        writer.write_segment(segment_objects)
+    buffer.seek(0)
+    return TdmsFile(buffer)
+
+
 @pytest.fixture
 def mock_time_channel_tdms_file():
     mock_tdms_groups = [
@@ -586,6 +648,7 @@ def test_tdms_upload_unknown_data_type(mocker: MockFixture, mock_waveform_tdms_f
     mock_requests_post.return_value = MockResponse()
 
     mock_waveform_tdms_file.groups()[0].channels()[0].data_type = types.ComplexDoubleFloat
+    mock_waveform_tdms_file.groups()[0].channels()[0].dtype = np.dtype(np.complex128)
     mocker.patch("sift_py.data_import.tdms.TdmsFile").return_value = mock_waveform_tdms_file
 
     svc = TdmsUploadService(rest_config)
@@ -887,3 +950,111 @@ def test_tdms_upload_service_upload_with_metadata_run_id(
     # Metadata keys should match those in the mock_tdms_file properties
     keys = [md["key"]["name"] for md in patch_data["run"]["metadata"]]
     assert set(keys) == set(mock_waveform_tdms_file.properties.keys())
+
+
+def test_waveform_tdms_with_scaling_upload_success(
+    mocker: MockFixture, waveform_tdms_file_with_scaling: TdmsFile
+):
+    """Upload a real TdmsFile carrying NI linear scaling; check the CSV config and data."""
+    mocker.patch("sift_py.data_import.tdms.Path.is_file", return_value=True)
+
+    mock_path_getsize = mocker.patch("sift_py.data_import.csv.os.path.getsize")
+    mock_path_getsize.return_value = 10
+
+    mock_requests_post = mocker.patch("sift_py.rest.requests.Session.post")
+    mock_requests_post.return_value = MockResponse()
+
+    def mock_tdms_file_constructor(path):
+        """The first call should always return the fixture object since
+        it stands in for opening the original tdms file.
+
+        The second call should return a real TdmsFile since the unit
+        test will actually create one with filtered channels.
+        """
+        if path == "some_tdms.tdms":
+            return waveform_tdms_file_with_scaling
+        else:
+            return TdmsFile(path)
+
+    mocker.patch("sift_py.data_import.tdms.TdmsFile", mock_tdms_file_constructor)
+
+    # Create a mock file so we can capture the data that's written
+    class MockNamedTemporaryFile:
+        def __init__(self, **kwargs):
+            self.data = ""
+            self.name = "filename.csv"
+
+        def write(self, data: str):
+            self.data += data
+            return len(data)
+
+        def close(self):
+            pass
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc_val, exc_tb):
+            pass
+
+    mock_temp_files = []
+
+    def mock_temp_file_constructor(**kwargs):
+        mf = MockNamedTemporaryFile(**kwargs)
+        mock_temp_files.append(mf)
+        return mf
+
+    mocker.patch("sift_py.data_import.tdms.NamedTemporaryFile", mock_temp_file_constructor)
+
+    svc = TdmsUploadService(rest_config)
+
+    def get_csv_config(mock, n):
+        """Return the CSV config sent in the n-th recorded call of ``mock``."""
+        return json.loads(mock.call_args_list[n].kwargs["data"])["csv_config"]
+
+    # Test without grouping
+    svc.upload("some_tdms.tdms", "asset_name")
+    config = get_csv_config(mock_requests_post, 0)
+    expected_config: Dict[str, Any] = {
+        "asset_name": "asset_name",
+        "run_name": "",
+        "run_id": "",
+        "first_data_row": 2,
+        "time_column": {
+            "format": "TIME_FORMAT_ABSOLUTE_DATETIME",
+            "column_number": 1,
+            "relative_start_time": None,
+        },
+        "data_columns": {},
+    }
+    for i in range(3):
+        expected_config["data_columns"][str(2 + i)] = {
+            "name": f"Test/channel_{i}",
+            "data_type": "CHANNEL_DATA_TYPE_INT_32" if i == 0 else "CHANNEL_DATA_TYPE_DOUBLE",
+            "units": "",
+            "description": "",
+            "enum_types": [],
+            "bit_field_elements": [],
+        }
+    assert config == expected_config
+
+    # Create a pandas DataFrame with the expected resulting CSV data
+    # Values should be scaled correctly.
+    df = pd.DataFrame(
+        {
+            "": [
+                np.datetime64("2025-10-19T00:00:00.000000"),
+                np.datetime64("2025-10-19T00:00:00.100000"),
+                np.datetime64("2025-10-19T00:00:00.200000"),
+            ],
+            "/'Group 0'/'Test/channel_0'": [1, 2, 3],
+            "/'Group 0'/'Test/channel_1'": [11.5, 13.0, 14.5],
+            "/'Group 0'/'Test/channel_2'": [11.5, 13.0, 14.5],
+        }
+    )
+
+    csv_buffer = io.StringIO()
+    df.to_csv(csv_buffer, index=False)
+    csv_content = csv_buffer.getvalue()
+
+    assert mock_temp_files[0].data == csv_content
diff --git a/python/lib/sift_py/data_import/tdms.py b/python/lib/sift_py/data_import/tdms.py
@@ -36,19 +36,21 @@
 from sift_py.ingestion.channel import ChannelDataType
 from sift_py.rest import SiftRestConfig
 
-TDMS_TO_SIFT_TYPES = {
-    types.Boolean: ChannelDataType.BOOL,
-    types.Int8: ChannelDataType.INT_32,
-    types.Int16: ChannelDataType.INT_32,
-    types.Int32: ChannelDataType.INT_32,
-    types.Int64: ChannelDataType.INT_64,
-    types.Uint8: ChannelDataType.UINT_32,
-    types.Uint16: ChannelDataType.UINT_32,
-    types.Uint32: ChannelDataType.UINT_32,
-    types.Uint64: ChannelDataType.UINT_64,
-    types.SingleFloat: ChannelDataType.FLOAT,
-    types.DoubleFloat: ChannelDataType.DOUBLE,
-    types.String: ChannelDataType.STRING,
+# Mapping from numpy scalar types (i.e. ``dtype.type``, not dtype instances) to Sift ChannelDataType
+NUMPY_TO_SIFT_TYPES = {
+    np.bool_: ChannelDataType.BOOL,
+    np.int8: ChannelDataType.INT_32,
+    np.int16: ChannelDataType.INT_32,
+    np.int32: ChannelDataType.INT_32,
+    np.int64: ChannelDataType.INT_64,
+    np.uint8: ChannelDataType.UINT_32,
+    np.uint16: ChannelDataType.UINT_32,
+    np.uint32: ChannelDataType.UINT_32,
+    np.uint64: ChannelDataType.UINT_64,
+    np.float32: ChannelDataType.FLOAT,
+    np.float64: ChannelDataType.DOUBLE,
+    np.str_: ChannelDataType.STRING,
+    np.object_: ChannelDataType.STRING,
 }
 
 
@@ -65,7 +67,9 @@ class TdmsTimeFormat(Enum):
 # Implements the same interface as TdmsChannel. Allows us to create
 # TdmsChannel like objects without having to save and read the channels to
 # a file.
-_TdmsChannel = namedtuple("_TdmsChannel", ["group_name", "name", "data_type", "data", "properties"])
+_TdmsChannel = namedtuple(
+    "_TdmsChannel", ["group_name", "name", "data_type", "data", "properties", "dtype"]
+)
 
 
 CHARACTER_REPLACEMENTS = {
@@ -282,7 +286,7 @@ def contains_timing(channel: TdmsChannel) -> bool:
                     new_channel = ChannelObject(
                         group=sanitize_string(channel.group_name),
                         channel=sanitize_string(channel.name),
-                        data=channel.data,
+                        data=channel.raw_data,
                         properties=channel.properties,
                     )
                     valid_channels.append(new_channel)
@@ -407,6 +411,7 @@ def get_time_channels(group: TdmsGroup) -> List[TdmsChannel]:
                     group_name=updated_group_name,
                     name=updated_channel_name,
                     data_type=channel.data_type,
+                    dtype=channel.dtype,
                     data=data,
                     properties=channel.properties,
                 )
@@ -485,12 +490,12 @@ def _create_csv_config(
         first_data_column = 2
         for i, channel in enumerate(channels):
             try:
-                data_type = TDMS_TO_SIFT_TYPES[channel.data_type].as_human_str(api_format=True)
+                data_type = NUMPY_TO_SIFT_TYPES[channel.dtype.type].as_human_str(api_format=True)
             except KeyError:
                 data_type = None
 
             if data_type is None:
-                raise Exception(f"{channel.name} data type not supported: {channel.data_type}")
+                raise Exception(f"{channel.name} data type not supported: {channel.dtype}")
 
             channel_config = DataColumn(
                 name=_channel_fqn(name=channel.name, component=channel.group_name)